学习知识
多标签(multi-label)数据的学习问题,常用的分类器或者分类策略有哪些?
深度学习模型处理多标签(multi_label)分类任务——keras实战
[xgboost 与 lightgbm 多分类 与 多标签 任务 python代码实例](https://blog.csdn.net/lrs1353281004/article/details/103350559)
xgboost 多分类任务
# XGBoost multi-class classification demo: one integer class label per sample.
from xgboost import XGBClassifier
import numpy as np

# Synthetic data: 500 samples with 100 features each, labels from 5 classes.
X_train = np.random.rand(500, 100)
y_train = np.random.randint(5, size=500)
X_val = np.random.rand(100, 100)

model = XGBClassifier()
model.fit(X_train, y_train)
val_pred = model.predict(X_val)
lightgbm 多分类任务
# LightGBM multi-class classification demo: same setup as the XGBoost
# example — one integer class label per sample.
from lightgbm import LGBMClassifier
import numpy as np

# Synthetic data: 500 samples with 100 features each, labels from 5 classes.
X_train = np.random.rand(500, 100)
y_train = np.random.randint(5, size=500)
X_val = np.random.rand(100, 100)

model = LGBMClassifier()
model.fit(X_train, y_train)
val_pred = model.predict(X_val)
xgboost 多标签任务
# XGBoost multi-label classification demo: wrap the binary classifier in
# One-vs-Rest so each of the 20 labels gets its own independent model.
from xgboost import XGBClassifier
from sklearn.multiclass import OneVsRestClassifier
import numpy as np

# Synthetic data: 500 samples, 100 features, 20 independent binary labels.
X_train = np.random.rand(500, 100)
Y_train = np.random.randint(2, size=(500, 20))
X_val = np.random.rand(100, 100)

model = OneVsRestClassifier(XGBClassifier())
model.fit(X_train, Y_train)
val_pred = model.predict(X_val)
lightgbm 多标签任务
# LightGBM multi-label classification demo: One-vs-Rest trains one binary
# LGBMClassifier per label column.
from lightgbm import LGBMClassifier
from sklearn.multiclass import OneVsRestClassifier
import numpy as np

# Synthetic data: 500 samples, 100 features, 20 independent binary labels.
X_train = np.random.rand(500, 100)
Y_train = np.random.randint(2, size=(500, 20))
X_val = np.random.rand(100, 100)

model = OneVsRestClassifier(LGBMClassifier())
model.fit(X_train, Y_train)
val_pred = model.predict(X_val)
# ExtraTreeClassifier: a single extremely-randomized tree, used here as the
# base estimator of a bagging ensemble.
# Fix: the original snippet called train_test_split without importing it and
# referenced undefined X / y (NameError); generate data so it is self-contained.
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import ExtraTreeClassifier

# Synthetic classification data so the snippet runs on its own.
X, y = make_classification(n_samples=500, n_features=20, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

extra_tree = ExtraTreeClassifier(random_state=0)
cls = BaggingClassifier(extra_tree, random_state=0).fit(X_train, y_train)
# Mean accuracy on the held-out split.
cls.score(X_test, y_test)
# ExtraTreesClassifier: a forest of extremely randomized trees (the ensemble
# counterpart of a single ExtraTreeClassifier).
# Reuses the X_train / X_test split defined by the snippet above.
from sklearn.ensemble import ExtraTreesClassifier

clf = ExtraTreesClassifier(random_state=0, n_estimators=100).fit(X_train, y_train)
predictions = clf.predict(X_test)
# KNeighborsClassifier: predicts by majority vote among the 3 nearest
# training samples. Reuses the train/test split from the snippets above.
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X_train, y_train)
print(neigh.predict(X_test))
# predict_proba(X) returns class-probability estimates for test data X.
# Fix: the original passed y_test (labels) instead of X_test (features).
print(neigh.predict_proba(X_test))
# RadiusNeighborsClassifier: votes among all training samples within the
# given radius rather than a fixed number of neighbours.
from sklearn.neighbors import RadiusNeighborsClassifier
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X_train, y_train)
# Fix: both print calls below were missing their closing parenthesis.
print(neigh.predict(X_test))
# predict_proba(X) returns class-probability estimates for test data X.
print(neigh.predict_proba(X_test))
# RandomForestClassifier on a synthetic dataset.
# Fixes: removed the interactive-prompt "..." continuation markers that made
# the make_classification call a syntax error; split the freshly generated
# (X, y) instead of fitting on the unrelated 100-feature X_train from earlier
# snippets; closed the unbalanced print(...) calls; qualified the bare
# predict_proba call with clf.
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X_train, y_train)
# predict(X): majority-vote class prediction of the forest.
print(clf.predict(X_test))
# predict_proba(X): mean class probabilities over all trees; one tree's class
# probability is the fraction of samples of that class in the reached leaf.
print(clf.predict_proba(X_test))