Ensemble Learning

Hard Voting

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

# Two-class moons dataset with some noise
X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Hard voting: each classifier casts one vote; the majority class wins
voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC()),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='hard')

voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

Output accuracy: 0.896
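
As a quick sanity check, each base learner can also be scored on its own (a minimal sketch reusing X_train/X_test and the imports from the snippet above; the exact numbers depend on the seeds):

# Score the individual classifiers to see what voting adds
for name, clf in [('log_clf', LogisticRegression()),
                  ('svm_clf', SVC()),
                  ('dt_clf', DecisionTreeClassifier(random_state=666))]:
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))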

Soft Voting

import numpy as np
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Soft voting averages predicted class probabilities, so SVC needs
# probability=True to enable predict_proba
voting_clf2 = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC(probability=True)),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='soft')

voting_clf2.fit(X_train, y_train)
voting_clf2.score(X_test, y_test)

Output accuracy: 0.912
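
Soft voting averages the class probabilities from predict_proba and predicts the class with the highest mean probability. With equal weights, the same decision can be reproduced by hand (a minimal sketch reusing the fitted voting_clf2 and np from above):

# Average predict_proba over the fitted base learners and take the argmax;
# with default equal weights this should match voting_clf2.predict
probas = np.mean([est.predict_proba(X_test) for est in voting_clf2.estimators_],
                 axis=0)
manual_pred = np.argmax(probas, axis=1)
print((manual_pred == voting_clf2.predict(X_test)).all())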

Bagging (sampling with replacement)

n_estimators=500: use 500 base estimators.
max_samples=100: train each base estimator on 100 samples.

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# bootstrap=True: each of the 500 trees is trained on 100 samples
# drawn with replacement from the training set
bagging_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                n_estimators=500, max_samples=100,
                                bootstrap=True)
bagging_clf.fit(X_train, y_train)
bagging_clf.score(X_test, y_test)

Output accuracy: 0.916
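
Because each tree only sees a bootstrap sample, the samples it never sees (out-of-bag, OOB) act as a built-in validation set; the snippets below rely on this via oob_score=True. A minimal sketch on the full data (the variable name is just for illustration):

# oob_score=True scores each sample using only the trees that never saw it,
# so no separate train/test split is needed
bagging_oob_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                    n_estimators=500, max_samples=100,
                                    bootstrap=True, oob_score=True)
bagging_oob_clf.fit(X, y)
bagging_oob_clf.oob_score_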

Random Subspaces (random sampling of features)

max_features: the number of features to draw from X to train each base estimator.

import warnings
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Random Subspaces: keep all samples (max_samples=1.0) but give each tree
# a random subset of features, drawn with replacement (bootstrap_features=True)
random_subspaces_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                         n_estimators=500, max_samples=1.0,
                                         bootstrap=True, oob_score=True,
                                         max_features=1, bootstrap_features=True)
random_subspaces_clf.fit(X, y)
random_subspaces_clf.oob_score_

Output OOB accuracy: 0.824 (make_moons has only two features, so max_features=1 leaves each tree a single feature, hence the drop).

Random Patches (random sampling of both samples and features)

import warnings
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Random Patches: subsample both rows (max_samples=100) and columns
# (max_features=1) for every base estimator
random_patches_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                       n_estimators=500, max_samples=100,
                                       bootstrap=True, oob_score=True,
                                       max_features=1, bootstrap_features=True)
random_patches_clf.fit(X, y)
random_patches_clf.oob_score_

Output OOB accuracy: 0.86

Random Forest

import warnings
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# A random forest is bagging over decision trees with extra feature
# randomness at each split; oob_score=True evaluates on out-of-bag samples
rf_clf = RandomForestClassifier(n_estimators=500, random_state=666, oob_score=True)
rf_clf.fit(X, y)
rf_clf.oob_score_

Output OOB accuracy: 0.896
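
A fitted forest also exposes impurity-based feature importances; with only two features in make_moons this is merely illustrative (reusing rf_clf from above):

# Mean decrease in impurity per feature, averaged over the 500 trees
rf_clf.feature_importances_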

Extra-Trees (Extremely Randomized Trees)

import warnings
from sklearn import datasets
from sklearn.ensemble import ExtraTreesClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Extra-Trees use random split thresholds on top of random features,
# trading a little bias for lower variance and faster training
et_clf = ExtraTreesClassifier(n_estimators=500, bootstrap=True,
                              oob_score=True, random_state=666)
et_clf.fit(X, y)
et_clf.oob_score_

Output OOB accuracy: 0.892

AdaBoost

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# AdaBoost: shallow trees fitted sequentially, each round reweighting the
# samples the previous rounds misclassified
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                             n_estimators=500, random_state=666)
ada_clf.fit(X_train, y_train)
ada_clf.score(X_test, y_test)

Output accuracy: 0.872
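
Since boosting adds estimators one at a time, it is worth checking how test accuracy evolves with ensemble size; staged_score yields the score after every boosting round (a sketch reusing the fitted ada_clf from above):

# Test accuracy after each of the 500 boosting rounds
staged = list(ada_clf.staged_score(X_test, y_test))
print(len(staged), max(staged))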

Gradient Boosting Decision Tree (GBDT)

import numpy as np
import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# GBDT fits each new tree to the gradient of the loss of the current ensemble;
# max_depth caps each tree, n_estimators sets the number of boosting stages
gb_clf = GradientBoostingClassifier(max_depth=2, n_estimators=30)
gb_clf.fit(X_train, y_train)
gb_clf.score(X_test, y_test)

Output accuracy: 0.912
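
The number of boosting stages is the key hyperparameter for GBDT; staged_predict evaluates every intermediate ensemble, so one can pick the best size on held-out data (a hedged sketch reusing the split and np from above; the chosen value depends on the split):

# Fit a larger ensemble once, then score each intermediate stage
gb_big = GradientBoostingClassifier(max_depth=2, n_estimators=200)
gb_big.fit(X_train, y_train)
stage_scores = [np.mean(pred == y_test) for pred in gb_big.staged_predict(X_test)]
best_n = int(np.argmax(stage_scores)) + 1
print(best_n, max(stage_scores))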
