Basic Information
Source name: Python leaf classifier algorithm example source code
Source size: 0.60 MB
File format: .zip
Development language: Python
Updated: 2016-12-21
Source Code Introduction
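The example below loads train.csv and test.csv, label-encodes the species column, holds out part of the labelled data for evaluation, compares several scikit-learn classifiers by accuracy and log loss, plots the comparison with seaborn, and finally uses LinearDiscriminantAnalysis to produce per-class probabilities for the test set.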
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import LabelEncoder

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

def encode(train, test):
    # Encode the species names as integer labels and drop the non-feature columns.
    le = LabelEncoder().fit(train.species)
    labels = le.transform(train.species)
    classes = list(le.classes_)
    test_ids = test.id
    train = train.drop(['species', 'id'], axis=1)
    test = test.drop(['id'], axis=1)
    return train, labels, test, test_ids, classes

train, labels, test, test_ids, classes = encode(train, test)

# Hold out 20% of the labelled rows (stratified by class) for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    train.values, labels, test_size=0.2, stratify=labels, random_state=23)

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="rbf", probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
]

# Evaluate every classifier on the hold-out split and record accuracy and log loss.
log_cols = ["Classifier", "Accuracy", "Log Loss"]
log = pd.DataFrame(columns=log_cols)

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    print("=" * 30)
    print(name)
    print('**** Results ****')
    predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, predictions)
    print("Accuracy: {:.4%}".format(acc))
    probabilities = clf.predict_proba(X_test)
    ll = log_loss(y_test, probabilities)
    print("Log Loss: {}".format(ll))
    log_entry = pd.DataFrame([[name, acc * 100, ll]], columns=log_cols)
    log = pd.concat([log, log_entry], ignore_index=True)
print("=" * 30)

# Plot the accuracy comparison.
sns.set_color_codes("muted")
sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")
plt.xlabel('Accuracy %')
plt.title('Classifier performance comparison')
plt.show()

# Plot the log-loss comparison.
sns.set_color_codes("muted")
sns.barplot(x='Log Loss', y='Classifier', data=log, color="g")
plt.xlabel('Log Loss')
plt.title('Classifier Log Loss')
plt.show()

# Fit the final model and predict per-class probabilities for the unlabelled test set.
favorite_clf = LinearDiscriminantAnalysis()
favorite_clf.fit(X_train, y_train)
test_predictions = favorite_clf.predict_proba(test)

submission = pd.DataFrame(test_predictions, columns=classes)
submission.insert(0, 'id', test_ids)
submission.reset_index()
print(submission.tail())
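The script only previews the result with submission.tail() and never writes a file. A minimal follow-up sketch for persisting the probability table, assuming the usual one-row-per-id CSV layout (the file name submission.csv is an illustration, not part of the original source):

# Assumed follow-up step, not in the original script:
# write the per-class probability table to a CSV file, one row per test id.
submission.to_csv('submission.csv', index=False)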