Basic Information
Source name: Python leaf classifier algorithm example source code
Source size: 0.60 MB
File format: .zip
Development language: Python
Last updated: 2016-12-21
Source Code Introduction
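
The script below compares a handful of scikit-learn classifiers on a leaf classification dataset: it label-encodes the species column, evaluates each model's accuracy and log loss on a held-out split, plots the comparison with seaborn, and finally trains a LinearDiscriminantAnalysis model to produce class probabilities for the test set.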

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score,log_loss
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC,LinearSVC,NuSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit
train=pd.read_csv("train.csv")
test=pd.read_csv("test.csv")
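# encode(): turn the species names into integer labels, remember the class names and
# test ids for the submission step, and drop the non-feature columns from both tables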
def encode(train,test):
    le=LabelEncoder().fit(train.species)
    labels=le.transform(train.species)
    classes=list(le.classes_)
    test_ids=test.id
    train=train.drop(['species','id'],axis=1)
    test=test.drop(['id'],axis=1)
    return train,labels,test,test_ids,classes
train, labels, test, test_ids, classes = encode(train, test)
# Hold out 20% of the training data as a stratified validation split so the
# class distribution is preserved in both subsets
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=23)
train_index, test_index = next(sss.split(train.values, labels))
X_train, X_test = train.values[train_index], train.values[test_index]
y_train, y_test = labels[train_index], labels[test_index]
classifiers=[
    KNeighborsClassifier(3),
    SVC(kernel="rbf",probability=True),
    NuSVC(probability=True),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    GaussianNB(),
    ]
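# Fit every candidate classifier and record its accuracy and log loss on the held-out split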
log_cols=["Classifier","Accuracy","Log Loss"]
log=pd.DataFrame(columns=log_cols)
for clf in classifiers:
    clf.fit(X_train,y_train)
    name=clf.__class__.__name__
    print("=" * 30)
    print(name)
    print('****Results****')
    # Accuracy is computed on hard predictions, log loss on the predicted probabilities
    train_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test, train_predictions)
    print("Accuracy: {:.4%}".format(acc))
    train_predictions = clf.predict_proba(X_test)
    ll = log_loss(y_test, train_predictions)
    print("Log Loss: {}".format(ll))
    log_entry = pd.DataFrame([[name, acc * 100, ll]], columns=log_cols)
    log = pd.concat([log, log_entry], ignore_index=True)
print("=" * 30)
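# Visualize the comparison: one bar chart for accuracy, one for log loss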
sns.set_color_codes("muted")
sns.barplot(x='Accuracy', y='Classifier', data=log, color="b")
plt.xlabel('Accuracy %')
plt.title('Classifier performance comparison')
plt.show()
sns.set_color_codes("muted")
sns.barplot(x='Log Loss', y='Classifier', data=log, color="g")
plt.xlabel('Log Loss')
plt.title('Classifier Log Loss')
plt.show()
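# Train the chosen final model (linear discriminant analysis) on the training split
# and predict class probabilities for the unlabeled test set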
favorite_clf = LinearDiscriminantAnalysis()
favorite_clf.fit(X_train, y_train)
test_predictions = favorite_clf.predict_proba(test)
submission = pd.DataFrame(test_predictions, columns=classes)
submission.insert(0, 'id', test_ids)
submission = submission.reset_index(drop=True)
print(submission.tail())   # quick sanity check of the last few rows
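
The listing ends with a quick look at the tail of the prediction table. To turn those probabilities into an actual submission file you would typically write the DataFrame to disk; a minimal sketch follows (the output filename submission.csv is an assumption, not part of the original source):

# Hypothetical final step: persist the class probabilities, one row per test id,
# in the usual CSV submission format. The filename is an assumption.
submission.to_csv('submission.csv', index=False)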