# Kaggle Titanic: random forest baseline submission

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier


# Load the training set; it must contain the feature columns below plus the
# 'Survived' label column.
train_data = pd.read_csv('/kaggle/input/titanic/train.csv')
train_data.head()   # preview of the first rows (only visible in a notebook cell)

# Load the test set; it must contain the same feature columns and 'PassengerId'.
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')
test_data.head()    # preview of the first rows (only visible in a notebook cell)


# Columns used as model inputs.
features = ['Pclass', 'Sex', 'SibSp', 'Parch']
# get_dummies one-hot encodes the non-numeric columns (presumably 'Sex') and
# leaves numeric columns unchanged.
X = pd.get_dummies(train_data[features])    # X: training feature matrix
y = train_data['Survived']  # y: class labels


# A fixed random_state keeps the fitted forest reproducible between runs.
model = RandomForestClassifier(n_estimators=100,
                               max_depth=5, random_state=1)
model.fit(X, y)  # train the model


# Encode the test set the same way, then align it to the training columns.
# get_dummies only creates columns for the categories present in the frame it
# is given, so a category missing from (or new in) the test set would
# otherwise yield a feature matrix that does not match what the model was
# fitted on. Missing indicator columns are filled with 0; unseen ones dropped.
X_test = pd.get_dummies(test_data[features]).reindex(columns=X.columns,
                                                     fill_value=0)
predictions = model.predict(X_test)


# Build the submission table: one predicted label per test passenger.
output = pd.DataFrame({'PassengerId': test_data.PassengerId,
                       'Survived': predictions})
output.to_csv('my_submission.csv', index=False)  # write without the row index


print('Your submission was successfully saved!')
# Original source: https://www.cnblogs.com/holaworld/p/12501514.html