['rainy', 'mild', 'high', 'weak', 'yes'],
['rainy', 'cool', 'normal', 'weak', 'yes'],
['rainy', 'cool', 'normal', 'strong', 'no'],
['overcast', 'cool', 'normal', 'strong', 'yes']]
# Column names for the feature part of each row, and the possible class labels.
features = ['weather', 'temperature', 'humidity', 'wind']
labels = ['yes', 'no']

# Data preprocessing: split every row of `data` into its feature values
# (all columns except the last) and its class label (the last column).
X = []
Y = []
for row in data:
    X.append(row[:-1])
    Y.append(row[-1])
# Convert features and labels to numeric values
def convert_to_numeric(data, features, labels):
    """Build integer encodings for categorical feature values and labels.

    Args:
        data: Sequence of rows; ``row[i]`` holds the value of ``features[i]``.
        features: Feature names, in the same order as the columns of ``data``.
        labels: Class labels; each one is encoded as its index in this list.

    Returns:
        A ``(feature_dict, label_dict)`` tuple. ``feature_dict`` maps every
        feature name to a ``{value: code}`` dict, and ``label_dict`` maps
        every label to its position in ``labels``.
    """
    feature_dict = {feature: {} for feature in features}
    for i, feature in enumerate(features):
        codes = feature_dict[feature]
        # Walk the distinct values in first-appearance order. Iterating a
        # plain set of strings can yield a different order on every
        # interpreter run, which would make the encoding non-reproducible.
        for value in dict.fromkeys(row[i] for row in data):
            codes[value] = len(codes)
    label_dict = {label: i for i, label in enumerate(labels)}
    return feature_dict, label_dict
# Build the lookup tables from the raw samples, then use them to translate
# every feature value and every label into its integer code.
feature_dict, label_dict = convert_to_numeric(X, features, labels)
X_numeric = [
    [feature_dict[name][cell] for name, cell in zip(features, sample)]
    for sample in X
]
Y_numeric = list(map(label_dict.__getitem__, Y))
# Split into training and test sets (test_size=0.3; random_state is fixed so
# the same split is produced on every run).
X_train, X_test, Y_train, Y_test = train_test_split(
    X_numeric, Y_numeric, test_size=0.3, random_state=1
)
# Build the decision tree, using entropy as the split criterion, and fit it
# on the training portion of the data.
clf = DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(X_train, Y_train)
# Visualize the fitted decision tree
plt.figure(figsize=(15, 10))
tree.plot_tree(clf, filled=True)
plt.show()