import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
# Script: train a Multinomial Naive Bayes classifier that predicts a "Class"
# label from a "Name" string, using TF-IDF features, and print a per-class
# precision/recall report.

# Register "Microsoft YaHei" as matplotlib's default font family.  No figure
# is drawn in this part of the script; the setting only affects later plots.
font = {
    "family": "Microsoft YaHei",
}
matplotlib.rc("font", **font)

# Fixed seed so the train/test split -- and therefore the printed report --
# is identical from run to run.
RANDOM_STATE = 42

# Only the first two rows of the sheet are used.  After the transpose every
# spreadsheet column becomes one sample: sheet row 0 -> "Class",
# sheet row 1 -> "Name".
data = pd.read_excel("药品.xlsx", header=None).iloc[:2, :].T
data.columns = ["Class", "Name"]
# Drop samples with a missing label or name, then exact duplicate pairs.
data = data.dropna().drop_duplicates()

# Labels 1, 2 and 3 are kept (as ints); every other value is folded into
# the catch-all class 0.
data["Class"] = data["Class"].apply(lambda x: 0 if x not in [1, 2, 3] else int(x))

# The transposed column has object dtype and may hold non-string cells
# (e.g. a purely numeric name).  TfidfVectorizer lowercases each document and
# fails on non-strings, so coerce every name to str up front.
data["Name"] = data["Name"].astype(str)

# Hold out 25% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    data["Name"],
    data["Class"],
    test_size=0.25,
    random_state=RANDOM_STATE,
)

# Feature extraction.
# NOTE(review): the default TfidfVectorizer analyzer is word-level and keeps
# tokens of two or more word characters.  If the names are unsegmented
# Chinese strings (presumably, given the input file -- confirm against the
# data), each whole name becomes a single token and unseen test names get
# all-zero feature vectors.  Consider analyzer="char" or a word segmenter.
tf = TfidfVectorizer()
# Learn the vocabulary and IDF weights from the training names only ...
x_train = tf.fit_transform(x_train)
# ... and reuse them unchanged for the test names.
x_test = tf.transform(x_test)

# Fit Multinomial Naive Bayes and predict the held-out labels.
mlt = MultinomialNB()
mlt.fit(x_train, y_train)
y_predict = mlt.predict(x_test)

# Print the report on its own lines: its header row is padded to line up with
# the columns below it, which breaks if a label precedes it on the same line.
print("每个类别的精确率和召回率:", classification_report(y_test, y_predict), sep="\n")
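# NOTE: stray pagination text copied from the source web page ("1", "2",
# "go to page") used to follow here.  It was not code, and its last line
# raised NameError when the script ran.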