# Detection of breast cancer using KNN and SVM
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import model_selection
from sklearn.metrics import classification_report, accuracy_score
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import pandas as pd
# Load Dataset
# The column labels are supplied through ``names=`` when reading the file.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
names = ['id', 'clump_thickness', 'uniform_cell_size', 'uniform_cell_shape',
         'marginal_adhesion', 'single_epithelial_size', 'bare_nuclei',
         'bland_chromatin', 'normal_nucleoli', 'mitoses', 'class']
df = pd.read_csv(url, names=names)
# Pre-process the data
# Replace every '?' entry (presumably a missing-value marker -- confirm
# against the data set description) with the sentinel -99999.
df.replace('?', -99999, inplace=True)
print(df.axes)
# Remove the 'id' column. The axis is selected with the ``columns`` keyword
# because pandas 2.0 no longer accepts ``axis`` as a positional argument.
df.drop(columns=['id'], inplace=True)
# Quick look at the data, followed by two visualizations.
# Print, one after another: the row labelled 10, the frame's shape, and the
# summary statistics returned by describe().
print(df.loc[10], df.shape, df.describe(), sep="\n")
# Histogram of each variable
df.hist(figsize=(10, 10))
plt.show()
# Scatter-plot matrix to inspect relationships among the variables
scatter_matrix(frame=df, figsize=(18, 18))
plt.show()
# Create X and Y datasets for training
# X holds every column except 'class'; y holds the 'class' column.
# ``columns=`` is used because pandas 2.0 no longer accepts a positional axis.
X = np.array(df.drop(columns=['class']))
y = np.array(df['class'])
# Hold out 20% of the rows as a test set.
# NOTE: no random_state is passed here, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Testing
seed = 8
scoring = 'accuracy'
# Define models to train: (label, estimator) pairs
models = [
    ('KNN', KNeighborsClassifier(n_neighbors=5)),
    ('SVM', SVC(gamma='auto')),
]
# One 10-fold splitter shared by all models. shuffle=True is required:
# scikit-learn >= 0.24 raises ValueError when random_state is set while
# shuffle is left at its default of False. With an integer random_state the
# folds are the same on every call, so each model sees identical splits.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
# evaluate each model in turn
results = []  # per-model arrays of fold scores
names = []    # model labels, in the same order as ``results``
for name, model in models:
    # Evaluate score by Cross-Validation on the training split only
    cv_results = model_selection.cross_val_score(
        model, X_train, y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # Report "<label>: <mean score> (<standard deviation>)"
    msg = f"{name}: {cv_results.mean():f} ({cv_results.std():f})"
    print(msg)
# Make predictions on validation dataset
# Each model is fitted on the training split and then scored on the test
# split held out earlier (X_test / y_test).
for name, model in models:
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(name)
    print(accuracy_score(y_test, predictions))
    print(classification_report(y_test, predictions))
clf = SVC(gamma='auto')  # create support-vector-classifier
# get accuracy score for it
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(accuracy)
# Prediction for any example
# A single sample with nine values, one per feature column of X.
example = np.array([[4, 2, 1, 1, 1, 2, 3, 2, 1]])
# Keep the input two-dimensional with one row per sample (here a single row
# of nine values); for this input the reshape leaves the shape at (1, 9).
example = example.reshape(len(example), -1)
prediction = clf.predict(example)
# predict() returns an array with one label per input row, so compare the
# single element and not the array itself.
if prediction[0] == 4:
    print('Malignant\n')
elif prediction[0] == 2:
    print('Benign\n')
# If class is 4, this means that it is malignant; so this particular cell is
# cancerous. A class of 2, on the other hand, means benign or healthy.
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
本项目致力于利用机器学习方法实现乳腺癌的自动检测。乳腺癌是女性最常见的恶性肿瘤,早期发现和治疗对提高生存率至关重要。 我们采用机器学习算法,通过分析乳腺影像数据,如乳腺X光摄影(mammograms)或乳腺MRI图像,实现对乳腺癌的自动识别和分类。项目使用的数据集包括公开的乳腺癌影像数据集,如DDSM、INbreast等,并进行了预处理,包括图像增强、分割和特征提取等。 在运行环境方面,我们使用Python编程语言,基于Scikit-learn、TensorFlow等机器学习库进行开发。为了提高模型的性能,我们还使用了交叉验证、参数调优等技术。 项目完成后,将实现对乳腺癌的早期、准确检测,为临床诊断提供有力支持,有助于提高患者生存率和生活质量。同时,项目成果也可应用于其他医学影像分析领域。
资源推荐
资源详情
资源评论
收起资源包目录
基于机器学习的乳腺癌检测内含数据集.zip (3个子文件)
dataset
autism-data.csv 23KB
breast-cancer-detection
breast-cancer.py 3KB
breast-cancer.ipynb 357KB
共 3 条
- 1
资源评论
小码蚁.
- 粉丝: 2520
- 资源: 4067
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功