########################### Chapter Three KNN ###################################
## Advantages of K-Nearest Neighbors: 1. simple and effective
## 2. makes no assumptions about the distribution of the sample
## 3. fast training phase
## Disadvantages of K-Nearest Neighbors: 1. hard to interpret how a case gets classified
## 2. requires choosing a proper k
## 3. slow classification phase
# Install the packages this script depends on, but only when they are missing,
# so that re-running the script does not re-download them every time.
# "class" supplies knn(); "gmodels" supplies CrossTable().
for (pkg in c("class", "gmodels")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
############################# PRE-REVIEW OF DATA SET ###############################
# Load the breast-cancer data set and take a first look at it.
# The data directory is kept in a variable and joined to the file name rather
# than passed to setwd(), so the session's working directory is left untouched.
# Edit `data_dir` to point at your local copy of the data sets.
data_dir <- "C:/Users/Shijie Wang/Desktop/R/Machine-Learning-with-R-datasets-master"
wbcd <- read.csv(file.path(data_dir, "wisc_bc_data.csv"),
                 stringsAsFactors = FALSE) # import data
str(wbcd) # overview of the data structure
wbcd <- wbcd[-1] # drop the first column (the ID) so it cannot cause over-fitting
table(wbcd$diagnosis) # class counts; diagnosis is the classification target (B / M)
# Recode the target as a factor with readable labels.
wbcd$diagnosis <- factor(wbcd$diagnosis, levels = c("B", "M"),
                         labels = c("Benign", "Malignant"))
round(prop.table(table(wbcd$diagnosis)) * 100, digits = 1) # class shares in percent
summary(wbcd[c("radius_mean", "area_mean", "smoothness_mean")]) # three example features
# NOTE: DISTANCE CALCULATION: THE INFLUENCE OF UNITS SHOULD BE ELIMINATED BY *NORMALIZATION*
############################### PRE-PROCESSING OF DATA #################################
# Min-max normalisation: rescale a numeric vector to the range [0, 1] so that
# the unit a feature is measured in does not dominate distance calculations.
#
# Args:
#   x: numeric vector.
# Returns:
#   A numeric vector of the same length as `x`. A vector whose values are all
#   identical (zero range) is mapped to all zeros instead of NaN from 0 / 0.
#   Missing values are not removed: if `x` contains NA the result is all NA.
normaliza <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # Guard against constant columns: 0 / 0 would give NaN, which knn() rejects
  # as a missing value.
  if (!is.na(span) && span == 0) {
    return(x - lo) # all zeros, keeps the names/attributes of x
  }
  (x - lo) / span
}
# Rescale every feature column (columns 2-31; column 1 is used as the class
# label below) with normaliza() and inspect one rescaled column.
feature_cols <- 2:31
scaled_features <- lapply(wbcd[feature_cols], normaliza)
wbcd_n <- as.data.frame(scaled_features)
summary(wbcd_n$area_mean) # expected to span 0 to 1 after rescaling
############################# ESTABLISH MODEL (TRAIN DATA AND TEST DATA)
# The rows are treated as already being in random order, so a positional
# split is used instead of sampling: rows 1-469 train, rows 470-569 test.
train_rows <- 1:469
test_rows <- 470:569
wbcd_train <- wbcd_n[train_rows, ]
wbcd_test <- wbcd_n[test_rows, ]
# Class labels (column 1 of wbcd) for the same two row ranges.
wbcd_train_labels <- wbcd[train_rows, 1]
wbcd_test_labels <- wbcd[test_rows, 1]
# Predict a class for every test row with k-nearest neighbours.
# Rule of thumb noted by the author: k is normally about the square root of
# the data size.
# Open question: how should k be chosen in a principled way?
library(class)
k_neighbors <- 21
wbcd_test_pred <- knn(
  train = wbcd_train,
  test = wbcd_test,
  cl = wbcd_train_labels,
  k = k_neighbors
)
############################# EVALUATE AND TEST MODEL ABILITY ###########################
# Cross-tabulate the true test labels against the predicted ones, with the
# chi-square contribution switched off. When interpreting the table, pay
# particular attention to the false negatives and false positives.
library(gmodels)
CrossTable(
  x = wbcd_test_labels,
  y = wbcd_test_pred,
  prop.chisq = FALSE
)
knn.rar_R语言_R语言knn预测_knn算法
版权申诉
100 浏览量
2022-09-23
20:17:11
上传
评论
收藏 1KB RAR 举报
APei
- 粉丝: 63
- 资源: 1万+
最新资源
- IMG_0694.GIF
- 基于图像的三维模型重建C++源代码+文档说明(高分课程设计)
- 基于聚焦法的工件立体测量方案,根据数据进行三维重建 使用HALCON处理图像,MATLAB拟合数据+源代码+数据集+效果图
- 锄战三国村 修改:货币使用不减 v1.10(2) 原创 (中文).apk
- 基于python实现的单目双目视觉三维重建+源代码+图像图片(高分课程设计)
- 基于C+++OPENCV的全景图像拼接源码(课程设计)
- 基于Python+OpenCV对多张图片进行全景图像拼接,消除鬼影,消除裂缝+源代码+文档说明+界面截图(高分课程设计)
- 基于C++实现的全景图像拼接源码(课程设计)
- 基于SIFT特征点提取和RASIC算法实现全景图像拼接python源码+文档说明+界面截图+详细注释(95分以上课程大作业)
- 基于matlab实现眼部判别的疲劳检测系统+源代码+全部数据+文档说明+详细注释+使用说明+截图(高分课程设计)
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
评论0