library("limma")
#读取文件
data1<-read.table("GSE136757_series_matrix.txt",sep = "\t",quote = "",fill = T,comment.char = "!",header = T)
rownames(data1)<-data1[,1]
data1<-data1[,-1]
col_name<-colnames(data1)
#把数据随机分成训练集和测试集,随机打乱,比例为3:1
tr_set<-sample(col_name,78)
te_set<-setdiff(col_name,tr_set)
#从原始数据中提取训练集
tr_data<-data1[,tr_set]
#从原始数据中提取测试集
te_data<-data1[,te_set]
#制作全部样本分组矩阵
group_list<-c("nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","nonlesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","nonlesional","lesional","lesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","lesional","nonlesional","nonlesional","lesional","lesional","lesional","nonlesional","lesional","lesional","nonlesional","lesional","nonlesional")
g_metrix <- model.matrix(~0+factor(group_list))
colnames(g_metrix)=levels(factor(group_list))
rownames(g_metrix)=colnames(data)
name<-data.frame(row.names(g_metrix))
#制作训练集分组矩阵
group_list1<-data.frame(group_list)
col_name<-data.frame(col_name)
group_list1<-cbind(col_name,group_list1)
tr_list<-data.frame()
for (i in 1:length(tr_set)) {
for (j in 1:dim(group_list1)[1]) {
if(tr_set[i]==group_list1[j,1])
tr_list<-rbind(tr_list,group_list1[j,])
}
}
tr_list2<-as.character(tr_list[,2])
tr_metrix <- model.matrix(~0+factor(tr_list2))
colnames(tr_metrix)=levels(factor(tr_list2))
rownames(tr_metrix)=as.character(tr_list[,1])
#差异比较矩阵
tr_contrast.matrix<-makeContrasts(paste0(unique(tr_list2),collapse = "-"),levels = tr_metrix)
# Contrasts
# Levels nonlesional-lesional
# lesional -1
# nonlesional 1
#4)用训练集进行差异表达基因筛选
fit <- lmFit(tr_data,tr_metrix)
fit2 <- contrasts.fit(fit, tr_contrast.matrix)
fit2 <- eBayes(fit2)
result = topTable(fit2, coef=1, n=Inf)
result1 = na.omit(result)
head(result1)
# logFC AveExpr t P.Value adj.P.Val B
# "1553211_at" 1.1012831 -0.37686775 12.33705 6.693119e-20 3.659463e-15 34.82197
# "224414_s_at" -0.4883682 1.54003153 -11.43953 2.936274e-18 6.821918e-14 31.05822
# "202804_at" -0.1295728 2.22169379 -11.38259 3.743165e-18 6.821918e-14 30.81655
# "202345_s_at" -0.3813873 2.18954540 -11.27573 5.908772e-18 7.305928e-14 30.36217
# "45633_at" -0.2389049 1.57646192 -11.24702 6.681233e-18 7.305928e-14 30.23987
# "237301_at" 0.6458491 0.05674424 10.93992 2.498718e-17 2.276957e-13 28.92701
#筛选差异表达基因
dif_gene<-subset(result1,abs(result1[,1])>=1)
dif_gene<-subset(dif_gene,dif_gene[,4]<=0.05)
#支持向量机建模
#install.packages("e1071")
#for (i in 1:dim(dif_gene)[1]) {
# for (j in 1:dim(data)[1]) {
# if(row.names(data)[j]==row.names(dif_gene)[i]){
# type[i,1]==0
# }
# }
#}
data1<-t(data1)
type<-c(0,1,1,1,1,0,1,1,0,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,0,1,1,0,1,1,0,1,1,1,1,0,1,0,1,1,1,0,1,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,1,1,1,0,1,1,0,1,0)
type<-data.frame(type)
data1<-data.frame(type,data1)
train_sub = sample(nrow(data1),3/4*nrow(data1))
train_data = data1[train_sub,]
#train_data =cbind(train_data,type)
test_data = data1[-train_sub,]
#进行svm建模,
library(e1071)
success=0
a<-c()
#通过循环对测试集部分进行留一法
for (i in 1:dim(test_data)[1]) {
test_list<-test_data[i,]
train_data1<-rbind(train_data,test_data[-i,])
model <- svm (train_data1[,-1],train_data1$type, kernel = "radial", cost = 100, gamma=0.0001, scale = FALSE)
pre_svm <- predict(model,newdata = test_list[,-1])
a<-c(a,pre_svm)
if(test_list[1]==pre_svm[i]){
success=success+1
}
}