# LASSO and ordinary linear regression model building.
X <- read.csv("deal2.csv", header = TRUE)
head(X)
# ch3kw and ch3stat are constant (all zero), so standardizing them yields NA;
# drop them before scaling (presumably they are columns 1-2 — TODO confirm
# against deal2.csv's column order).
X1 <- scale(as.matrix(X[, c(-1, -2)]), center = TRUE, scale = TRUE)
head(X1)
# Save the standardized data as deal3, then reload it so `X` is a plain
# data frame (scale() returns a matrix) for the index-based access below.
write.csv(X1, "deal3.csv", quote = FALSE, row.names = FALSE)
X <- read.csv("deal3.csv", header = TRUE)
head(X)
# Randomly draw 1000 row indices without replacement (seeded for
# reproducibility; replace = FALSE and uniform probs are sample()'s defaults).
set.seed(1234)
Y <- sample(nrow(X), 1000)
head(Y)
A <- X[Y, 45]; head(A)  # total power consumption
B <- X[Y, 46]           # cooling load
C <- X[Y, 47]; head(C)  # system efficiency
# Correlation among the three response variables.
D <- cbind(A, B, C)
head(D)
cor(D)
# Remove the three response columns to form the predictor matrix.
# NOTE(review): the original named this matrix `F`, shadowing R's FALSE
# shorthand — renamed to `predictors` to avoid that footgun. It is not
# referenced outside this section.
predictors <- as.matrix(X[Y, c(-45, -46, -47)])
head(predictors)
library(glmnet)
# Cross-validated fits, one per response (glmnet's default alpha = 1 is LASSO).
model1 <- cv.glmnet(predictors, as.matrix(X[Y, 45]))
model2 <- cv.glmnet(predictors, as.matrix(X[Y, 46]))
model3 <- cv.glmnet(predictors, as.matrix(X[Y, 47]))
# Plot the CV curves to inspect the lambda choice.
plot(model1, main = "model1")
plot(model2, main = "model2")
plot(model3, main = "model3")
# Fit the full LASSO paths (predictors ungrouped, default is LASSO) and read
# coefficients at lambda.1se; `s` is the lambda value — as lambda shrinks the
# constraint loosens and more variables are selected.
fit1 <- glmnet(predictors, as.matrix(X[Y, 45]))
b1 <- coef(fit1, s = model1$lambda.1se); b1
fit2 <- glmnet(predictors, as.matrix(X[Y, 46]))
b2 <- coef(fit2, s = model2$lambda.1se); b2
fit3 <- glmnet(predictors, as.matrix(X[Y, 47]))
b3 <- coef(fit3, s = model3$lambda.1se); b3
# Keep only the LASSO-selected variables (plus response columns) per model.
# NOTE(review): responses appear here as columns 47/48/49 but as 45/46/47 in
# the earlier section — verify against deal3's actual column layout.
L1 <- X[, c(7, 8, 19, 23, 24, 30, 35, 37, 42, 43, 44, 47, 48, 49)]; head(L1)
L2 <- X[, c(7, 8, 18, 19, 23, 24, 26, 31, 37, 43, 44, 47, 48, 49)]; head(L2)
L3 <- X[, c(10, 20, 25, 34, 38, 45, 47, 48, 49)]; head(L3)
# Draw a fresh random sample of 1000 rows for the linear models
# (defaults: without replacement, uniform probabilities).
set.seed(123456)
Y1 <- sample(nrow(X), 1000)
head(Y1)
# Ordinary least squares on the LASSO-selected variables. The intercept is
# dropped (`. - 1`) — presumably because the data were standardized, so the
# intercept would be ~0; TODO confirm. Each design excludes the other two
# response columns of its subset.
model1 <- lm(systotpower ~ . - 1, data = L1[Y1, c(-13, -14)])
model2 <- lm(loadsys ~ . - 1, data = L2[Y1, c(-12, -14)])
model3 <- lm(effsys ~ . - 1, data = L3[Y1, c(-7, -8)])
anova(model1)                     # overall model test
summary(model1)                   # coefficient tests
shapiro.test(residuals(model1))   # residual normality check
anova(model2)                     # overall model test
summary(model2)                   # coefficient tests
shapiro.test(residuals(model2))   # added for consistency with model1
anova(model3)                     # overall model test
summary(model3)                   # coefficient tests
shapiro.test(residuals(model3))   # added for consistency with model1