library(robustbase)
library(energy)
library(bayesm)
library(nnet)
library(compositions)
library(xgboost)
library(caret)
MAE_ALR<-matrix(0,30,3)
RMSE_ALR<-matrix(0,30,3)
RR_ALR<-matrix(0,30,3)
ADAD_ALR<-matrix(0,30,1)
STRESSS_ALR<-matrix(0,30,1)
for (ttt in 1:30)
{
trainall=read.csv("train640.csv")
trainall$geogeo=floor(trainall$geogeo/100000)
trainall$veget <- as.factor(trainall$veget)
trainall$soilt <- as.factor(trainall$soilt)
trainall$lc <- as.factor(trainall$lc)
trainall$slope <- as.factor(trainall$slope)
trainall$geogeo <- as.factor(trainall$geogeo)
t<-trainall
dmy <- dummyVars(" ~ .", data = t,fullRank = F)
trainall <- data.frame(predict(dmy, newdata = t))
set.seed(seed[ttt])
index<- sample(1:nrow(trainall), 448)
train<- trainall[index, ]
testdata<- trainall[-index, ]
ALR_1XGBOOST <- xgboost(data = as.matrix(train[,c(-1:-10)]), label = train$alr1,booster = "gbtree",
max_depth = 3, eta = 0.1, colsample_bytree = 0.6,nrounds =20, subsample = 1,gamma=0.6,min_child_weight=0.6,
objective = "reg:linear")
ALR_1.pre<- predict(ALR_1XGBOOST, as.matrix(testdata[, c(-1:-10)]))
ALR_2XGBOOST <- xgboost(data = as.matrix(train[,c(-1:-10)]), label = train$alr2,booster = "gbtree",
max_depth = 3, eta = 0.1, colsample_bytree = 0.6,nrounds = 30, subsample = 1,gamma=1,min_child_weight=0.8,
objective = "reg:linear")
ALR_2.pre<- predict(ALR_2XGBOOST, as.matrix(testdata[, c(-1:-10)]))
XGBOOST_ALR.prediction<-matrix(0,192,2)
XGBOOST_ALR.prediction[,1]<-ALR_1.pre
XGBOOST_ALR.prediction[,2]<-ALR_2.pre
ALRpre<-alrInv(XGBOOST_ALR.prediction)
ALRpre<-matrix(ALRpre,192,3)
ALRpre<-ALRpre*100
sand<-testdata$sand
silt<-testdata$silt
clay<-testdata$clay
SAND<-matrix(ALRpre[,1])
SILT<-matrix(ALRpre[,2])
CLAY<-matrix(ALRpre[,3])
RMSE_ALR.sand = (mean((sand-SAND)^2))^0.5
MAE_ALR.sand=mean(abs(sand-SAND))
RR_ALR.sand=(1-sum((sand-SAND)^2)/sum((sand-mean(sand))^2))*100
RMSE_ALR.silt = (mean((silt-SILT)^2))^0.5
MAE_ALR.silt=mean(abs(silt-SILT))
RR_ALR.silt=(1-sum((silt-SILT)^2)/sum((silt-mean(silt))^2))*100
RMSE_ALR.clay = (mean((clay-CLAY)^2))^0.5
MAE_ALR.clay=mean(abs(clay-CLAY))
RR_ALR.clay=(1-sum((clay-CLAY)^2)/sum((clay-mean(clay))^2))*100
RMSE_ALR[ttt,]=cbind(RMSE_ALR.sand,RMSE_ALR.silt,RMSE_ALR.clay)
MAE_ALR[ttt,]=cbind(MAE_ALR.sand,MAE_ALR.silt,MAE_ALR.clay)
RR_ALR[ttt,]=cbind(RR_ALR.sand,RR_ALR.silt,RR_ALR.clay)
X<-matrix(cbind(sand,silt,clay),192,3)
Y<-matrix(cbind(SAND,SILT,CLAY),192,3)
Aitchison_Dist <- function(X, Y)
{ x=prod(X)^(1/length(X)); y=prod(Y)^(1/length(Y))
X=log(X/x); Y=log(Y/y)
return( (sum((X-Y)^2))^0.5 )
}
AD_ALR <- matrix(0,192,1)
for(i in 1: 192) AD_ALR[i]=Aitchison_Dist(X[i,1:3],Y[i,1:3] )
ADAD_ALR[ttt] <- mean(AD_ALR)
STRESS_ALR1=0
STRESS_ALR2=0
for( i in 1: 191 )
for( j in (i+1): 192 )
{ D1=Aitchison_Dist(X[i,1:3], X[j,1:3])
D2=Aitchison_Dist(Y[i,1:3], Y[j,1:3])
STRESS_ALR1=STRESS_ALR1+((D1-D2)^2)
STRESS_ALR2=STRESS_ALR2+(D1^2)
}
STRESSS_ALR[ttt]= (STRESS_ALR1/STRESS_ALR2)^0.5
}
RMSE_ALR_SAND=mean(RMSE_ALR[,1])
MAE_ALR_SAND=mean(MAE_ALR[,1])
RR_ALR_SAND=mean(RR_ALR[,1])
RMSE_ALR_SILT=mean(RMSE_ALR[,2])
MAE_ALR_SILT=mean(MAE_ALR[,2])
RR_ALR_SILT=mean(RR_ALR[,2])
RMSE_ALR_CLAY=mean(RMSE_ALR[,3])
MAE_ALR_CLAY=mean(MAE_ALR[,3])
RR_ALR_CLAY=mean(RR_ALR[,3])
XGBOOST.RMSE_ALR=cbind(RMSE_ALR_SAND,RMSE_ALR_SILT,RMSE_ALR_CLAY)
XGBOOST.MAE_ALR=cbind(MAE_ALR_SAND,MAE_ALR_SILT,MAE_ALR_CLAY)
XGBOOST.RR_ALR=cbind(RR_ALR_SAND,RR_ALR_SILT,RR_ALR_CLAY)
m_AD_ALR<-mean(ADAD_ALR)
STRESS_ALR<-mean(STRESSS_ALR)
RESULT_ALR<-matrix(0,2,11)
RESULT_ALR[1,2]="RMSE_ALR"
RESULT_ALR[1,5]="MAE_ALR"
RESULT_ALR[1,8]="RR_ALR"
RESULT_ALR[1,10]="AD_ALR"
RESULT_ALR[1,11]="STRESS_ALR"
RESULT_ALR[2,1:3]=round(XGBOOST.RMSE_ALR,3)
RESULT_ALR[2,4:6]=round(XGBOOST.MAE_ALR,3)
RESULT_ALR[2,7:9]=round(XGBOOST.RR_ALR,3)
RESULT_ALR[2,10]=round(m_AD_ALR ,3)
RESULT_ALR[2,11]=round(STRESS_ALR,3)
MAE_CLR<-matrix(0,30,3)
RMSE_CLR<-matrix(0,30,3)
RR_CLR<-matrix(0,30,3)
ADAD_CLR<-matrix(0,30,1)
STRESSS_CLR<-matrix(0,30,1)
for (ttt in 1:30)
{
trainall=read.csv("train640.csv")
trainall$geogeo=floor(trainall$geogeo/100000)
trainall$veget <- as.factor(trainall$veget)
trainall$soilt <- as.factor(trainall$soilt)
trainall$lc <- as.factor(trainall$lc)
trainall$slope <- as.factor(trainall$slope)
trainall$geogeo <- as.factor(trainall$geogeo)
t<-trainall
dmy <- dummyVars(" ~ .", data = t,fullRank = F)
trainall <- data.frame(predict(dmy, newdata = t))
set.seed(seed[ttt])
index<- sample(1:nrow(trainall), 448)
train<- trainall[index, ]
testdata<- trainall[-index, ]
CLR_1XGBOOST <- xgboost(data = as.matrix(train[,c(-1:-10)]), label = train$clr1,booster = "gbtree",
max_depth = 3, eta = 0.1, colsample_bytree = 1,nrounds =40, subsample = 0.8,gamma=0.7,min_child_weight=0.6,
objective = "reg:linear")
CLR_1.pre<- predict(CLR_1XGBOOST, as.matrix(testdata[, c(-1:-10)]))
CLR_2XGBOOST <- xgboost(data = as.matrix(train[,c(-1:-10)]), label = train$clr2,booster = "gbtree",
max_depth = 5, eta = 0.1, colsample_bytree = 0.8,nrounds = 40, subsample = 1,gamma=0.4,min_child_weight=1,
objective = "reg:linear")
CLR_2.pre<- predict(CLR_2XGBOOST, as.matrix(testdata[, c(-1:-10)]))
CLR_3XGBOOST <- xgboost(data = as.matrix(train[,c(-1:-10)]), label = train$clr3,booster = "gbtree",
max_depth = 3, eta = 0.1, colsample_bytree = 0.6,nrounds = 30, subsample = 0.6,gamma=0.7,min_child_weight=0.6,
objective = "reg:linear")
CLR_3.pre<- predict(CLR_3XGBOOST, as.matrix(testdata[, c(-1:-10)]))
XGBOOST_CLR.prediction<-matrix(0,192,3)
XGBOOST_CLR.prediction[,1]<-CLR_1.pre
XGBOOST_CLR.prediction[,2]<-CLR_2.pre
XGBOOST_CLR.prediction[,3]<-CLR_3.pre
CLRpre<-clrInv(XGBOOST_CLR.prediction)
CLRpre<-matrix(CLRpre,192,3)
CLRpre<-CLRpre*100
sand<-testdata$sand
silt<-testdata$silt
clay<-testdata$clay
SAND<-matrix(CLRpre[,1])
SILT<-matrix(CLRpre[,2])
CLAY<-matrix(CLRpre[,3])
RMSE_CLR.sand = (mean((sand-SAND)^2))^0.5
MAE_CLR.sand=mean(abs(sand-SAND))
RR_CLR.sand=(1-sum((sand-SAND)^2)/sum((sand-mean(sand))^2))*100
RMSE_CLR.silt = (mean((silt-SILT)^2))^0.5
MAE_CLR.silt=mean(abs(silt-SILT))
RR_CLR.silt=(1-sum((silt-SILT)^2)/sum((silt-mean(silt))^2))*100
RMSE_CLR.clay = (mean((clay-CLAY)^2))^0.5
MAE_CLR.clay=mean(abs(clay-CLAY))
RR_CLR.clay=(1-sum((clay-CLAY)^2)/sum((clay-mean(clay))^2))*100
RMSE_CLR[ttt,]=cbind(RMSE_CLR.sand,RMSE_CLR.silt,RMSE_CLR.clay)
MAE_CLR[ttt,]=cbind(MAE_CLR.sand,MAE_CLR.silt,MAE_CLR.clay)
RR_CLR[ttt,]=cbind(RR_CLR.sand,RR_CLR.silt,RR_CLR.clay)
X<-matrix(cbind(sand,silt,clay),192,3)
Y<-matrix(cbind(SAND,SILT,CLAY),192,3)
Aitchison_Dist <- function(X, Y)
{ x=prod(X)^(1/length(X)); y=prod(Y)^(1/length(Y))
X=log(X/x); Y=log(Y/y)
return( (sum((X-Y)^2))^0.5 )
}
AD_CLR <- matrix(0,192,1)
for(i in 1: 192) AD_CLR[i]=Aitchison_Dist(X[i,1:3],Y[i,1:3] )
ADAD_CLR[ttt] <- mean(AD_CLR)
STRESS_CLR1=0
STRESS_CLR2=0
for( i in 1: 191 )
for( j in (i+1): 192 )
{ D1=Aitchison_Dist(X[i,1:3], X[j,1:3])
D2=Aitchison_Dist(Y[i,1:3], Y[j,1:3])
STRESS_CLR1=STRESS_CLR1+((D1-D2)^2)
STRESS_CLR2=STRESS_CLR2+(D1^2)
}
STRESSS_CLR[ttt]= (STRESS_CLR1/STRESS_CLR2)^0.5
}
RMSE_CLR_SAND=mean(RMSE_CLR[,1])
MAE_CLR_SAND=mean(MAE_CLR[,1])
RR_CLR_SAND=mean(RR_CLR[,1])
RMSE_CLR_SILT=mean(RMSE_CLR[,2])
MAE_CLR_SILT=mean(MAE_CLR[,2])
RR_CLR_SILT=mean(RR_CLR[,2])
RMSE_CLR_CLAY=mean(RMSE_CLR[,3])
MAE_CLR_CLAY=mean(MAE_CLR[,3])
RR_CLR_CLAY=mean(RR_CLR[,3])
XGBOOST.RMSE_CLR=cbind(RMSE_CLR_SAND,RMSE_CLR_SILT,RMSE_CLR_CLAY)
XGBOOST.MAE_CLR=cbind(MAE_CLR_SAND,MAE_CLR_SILT,MAE_CLR_CLAY)
XGBOOST.RR_CLR=cbind(RR_CLR_SAND,RR_CLR_SILT,RR_CLR_CLAY)
m_AD_CLR<-mean(ADAD_CLR)
STRESS_CLR<-mean(STRESSS_CLR)
RESULT_CLR<-matrix(0,2,11)