1.3
> library()
> chooseCRANmirror()
> install.packages("e1071")
> update.packages(oldPkgs = "e1071")
> library(e1071)
> help(package='e1071')
> help(svm,e1071)
> ?e1071::svm
> help.search("svm")
> ??svm
> example(lm)
> demo(graphics)
> demo()
1.4
> data()
> data(iris)
> class(iris)
> save(iris,file = "myData.RData")
> load("myData.RData")
> test.data=read.table(header = T,text="
+a b
+1 2
+3 4
+")
> colnames(test.data)=c("a","b")
> row.names(test.data)=c("first","second")
> write.table(test.data,file="test.txt",sep = " ")
> write.csv(test.data,file="test.csv")
1.5
> data(iris)
> iris[1,5]
> iris[1,"Species"]
> iris[,c("Sepal.Length","Sepal.Width")]
> str(iris)
> Five.Sepal.iris=iris[1:5,c("Sepal.Length","Sepal.Width")]
> str(Five.Sepal.iris)
> setosa.data=iris[iris$Species=="setosa",1:5]
> str(setosa.data)
> which(iris$Species=="setosa")
> setosa.data=iris[which(iris$Species=="setosa"),1:5]
> str(setosa.data)
> Sepal.data=subset(iris,select = c("Sepal.Length","Sepal.Width"))
> str(Sepal.data)
> setosa.data=subset(iris,Species=="setosa")
> str(setosa.data)
> flower.type=data.frame(Species="setosa",Flower="iris")
> merge(flower.type,iris[1:3],by= "Species")
> head(iris[order(iris$Sepal.Length,decreasing = T),])
1.6
> data(iris)
> str(iris)
> mean(iris$Sepal.Length)
> sd(iris$Sepal.Length)
> var(iris$Sepal.Length)
> min(iris$Sepal.Length)
> max(iris$Sepal.Length)
> median(iris$Sepal.Length)
> range(iris$Sepal.Length)
> quantile(iris$Sepal.Length)
> sapply(iris[1:4], mean,na.rm=T)
> summary(iris)
> cor(iris[,1:4])
> cov(iris[,1:4])
> t.test(iris$Sepal.Width[iris$Species=="setosa"],iris$Sepal.Width[iris$Species=="versicolor"])
> cor.test(iris$Sepal.Length,iris$Sepal.Width)
1.7
> data(iris)
> table(iris$Species)
> pie(table(iris$Species))
> hist(iris$Sepal.Length)
> boxplot(Petal.Width~Species,data=iris)
> plot(x=iris$Petal.Length,y=iris$Petal.Width,col=iris$Species)
> pairs(iris[1:4])
2.1
> install.packages("car")
> library(car)
> data("Quartet")
> str(Quartet)
> plot(Quartet$x,Quartet$y1)
> lmfit=lm(y1~x,Quartet)
> abline(lmfit,col="red")
> lmfit
2.2
> summary(lmfit)
2.3
> lmfit=lm(y1~x,Quartet)
> newdata=data.frame(x=c(3,6,15))
> predict(lmfit,newdata,interval = "confidence",level = 0.95)
> predict(lmfit,newdata,interval = "predict")
2.4
> par(mfrow=c(2,2))
> plot(lmfit)
2.5
> plot(Quartet$x,Quartet$y2)
> lmfit=lm(Quartet$y2~poly(Quartet$x,2))
> lines(sort(Quartet$x),lmfit$fit[order(Quartet$x)],col="red")
2.6
> plot(Quartet$x,Quartet$y3)
> library(MASS)
> lmfit=rlm(Quartet$y3~Quartet$x)
> abline(lmfit,col="red")
> plot(Quartet$x,Quartet$y3)
> lmfit=lm(Quartet$y3~Quartet$x)
> abline(lmfit,col="red")
2.7
> str(SLID)
> par(mfrow=c(2,2))
> plot(SLID$wages~SLID$language)
> plot(SLID$wages~SLID$age)
> plot(SLID$wages~SLID$education)
> plot(SLID$wages~SLID$sex)
> lmfit=lm(wages~.,data=SLID)
> summary(lmfit)
> lmfit=lm(wages~age+sex+education,data=SLID)
> summary(lmfit)
> par(mfrow=c(2,2))
> plot(lmfit)
> lmfit=lm(log(wages)~age+sex+education,data=SLID)
> plot(lmfit)
> vif(lmfit)
> sqrt(vif(lmfit))>2
> install.packages("lmtest")
> library(lmtest)
> bptest(lmfit)
> install.packages("rms")
> library(rms)
> olsfit=ols(log(wages)~age+sex+education,data=SLID,x=T,y=T)
> robcov(olsfit)
3.1
> install.packages("arules")
> library(arules)
> tr_list=list(c("apple","bread","cake"),c("apple","bread","milk"),c("bread","cake","milk"))
> names(tr_list)=paste("tr",c(1:3),sep = "")
> trans=as(tr_list,"transactions")
> trans
> tr_matrix=matrix(c(1,1,1,0,1,1,0,1,0,1,1,1),ncol = 4)
> dimnames(tr_matrix)=list(paste("tr",c(1:3),sep = ""),c("apple","bread","cake","milk"))
> trans2=as(tr_matrix,"transactions")
> trans2
> tr_df=data.frame(trid=as.factor(c(1,2,1,1,2,3,2,3,2,3)),item=as.factor(c("apple","milk","cake","bread","cake","milk","apple","cake","bread","bread")))
> trans3=as(split(tr_df[,"item"],tr_df[,"trid"]),"transactions")
> trans3
3.2
> install.packages("arules")
> library(arules)
> data("Groceries")
> LIST(Groceries)
> summary(Groceries)
> inspect(Groceries[1:10])
> itemFrequencyPlot(Groceries,support=0.1)
3.3
> data("Groceries")
> summary(Groceries)
> itemFrequencyPlot(Groceries,support=0.1,cex.names=0.8,topN=5)
> rules=apriori(Groceries,parameter = list(supp=0.001,conf=0.5,target="rules"))
> summary(rules)
> inspect(head(rules))
> rules=sort(rules,by="confidence",decreasing = T)
> inspect(head(rules))
3.4
> rules.sorted=sort(rules,by="lift")
> subset.matrix=is.subset(rules.sorted,rules.sorted)
> subset.matrix[lower.tri(subset.matrix,diag=T)]=NA
> redundant=colSums(subset.matrix,na.rm = T)>=1
> rules.pruned=rules.sorted[!redundant]
> inspect(head(rules.pruned))
set of 0 rules
3.5
> install.packages("arulesViz")
> library(arulesViz)
> library(arules)
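> tr = read.transactions("<path to the downloaded data file>", format="basket")  # data file: download it from the Posts & Telecom Press education community (人邮教育社区)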
> summary(tr)
> itemFrequencyPlot(tr, support=0.1)
> rules=apriori(tr, parameter=list(supp=0.5,conf=0.5))
> rules
> inspect(rules)
> library(arulesViz)
> plot(rules)
3.6
> frequentsets=eclat(Groceries,parameter = list(support=0.05,maxlen=10))
> summary(frequentsets)
> inspect(sort(frequentsets,by="support")[1:10])
3.7
> install.packages("arulesSequences")
> library(arulesSequences)
> tmp_data=list(c("a"),c("a","b","c"),c("a","c"),c("d"),c("c","f"),c("a","d"),c("c"),c("b","c"),c("a","e"),c("e","f"),c("a","b"),c("d","f"),c("c"),c("b"),c("e"),c("g"),c("a","f"),c("c"),c("b"),c("c"))
> names(tmp_data)=paste("tr",c(1:20),sep = "")
> trans=as(tmp_data,"transactions")
> transactionInfo(trans)$sequenceID=c(1,1,1,1,1,2,2,2,2,3,3,3,3,3,4,4,4,4,4,4)
> transactionInfo(trans)$eventID=c(10,20,30,40,50,10,20,30,40,10,20,30,40,50,10,20,30,40,50,60)
> trans
> inspect(head(trans))
> summary(trans)
> zaki=read_baskets(con = system.file("misc","zaki.txt",package = "arulesSequences"),info = c("sequenceID","eventID","SIZE"))
> as(zaki,"data.frame")
3.8
> library(arulesSequences)
> s_result=cspade(trans,parameter = list(support=0.75))
> summary(s_result)
> as(s_result,"data.frame")
4.1
> customer=read.csv('customer.csv',header = T)
> head(customer)
> str(customer)
> customer=scale(customer[,-1])
> hc=hclust(dist(customer,method="euclidean"),method="ward.D2")
> hc
> plot(hc,hang = -0.01,cex=0.7)
> hc2=hclust(dist(customer),method="single")
> plot(hc2,hang = 0.01,cex=0.7)
4.2
> fit=cutree(hc,k=4)
> fit
> table(fit)
> plot(hc)
> rect.hclust(hc,k=4,border = "red")
4.3
> View(customer)
> set.seed(22)
> fit=kmeans(customer,4)
> fit
> barplot(t(fit$centers),beside = T,xlab = "cluster",ylab = "value")
> plot(customer,col=fit$cluster)
4.4
> install.packages("cluster")
> library(cluster)
> clusplot(customer,fit$cluster,color = T,shade = T)
> par(mfrow=c(1,2))
> library(cluster)
> clusplot(customer,fit$cluster,color = T,shade = T)
> rect(-0.7,-1.7,2.2,-1.2,border = "orange",lwd=2)
> clusplot(customer,fit$cluster,color = T,xlim = c(-0.7,2.2),ylim=c(-1.7,-1.2))
4.5
> install.packages("fpc")
> library(fpc)
> single_c=hclust(dist(customer),method = "single")
> hc_single=cutree(single_c,k=4)
> complete_c=hclust(dist(customer),method = "complete")
> hc_complete=cutree(complete_c,k=4)
> set.seed(22)
> km=kmeans(customer,4)
> cs=cluster.stats(dist(customer),km$cluster)
> cs[c("within.cluster.ss","avg.silwidth")]
> sapply(list(kmeans=km$cluster,hc_single=hc_single,hc_complete=hc_complete),function(c)cluster.stats(dist(customer),c)[c("within.cluster.ss","avg.silwidth")])
4.6
> set.seed(22)
> km=kmeans(customer,4)
> kms=silhouette(km$cluster,dist(customer))
> summary(kms)
> plot(kms)
4.7
> nk=2: