## In Jupyter, build a list, a set and two dictionaries of your own, then: 1) print the list in shuffled order; 2) pop an element from the set; 3) update the contents of dictionary 1 with the contents of dictionary 2.
import random as rd
# 1) shuffle the list in place and print it
list1 = [1, 2, 3, 4, 5]
rd.shuffle(list1)
print(list1)
# 2) sets are unordered, so pop() removes an arbitrary element rather than a fixed "head"
set1 = {1, 3, 4, 6, 8}
print(set1.pop())
# 3) update dict1 with dict2: existing keys are overwritten, new keys are added
dict1 = {"name": "zhangsan", "age": 20}
dict2 = {"name": "zhangsan", "age": 21, "sex": "male"}
dict1.update(dict2)
dict1
## In Jupyter, use numpy to randomly generate a shuffled array of length 16 and reshape it to 4 rows by 4 columns. Then randomly generate a length-16 array drawn from a normal distribution and reshape it to 4 rows by 4 columns as well. Add the two arrays with a ufunc and print the result.
import numpy as np
# a shuffled array of length 16 (a random permutation of 0..15)
arr1 = np.random.permutation(16)
arr1 = arr1.reshape(4, 4)      # reshape returns a new array; arr1.resize(4, 4) would modify in place
# a length-16 sample from the standard normal distribution, reshaped to 4x4
arr2 = np.random.normal(0, 1, 16).reshape(4, 4)
arr2
arr1 + arr2
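The `+` operator on ndarrays already dispatches to the np.add ufunc; if an explicit ufunc call is wanted, the equivalent form (using the arrays defined above) is:
np.add(arr1, arr2)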
## In Jupyter, create a DataFrame with the columns id, name, sex and score, describing the information of 3 students. Then add a new column major filled with the value bigdata. Group the data by sex and compute the mean score of each group.
import pandas as pd
# three students with the columns named in the task (the score values are illustrative placeholders)
data = {
    'id': [1, 2, 3],
    'name': ['zhangsan', 'lisi', 'wangwu'],
    'sex': ["female", "male", "male"],
    'score': [85, 92, 78]
}
df1 = pd.DataFrame(data)
df1['major'] = "bigdata"       # new column, same value for every row
df1
# group by sex and average the score within each group
grouped = df1.groupby("sex")
grouped["score"].mean()
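If more than one statistic per group is wanted, agg accepts a list of function names; a small sketch on the grouped object above:
grouped["score"].agg(['mean', 'count'])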
## Run a simple linear regression on the petal_length and petal_width columns of the iris dataset. Preprocess the two columns, fit the regression, and plot the scatter points together with the regression line. Print the intercept and slope of the regression equation. Using the fitted model, predict the petal width of a flower whose petal length is 4.0.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
iris = load_iris()
data = pd.DataFrame(iris.data)
data.columns = ["sepal_length","sepal_width","petal_length","petal_width"]
data.head()
# scikit-learn expects a 2-D feature matrix, so reshape the 1-D columns to (n, 1)
x = data["petal_length"].values.reshape(-1, 1)
y = data["petal_width"].values.reshape(-1, 1)
lmodel = LinearRegression()
lmodel.fit(x, y)
# scatter plot of the data with the fitted regression line
plt.scatter(x, y)
pre_y = lmodel.predict(x)
plt.plot(x, pre_y, 'r-', linewidth=2)
plt.xlabel("petal_length")
plt.ylabel("petal_width")
plt.show()
print("intercept:", lmodel.intercept_)
print("slope:", lmodel.coef_)
print("predicted petal_width for petal_length=4.0:", lmodel.predict([[4.0]]))
## Load the Boston house-price dataset and build a multiple linear regression model, randomly splitting the data into a training set and a test set. Train the model on the training set, evaluate it on the test set, and print the mean squared error on the test set.
import numpy as np
import pandas as pd
from sklearn import datasets
# note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs scikit-learn < 1.2 (see the alternative loading sketch below)
d = datasets.load_boston()
data = pd.DataFrame(d.data)
data['price'] = d.target
data.sample(5)
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
lmodel2 = LinearRegression()
# random split, 75% training / 25% test by default
x_train, x_test, y_train, y_test = train_test_split(d.data, d.target, random_state=666)
lmodel2.fit(x_train, y_train)
y_predict = lmodel2.predict(x_test)
from sklearn.metrics import mean_squared_error
print('MSE on the test set:', mean_squared_error(y_test, y_predict))
print('R^2 on the test set:', lmodel2.score(x_test, y_test))
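On scikit-learn 1.2 or later, load_boston is no longer available; the library's deprecation notice suggests loading the data from the original source roughly as follows (a sketch; boston_X and boston_y would then take the place of d.data and d.target above):
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
boston_X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])   # 13 feature columns
boston_y = raw_df.values[1::2, 2]                                        # target: median house price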
## Use the KMeans algorithm to cluster the iris dataset. Print the confusion matrix and output the model's accuracy, precision, recall and F1 score.
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.metrics import confusion_matrix
iris = load_iris()
X = iris.data
# cluster into 3 groups (iris has 3 species); n_init and random_state pinned for reproducibility
kmModel = KMeans(n_clusters=3, n_init=10, random_state=0)
kmModel.fit(X)
label_km = kmModel.labels_
#print(label_km)
#print(iris.target)
# note: KMeans cluster IDs are arbitrary, so the rows and columns of this matrix
# may be permuted relative to each other (see the label-matching sketch below)
print("confusion matrix:\n", confusion_matrix(iris.target, label_km))
classreport = metrics.classification_report(iris.target, label_km)
print(classreport)
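Because the cluster IDs are arbitrary, the precision/recall/F1 figures above are only meaningful after each cluster is mapped to the true class that dominates it. A minimal sketch of that matching, reusing label_km and iris.target from the cell above:
import numpy as np
mapped = np.zeros_like(label_km)
for cluster in np.unique(label_km):
    mask = (label_km == cluster)
    # relabel every point in this cluster with the most common true label inside it
    mapped[mask] = np.bincount(iris.target[mask]).argmax()
print("confusion matrix after matching:\n", confusion_matrix(iris.target, mapped))
print(metrics.classification_report(iris.target, mapped))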
## Use SVM to classify the iris dataset, choosing a reasonable split between training and test sets. Print the confusion matrix and output the model's accuracy, recall, precision and F1 score on the three classes.
import numpy as np
from sklearn import svm
from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
iris = datasets.load_iris()
# 80% training / 20% test split
x_train, x_test, y_train, y_test = model_selection.train_test_split(iris.data, iris.target, random_state=1, test_size=0.2)
svmModel = svm.SVC(kernel='linear', gamma=0.1, C=0.1)   # gamma has no effect with a linear kernel
svmModel.fit(x_train, y_train)
print("SVM accuracy on the training set:", svmModel.score(x_train, y_train))
print("SVM accuracy on the test set:", svmModel.score(x_test, y_test))
y_predict = svmModel.predict(x_test)
print("confusion matrix:\n", metrics.confusion_matrix(y_test, y_predict))
classreport = metrics.classification_report(y_test, y_predict, target_names=['setosa', 'versicolor', 'virginica'])
print(classreport)
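If the macro-averaged numbers are needed as plain variables rather than read off the printed report, they can also be computed directly (a sketch on the predictions above; with average='macro' the returned support is None):
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_predict, average='macro')
print("accuracy:", accuracy_score(y_test, y_predict))
print("macro precision / recall / F1:", prec, rec, f1)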
## For the dataset below, find all frequent itemsets, then use lift to find all association rules. (Items: 牛奶 = milk, 面包 = bread, 尿布 = diapers, 啤酒 = beer, 土豆 = potato, 可乐 = cola.)
item_list = [['牛奶','面包'],
['面包','尿布','啤酒','土豆'],
['牛奶','尿布','啤酒','可乐'],
['面包','牛奶','尿布','啤酒'],
['面包','牛奶','尿布','可乐']]
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
item_list = [['牛奶','面包'],
             ['面包','尿布','啤酒','土豆'],
             ['牛奶','尿布','啤酒','可乐'],
             ['面包','牛奶','尿布','啤酒'],
             ['面包','牛奶','尿布','可乐']]
# one-hot encode the transactions into a boolean DataFrame with one column per item
te = TransactionEncoder()
df_tf = te.fit_transform(item_list)
df = pd.DataFrame(df_tf, columns=te.columns_)
df_data = df[['可乐','啤酒','土豆','尿布','牛奶','面包']]   # fix a column order for display
# frequent itemsets with support >= 0.5
frequent_items = apriori(df_data, min_support=0.5, use_colnames=True)
display(frequent_items)
# association rules kept only if lift >= 1
rules = association_rules(frequent_items, metric='lift', min_threshold=1)
display(rules)
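One rule can be checked by hand against the encoded table: lift(A→B) = support(A∪B) / (support(A) × support(B)). For 尿布 (diapers) → 啤酒 (beer), support(尿布) = 4/5, support(啤酒) = 3/5 and support of both together = 3/5, so the lift is 0.6 / (0.8 × 0.6) = 1.25. A short sketch of the same check in code, using the boolean DataFrame df built above:
sup_diaper = df['尿布'].mean()                 # 0.8
sup_beer = df['啤酒'].mean()                   # 0.6
sup_both = (df['尿布'] & df['啤酒']).mean()    # 0.6
print("lift(尿布 -> 啤酒):", sup_both / (sup_diaper * sup_beer))   # 1.25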