from os import name
from time import process_time_ns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LassoCV,Lasso,LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve,auc
import seaborn as sns
import os
def get_result(path):
    """Select features with LassoCV, then fit and score a logistic regression.

    The workbook at ``path`` is read with ``pandas.read_excel``. Its first
    column is used as the target and every remaining column as a candidate
    feature. The features are standardised and passed to ``LassoCV``
    (10-fold CV over 50 log-spaced alphas in [1e-3, 1e1]); the columns with a
    non-zero coefficient are then used, unscaled, to train a
    ``LogisticRegression`` on a 70/30 train/test split (``random_state=0``).

    Side effects:
        Prints the table of non-zero coefficients and the test-set ROC AUC,
        and writes four files under ``result/<path without extension>/``
        (the directory is created when missing):

        * ``pre.csv``     - training labels and predicted probabilities
        * ``<alpha>.csv`` - test labels and predicted probabilities, named
          after the alpha chosen by ``LassoCV``
        * ``figure.pdf``  - bar plot of the non-zero coefficients
        * ``coef.csv``    - the non-zero coefficients

    Args:
        path: Path (string) of the Excel workbook to analyse.

    Raises:
        ValueError: If the Lasso shrinks every coefficient to zero, so there
            is nothing left to train the classifier on.
    """
    data = pd.read_excel(path)
    y = data.iloc[:, 0]
    # Take every column after the target. Bounding this slice by the row
    # count (as was done before) silently drops features whenever the table
    # has more columns than rows.
    features = data.iloc[:, 1:]

    # --- Feature selection: LassoCV on standardised features ---------------
    x_scaled = StandardScaler().fit_transform(features)
    alphas = np.logspace(-3, 1, 50)
    model_lassoCV = LassoCV(alphas=alphas, cv=10, max_iter=100000).fit(x_scaled, y)
    lamda = model_lassoCV.alpha_

    coef = pd.Series(model_lassoCV.coef_, index=features.columns)
    selected = coef[coef != 0]
    # Build the table directly from the filtered series so that adding the
    # 'index' column does not write into a slice of another frame.
    coefplot = pd.DataFrame({'weight': selected})
    coefplot['index'] = coefplot.index
    print(coefplot)

    if selected.empty:
        raise ValueError(
            'LassoCV selected no features for ' + repr(path)
            + '; cannot fit the logistic regression.'
        )

    # --- Classification on the selected (unscaled) columns -----------------
    lrx = features[list(selected.index)]
    x_train, x_test, y_train, y_test = train_test_split(
        lrx, y, test_size=0.3, random_state=0
    )
    # Fit once and reuse the fitted model for both sets of predictions.
    clf = LogisticRegression(max_iter=5000).fit(x_train, y_train)
    y_score = clf.predict_proba(x_test)[:, 1]
    y_score_train = clf.predict_proba(x_train)[:, 1]

    # Re-attach the original row labels so predictions align with the targets.
    res = pd.concat(
        [y_test, pd.DataFrame(y_score, index=y_test.index)], axis=1
    )
    train_res = pd.concat(
        [y_train, pd.DataFrame(y_score_train, index=y_train.index)], axis=1
    )

    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_auc = auc(fpr, tpr)
    print(roc_auc)

    # --- Persist outputs ----------------------------------------------------
    # splitext strips only the final extension; splitting on the first '.'
    # mangles names such as 'a.b.xlsx' or paths such as './x.xlsx'.
    out_dir = 'result/' + os.path.splitext(path)[0]
    os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure()
    try:
        sns.barplot(x='index', y='weight', data=coefplot)
        fig.savefig(os.path.join(out_dir, 'figure.pdf'), dpi=400)
    finally:
        # Release the figure; otherwise one figure per processed file stays
        # alive for the lifetime of the process.
        plt.close(fig)

    res.to_csv(os.path.join(out_dir, str(lamda) + '.csv'))
    train_res.to_csv(os.path.join(out_dir, 'pre.csv'))
    coefplot.to_csv(os.path.join(out_dir, 'coef.csv'))
# Run the pipeline on every Excel workbook found in the 'data' directory.
# Single-workbook example: get_result('3d_IVIM_ADC基线.xlsx')
files = os.listdir('data')
for file in files:
    # os.listdir returns bare names, so rebuild the path relative to 'data';
    # passing the bare name would make read_excel look in the working
    # directory instead.
    file_path = os.path.join('data', file)
    # Skip sub-directories, Excel's "~$" lock files and anything that is not
    # an Excel workbook, since read_excel cannot parse them.
    if (
        not os.path.isfile(file_path)
        or file.startswith('~$')
        or not file.lower().endswith(('.xlsx', '.xlsm', '.xls'))
    ):
        continue
    print(file)
    get_result(file_path)
R语言建立逻辑回归临床预测模型 + 逻辑回归临床预测模型：LASSO回归变量筛选、ROC曲线定制、DeLong检验
2星 125 浏览量
2023-11-01
20:44:15
上传
评论 2
收藏 45KB RAR 举报
温柔-的-女汉子
- 粉丝: 629
- 资源: 3934