import time
import datetime
import numpy as np
import pandas as pd
import lightgbm as lgb
import xgboost as xgb
from dateutil.parser import parse
from sklearn.model_selection import KFold, train_test_split
from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error, f1_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from lightgbm import Booster as lgbm_Booster
import matplotlib.pyplot as plt
data_path = 'C:/Users/Administrator/Desktop/kid/'
train = pd.read_csv(data_path + 'f_train_20180204.csv', encoding='gb2312')
test = pd.read_csv(data_path + 'f_test_a_20180204.csv', encoding='gb2312')
# --- Exploratory check (kept commented out): positive-label rate inside hand-made bins ---
# X, y = merge[:n_train], train_y[:n_train]
# df = pd.concat([X,y],axis=1)
# def VAR_level(line):
# a1 = line['ALT']
# a2 = line['AST']
# if a1 > 40 and a2 > 40:
# return "ALT_40-"
# # elif a1 >= 40 and a1 < 70:
# # return "ALT_30-100"
# # elif a1 >= 70 and a1 < 100:
# # return "ALT_60-100"
# # elif a1 >= 100:
# # return "ALT_100+"
# else:
# return "ALT_na"
#
# df['VAR_LEVEL'] = df.apply(VAR_level, axis=1)
# pg = df['label'].groupby(df['VAR_LEVEL'])
# print(pg.mean())
#
# def VAR_level(line):
# a1 = line['AST']/line['ALT']
# if a1 < 1:
# return "sys_90-"
# elif a1 >= 1 and a1 < 2:
# return "sys_90-130"
# elif a1 >= 2:
# return "sys_130-180"
# else:
# return "sys_na"
#
# df['VAR_LEVEL'] = df.apply(VAR_level, axis=1)
# pg = df['label'].groupby(df['VAR_LEVEL'])
# print(pg.mean())
# #
def make_feat(train, test):
    # Preprocessing strategy:
    # - Drop features with more than half of their values missing.
    # - Fill missing values in the gene features with 0.
    # - Fill the ordinal variables '孕次', '产次', 'DM家族史', 'BMI分类' with their mode.
    # - Standardize the continuous variables; values whose absolute standardized value
    #   exceeds 3 are treated as outliers and, like missing values, filled with the median.
merge = pd.concat([train, test])
n_train = len(train)
train_y = merge['label']
merge = merge.drop(['label','id'], axis=1)
merge = merge.reset_index(drop=True)
    # Count missing values per column and drop every column where more than half of
    # the values (over 600 of the merged rows) are missing.
    missing_counts = merge.isnull().sum()
    merge = merge.drop(columns=missing_counts[missing_counts > 600].index)
merge = merge.sort_index(axis=1, ascending=False)
    # 年龄 (age): bucket into ranges and one-hot encode. The same
    # bucket -> get_dummies -> concat -> drop pattern repeats for the features below;
    # a generic helper sketch follows the 孕次 block.
def age_level(line):
age = line['年龄']
if age < 25:
return "age_0_25"
elif age >= 25 and age < 30:
return "age_25_30"
elif age >= 30 and age < 40:
return "age_30_40"
        elif age >= 40 and age < 50:
            return "age_40_50"
else:
return "age_na_"
merge['年龄_LEVEL'] = merge.apply(age_level, axis=1)
d_age = pd.get_dummies(merge['年龄_LEVEL'], prefix="年龄")
merge = pd.concat([d_age, merge], axis=1)
merge = merge.drop(['年龄_LEVEL'], axis=1)
    # 收缩压 (systolic blood pressure)
def sys_level(line):
a1 = line['收缩压']
if a1 < 90:
return "sys_90-"
elif a1 >= 90 and a1 < 130:
return "sys_90-130"
elif a1 >= 130 and a1 < 180:
return "sys_130-180"
else:
return "sys_na"
merge['sys_LEVEL'] = merge.apply(sys_level, axis=1)
d_sys = pd.get_dummies(merge['sys_LEVEL'])
merge = pd.concat([d_sys, merge], axis=1)
merge = merge.drop(['sys_LEVEL'], axis=1)
    # 收缩压 + 舒张压 (systolic + diastolic blood pressure)
def bld_level(line):
a1 = line['收缩压'] + line['舒张压']
if a1 < 150:
return "bloodT_150-"
elif a1 >= 150 and a1 < 200:
return "bloodT_150-200"
elif a1 >= 200 and a1 < 260:
return "bloodT_200-260"
else:
return "bloodT_na"
merge['bld_LEVEL'] = merge.apply(bld_level, axis=1)
d_bld = pd.get_dummies(merge['bld_LEVEL'])
merge = pd.concat([d_bld, merge], axis=1)
merge = merge.drop(['bld_LEVEL'], axis=1)
    # wbc (white blood cell count)
def wbc_level(line):
a1 = line['wbc']
if a1 < 8:
return "wbc_8-"
elif a1 >= 8 and a1 < 14:
return "wbc_8-14"
elif a1 >= 14 and a1 < 21:
return "wbc_14-21"
else:
return "wbc_na"
merge['wbc_LEVEL'] = merge.apply(wbc_level, axis=1)
d_wbc = pd.get_dummies(merge['wbc_LEVEL'])
merge = pd.concat([d_wbc, merge], axis=1)
merge = merge.drop(['wbc_LEVEL'], axis=1)
    # ApoA1 / ApoB ratio
def Ap_level(line):
a1 = line['ApoA1'] / line['ApoB']
if a1 < 1:
return "Ap_1-"
elif a1 >= 1 and a1 < 3.7:
return "Ap_1-3.7"
elif a1 >= 3.7 and a1 < 15:
return "Ap_3.7-15"
else:
return "Ap_na"
merge['Ap_LEVEL'] = merge.apply(Ap_level, axis=1)
d_Ap = pd.get_dummies(merge['Ap_LEVEL'])
merge = pd.concat([d_Ap, merge], axis=1)
merge = merge.drop(['Ap_LEVEL'], axis=1)
    # 孕前体重 (pre-pregnancy weight)
def wei_level(line):
a1 = line['孕前体重']
if a1 < 50:
return "wei_50-"
elif a1 >= 50 and a1 < 70:
return "wei_50-70"
elif a1 >= 70 and a1 < 80:
return "wei_70-80"
elif a1 >= 80 and a1 < 100:
return "wei_80-100"
else:
return "wei_na"
merge['wei_LEVEL'] = merge.apply(wei_level, axis=1)
d_wei = pd.get_dummies(merge['wei_LEVEL'])
merge = pd.concat([d_wei, merge], axis=1)
merge = merge.drop(['wei_LEVEL'], axis=1)
    # TG (triglycerides)
def TG_level(line):
a1 = line['TG']
if a1 < 1.5:
return "TG_1.5-"
elif a1 >= 1.5 and a1 < 5.8:
return "TG_1.5-5.8"
elif a1 >= 5.8 and a1 < 10:
return "TG_5.8-10"
else:
return "TG_na"
merge['TG_LEVEL'] = merge.apply(TG_level, axis=1)
d_TG = pd.get_dummies(merge['TG_LEVEL'])
merge = pd.concat([d_TG, merge], axis=1)
merge = merge.drop(['TG_LEVEL'], axis=1)
    # ALT / AST (flag rows where both liver enzymes exceed 40)
def ALT_level(line):
a1 = line['ALT']
a2 = line['AST']
if a1 > 40 and a2 > 40:
return "ALT_+"
else:
return "ALT_-"
merge['ALT_LEVEL'] = merge.apply(ALT_level, axis=1)
alt = pd.get_dummies(merge['ALT_LEVEL'])
merge = pd.concat([alt, merge], axis=1)
merge = merge.drop(['ALT_LEVEL'], axis=1)
    # VAR00007
def VAR_level(line):
a1 = line['VAR00007']
if a1 < 1.3:
return "VAR_1.3-"
elif a1 >= 1.3 and a1 < 1.6:
return "VAR_1.3-1.6"
        elif a1 >= 1.6:
            return "VAR_1.6+"
else:
return "VAR_na"
merge['VAR_LEVEL'] = merge.apply(VAR_level, axis=1)
var = pd.get_dummies(merge['VAR_LEVEL'])
merge = pd.concat([var, merge], axis=1)
merge = merge.drop(['VAR_LEVEL'], axis=1)
    # 孕次 (number of pregnancies)
def pg_level(line):
pg = line['孕次']
if pg < 3:
return "pg_0_3"
else:
return "pg_3_"
merge['pg_LEVEL'] = merge.apply(pg_level, axis=1)
d_pg = pd.get_dummies(merge['pg_LEVEL'])
merge = pd.concat([d_pg, merge], axis=1)
merge = merge.drop(['pg_LEVEL'], axis=1)
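    # The blocks above all follow the same pattern: bucket a raw column with a small
    # rule function, one-hot encode the bucket with pd.get_dummies, prepend the dummies
    # to merge, then drop the temporary *_LEVEL column. A hedged sketch of a helper that
    # captures this pattern (it is not called anywhere; shown only to make the repeated
    # structure explicit):
    def add_bucket_dummies(df, level_func, prefix=None):
        # Apply the row-wise bucketing rule, one-hot encode the buckets, and prepend them.
        levels = df.apply(level_func, axis=1)
        dummies = pd.get_dummies(levels, prefix=prefix)
        return pd.concat([dummies, df], axis=1)
    # e.g. the 孕次 block above is equivalent to: merge = add_bucket_dummies(merge, pg_level)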
##################################################################
for i in['年龄','身高','孕前体重','孕前BMI','收缩压','舒张压
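    # A hedged sketch of the remaining preprocessing steps described in the strategy
    # comment at the top of make_feat. The column selections are assumptions, not taken
    # from the original script: gene columns are assumed to be those whose names contain
    # 'SNP', and the continuous-column list is the (partial) one from the loop above.
    # 1) Fill missing gene values with 0.
    snp_cols = [c for c in merge.columns if 'SNP' in str(c)]   # assumed naming convention
    merge[snp_cols] = merge[snp_cols].fillna(0)
    # 2) Fill the ordinal variables with their mode.
    for col in ['孕次', '产次', 'DM家族史', 'BMI分类']:
        if col in merge.columns:
            merge[col] = merge[col].fillna(merge[col].mode()[0])
    # 3) Standardize each continuous column, mark |z| > 3 as outliers, and fill both
    #    outliers and missing values with the median of the standardized column.
    for col in ['年龄', '身高', '孕前体重', '孕前BMI', '收缩压', '舒张压']:
        z = (merge[col] - merge[col].mean()) / merge[col].std()
        z[z.abs() > 3] = np.nan
        merge[col] = z.fillna(z.median())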