# -*- coding: utf-8 -*-
"""classification.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1K21WJyxKz87V3In4_np7tPdqSfarduCA
# INTRODUCTION
> An electrocardiogram (ECG) is a simple test that can be used to check your heart's rhythm and electrical activity.
> Sensors attached to the skin are used to detect the electrical signals produced by your heart each time it beats. These signals are recorded by a machine and are looked at by a doctor to see if they're unusual.
The _MIT-BIH Arrhythmia Database_ contains 48 half-hour excerpts of two-channel ambulatory ECG recordings, obtained from 47 subjects studied by the BIH Arrhythmia Laboratory between 1975 and 1979. Twenty-three recordings were chosen at random from a set of 4000 24-hour ambulatory ECG recordings collected from a mixed population of inpatients (about 60%) and outpatients (about 40%) at Boston's Beth Israel Hospital; the remaining 25 recordings were selected from the same set to include less common but clinically significant arrhythmias that would not be well-represented in a small random sample.
![image.png](attachment:image.png)
> **Ectopic heartbeats** are extra heartbeats that occur just before a regular beat. Ectopic beats are normal and usually not a cause for concern, though they can make people feel anxious.
"""
import numpy as np, pandas as pd
import tensorflow as tf, matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.utils import resample
import os
"""# DATA DISTRIBUTION INFO
> ptbdb_abnormal.shape => 10506x188
> ptbdb_normal.shape => 4046x188
> mitbih_train.shape => 87554x188
> mitbih_test.shape => 21892x188
Normalization is not required, datasets are already normalized; if it was, we could've used:
> minmaxpreprocessor=preprocessing.MinMaxScaler()
> norm_full_train_x=pd.DataFrame(minmaxpreprocessor.fit_transform(full_train_x))
"""
''' Read the CSV file datasets: NORMAL_LABEL=0 , ABNORMAL_LABEL=1,2,3,4,5 '''
ptbdb_abnormal=pd.read_csv(os.path.join('/heartbeat', 'ptbdb_abnormal.csv'),header=None)
ptbdb_normal=pd.read_csv(os.path.join('/heartbeat', 'ptbdb_normal.csv'),header=None)
ptbdb_train=pd.concat([ptbdb_abnormal,ptbdb_normal],ignore_index=True)
mitbih_train=pd.read_csv(os.path.join('/heartbeat', 'mitbih_train.csv'),header=None)
mitbih_test=pd.read_csv(os.path.join('/heartbeat', 'mitbih_test.csv'),header=None)
'''VISUALIZE THE [MITBIH_TRAIN] DATASET: You will observe that majority of the obsvns are of Label=0'''
fig,ax=plt.subplots(figsize=(8,8))
mitbih_train[187].value_counts().plot(ax=ax,kind='bar')
plt.show()
full_train=pd.concat([mitbih_train,ptbdb_train],ignore_index=True)
full_train.shape #102106*188
'''BREAK DOWN THE FULL TRAIN DATASET INTO X & Y'''
full_train_y=full_train[187]
full_train_x=full_train.drop(columns=[187])
'''BREAK DOWN THE TEST DATASET INTO X & Y'''
mitbih_test_y=mitbih_test[187]
mitbih_test_x=mitbih_test.drop(columns=[187])
'''TO VISUALIZE TRAINING LABEL-WISE DISTRIBUTION OF DATA IN PIE-CHART'''
plt.figure(figsize=(10,10))
plt.pie(mitbih_train[187].value_counts(), labels=['N','Q','V','S','F'],
colors=['orange','yellow','lightblue','lightgreen','grey'], autopct='%.2f%%')
plt.gcf().gca().add_artist(plt.Circle((0,0),0.7,color='white'))
plt.title('Imbalanced classes with majority Normal heartbeats')
plt.show()
"""# VISUALIZING AN EXAMPLE OF EACH ECG CLASS
Curious to observe each entry of the 5 types of heartbeats.
"""
plots= [['Normal Beat','Supraventricular Ectopic Beat'], ['Ventricular Ectopic Beat', 'Fusion Ectopic Beat'], ['Unknown']]
colors= [['green', 'orange'], ['red', 'blue'], ['grey']]
fig, axs = plt.subplots(3, 2, constrained_layout=True, figsize=(14,14))
fig.delaxes(axs[2,1])
for i in range(0,5,2):
j=i//2
axs[j][0].plot(mitbih_train[mitbih_train[187]==i%5].sample(1, random_state=100).iloc[0,:186], colors[j][0])
axs[j][0].set_title('{}. {}'.format(i%5,plots[j][0]))
if i%5!=4:
axs[j][1].plot(mitbih_train[mitbih_train[187]==(i%5)+1].sample(1, random_state=100).iloc[0,:186], colors[j][1])
axs[j][1].set_title('{}. {}'.format(i%5+1,plots[j][1]))
"""# UPSAMPLING TO ENSURE EVEN CLASS-DISTRIBUTION
As you can observe in the above class-wise data distribution, 'N' has the highest number of records, with very few records for 'F' class.
We can use the _sklearn **resample()**_ function to upsample the other classes to bring them up to keel.
"""
'''RESAMPLING THE CLASSES TO HAVE EQUAL DATA DISTRIBUTION LED TO WORSEN PERFORMANCE (POSSIBLE UNDERFITTING REASON)'''
df0=mitbih_train[mitbih_train[187]==0].sample(n=20000,random_state=10)
df1=mitbih_train[mitbih_train[187]==1]
df2=mitbih_train[mitbih_train[187]==2]
df3=mitbih_train[mitbih_train[187]==3]
df4=mitbih_train[mitbih_train[187]==4]
df1_upsampled=resample(df1,replace=True,n_samples=20000,random_state=100)
df2_upsampled=resample(df2,replace=True,n_samples=20000,random_state=101)
df3_upsampled=resample(df3,replace=True,n_samples=20000,random_state=102)
df4_upsampled=resample(df4,replace=True,n_samples=20000,random_state=103)
train_df=pd.concat([df1_upsampled,df2_upsampled,df3_upsampled,df4_upsampled,df0])
print(train_df[187].value_counts())
plt.figure(figsize=(10,10))
plt.pie(train_df[187].value_counts(), labels=['N','Q','V','S','F'],
colors=['orange','yellow','lightblue','lightgreen','grey'], autopct='%.2f%%')
plt.gcf().gca().add_artist(plt.Circle( (0,0), 0.7, color='white' ))
plt.title('Balanced classes after upsampling')
plt.show()
"""# CREATING THE MODEL BY STACKING UP CONV1D LAYERS
The following code-cell will implement a simple Sequential TFv2 model with stacked up Convolutional layers.
These will essentially summarize chunks of data together based on filter-size, to produce feature-maps.
Finally we'll have a softmax output layer to give 5 predictictions since we have 5 classes.
"""
model=tf.keras.Sequential()
model.add(tf.keras.layers.Convolution1D(filters=50,kernel_size=20,activation='relu',kernel_initializer='glorot_uniform',input_shape=(187,1)))
#a1_0=> 187-20+1= 168,50
model.add(tf.keras.layers.MaxPool1D(pool_size=10,data_format='channels_first'))
#a1_1=> 50//10= 168,5
model.add(tf.keras.layers.Convolution1D(filters=20,kernel_size=15,activation='relu',kernel_initializer='glorot_uniform'))
#a2_0=> 168-15+1= 154,20
model.add(tf.keras.layers.MaxPool1D(pool_size=15,data_format='channels_first'))
#a2_1=> 20//15= 154,1
model.add(tf.keras.layers.Convolution1D(filters=10,kernel_size=10,activation='relu',kernel_initializer='glorot_uniform'))
#a3_0=> 154-10+1=145,10
model.add(tf.keras.layers.MaxPool1D(pool_size=10,data_format='channels_first'))
#a3_1=> 10//10=145,1
model.add(tf.keras.layers.Flatten())
#a4=> 145
model.add(tf.keras.layers.Dense(units=512,activation='relu',kernel_initializer='glorot_uniform'))
#a4=> 512
model.add(tf.keras.layers.Dense(units=128,activation='relu',kernel_initializer='glorot_uniform'))
#a5=> 128
model.add(tf.keras.layers.Dense(units=5,activation='softmax'))
model.compile(optimizer='Adam',loss='sparse_categorical_crossentropy',metrics=['acc'])
model.summary()
"""# TRAINING THE MODEL
### BALANCED DATASET
"""
mitbih_test_x2=np.asarray(mitbih_test_x)
mitbih_test_x2=mitbih_test_x2.reshape(-1,187,1)
''' DATASET AFTER UPSAMPLING, WITH EVEN DISTRIBUTION ACROSS CLASSES '''
train_df_X=np.asarray(train_df.iloc[:,:187]).reshape(-1,187,1)
train_df_Y=train_df.iloc[:,187]
print(train_df_X.shape)
hist=model.fit(train_df_X,train_df_Y,batch_size=64,epochs=20)
_,acc=model.evaluate(mitbih_test_x2,mitbih_test_y,verbose=2)
print('Accuracy for the balanced-dataset = ',round(acc*100,2),'%')
"""# TRAINING THE MODEL - II
### UNBALANCED DATASET
"""
''' UNBALANCED TRAIN DATASET FED INTO MODEL '''
full_train_x2=np.asarray(full_train_x)
full_train_x2=full_train_x2.reshape(-1,187,1)
print(full_train_x2.shape)
hist=model.fit(full_train_x2,full_train_y,batch_size=64,epochs=20)
_,acc=model.evaluate(mitbih_test_x2,mitbih_test_
没有合适的资源?快使用搜索试试~ 我知道了~
信号处理基于CNN的心电(ECG)信号分类典型方法实现(tensorflow)
共6个文件
csv:4个
py:1个
ipynb:1个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
5星 · 超过95%的资源 1 下载量 19 浏览量
2024-04-29
12:19:25
上传
评论 1
收藏 98.78MB ZIP 举报
温馨提示
【信号处理】基于CNN的心电(ECG)信号分类典型方法实现(tensorflow)
资源推荐
资源详情
资源评论
收起资源包目录
archive.zip (6个子文件)
archive
ptbdb_normal.csv 18.14MB
mitbih_test.csv 98.13MB
mitbih_train.csv 392.44MB
classification.ipynb 15KB
classification.py 9KB
ptbdb_abnormal.csv 47.09MB
共 6 条
- 1
资源评论
- 2301_766993512024-05-06超级好的资源,很值得参考学习,对我启发很大,支持!
医学信号图像处理匠人
- 粉丝: 6435
- 资源: 33
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功