import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
# from sklearn.preprocessing import StandardScaler
from utils.tools import StandardScaler
from utils.timefeatures import time_features
import warnings
warnings.filterwarnings('ignore')
class Dataset_ETT_hour(Dataset):
def __init__(self, root_path, flag='train', size=None,
features='S', data_path='ETTh1.csv',
target='OT', scale=True, inverse=False, timeenc=0, freq='h', cols=None):
# size [seq_len, label_len, pred_len]
# info
if size == None:
self.seq_len = 24*4*4
self.label_len = 24*4
self.pred_len = 24*4
else:
self.seq_len = size[0]
self.label_len = size[1]
self.pred_len = size[2]
# init
assert flag in ['train', 'test', 'val']
type_map = {'train':0, 'val':1, 'test':2}
self.set_type = type_map[flag]
self.features = features
self.target = target
self.scale = scale
self.inverse = inverse
self.timeenc = timeenc
self.freq = freq
self.root_path = root_path
self.data_path = data_path
self.__read_data__()
def __read_data__(self):
self.scaler = StandardScaler()
df_raw = pd.read_csv(os.path.join(self.root_path,
self.data_path))
border1s = [0, 12*30*24 - self.seq_len, 12*30*24+4*30*24 - self.seq_len]
border2s = [12*30*24, 12*30*24+4*30*24, 12*30*24+8*30*24]
border1 = border1s[self.set_type]
border2 = border2s[self.set_type]
if self.features=='M' or self.features=='MS':
cols_data = df_raw.columns[1:]
df_data = df_raw[cols_data]
elif self.features=='S':
df_data = df_raw[[self.target]]
if self.scale:
train_data = df_data[border1s[0]:border2s[0]]
self.scaler.fit(train_data.values)
data = self.scaler.transform(df_data.values)
else:
data = df_data.values
df_stamp = df_raw[['date']][border1:border2]
df_stamp['date'] = pd.to_datetime(df_stamp.date)
data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)
self.data_x = data[border1:border2]
if self.inverse:
self.data_y = df_data.values[border1:border2]
else:
self.data_y = data[border1:border2]
self.data_stamp = data_stamp
def __getitem__(self, index):
s_begin = index
s_end = s_begin + self.seq_len
r_begin = s_end - self.label_len
r_end = r_begin + self.label_len + self.pred_len
seq_x = self.data_x[s_begin:s_end]
if self.inverse:
seq_y = np.concatenate([self.data_x[r_begin:r_begin+self.label_len], self.data_y[r_begin+self.label_len:r_end]], 0)
else:
seq_y = self.data_y[r_begin:r_end]
seq_x_mark = self.data_stamp[s_begin:s_end]
seq_y_mark = self.data_stamp[r_begin:r_end]
return seq_x, seq_y, seq_x_mark, seq_y_mark
def __len__(self):
return len(self.data_x) - self.seq_len- self.pred_len + 1
def inverse_transform(self, data):
return self.scaler.inverse_transform(data)
class Dataset_ETT_minute(Dataset):
def __init__(self, root_path, flag='train', size=None,
features='S', data_path='sum.csv',
target='OT', scale=True, inverse=False, timeenc=0, freq='t', cols=None):
# size [seq_len, label_len, pred_len]
# info
if size == None:
self.seq_len = 24*4*4
self.label_len = 24*4
self.pred_len = 24*4
else:
self.seq_len = size[0]
self.label_len = size[1]
self.pred_len = size[2]
# init
assert flag in ['train', 'test', 'val']
type_map = {'train':0, 'val':1, 'test':2}
self.set_type = type_map[flag]
self.features = features
self.target = target
self.scale = scale
self.inverse = inverse
self.timeenc = timeenc
self.freq = freq
self.root_path = root_path
self.data_path = data_path
self.__read_data__()
def __read_data__(self):
self.scaler = StandardScaler()
df_raw = pd.read_csv(os.path.join(self.root_path,
self.data_path))
border1s = [0, 12*30*24*4 - self.seq_len, 12*30*24*4+4*30*24*4 - self.seq_len]
border2s = [12*30*24*4, 12*30*24*4+4*30*24*4, 12*30*24*4+8*30*24*4]
border1 = border1s[self.set_type]
border2 = border2s[self.set_type]
if self.features=='M' or self.features=='MS':
cols_data = df_raw.columns[1:]
df_data = df_raw[cols_data]
elif self.features=='S':
df_data = df_raw[[self.target]]
if self.scale:
train_data = df_data[border1s[0]:border2s[0]]
self.scaler.fit(train_data.values)
data = self.scaler.transform(df_data.values)
else:
data = df_data.values
df_stamp = df_raw[['date']][border1:border2]
df_stamp['date'] = pd.to_datetime(df_stamp.date)
data_stamp = time_features(df_stamp, timeenc=self.timeenc, freq=self.freq)
self.data_x = data[border1:border2]
if self.inverse:
self.data_y = df_data.values[border1:border2]
else:
self.data_y = data[border1:border2]
self.data_stamp = data_stamp
def __getitem__(self, index):
s_begin = index
s_end = s_begin + self.seq_len
r_begin = s_end - self.label_len
r_end = r_begin + self.label_len + self.pred_len
seq_x = self.data_x[s_begin:s_end]
if self.inverse:
seq_y = np.concatenate([self.data_x[r_begin:r_begin+self.label_len], self.data_y[r_begin+self.label_len:r_end]], 0)
else:
seq_y = self.data_y[r_begin:r_end]
seq_x_mark = self.data_stamp[s_begin:s_end]
seq_y_mark = self.data_stamp[r_begin:r_end]
return seq_x, seq_y, seq_x_mark, seq_y_mark
def __len__(self):
return len(self.data_x) - self.seq_len - self.pred_len + 1
def inverse_transform(self, data):
return self.scaler.inverse_transform(data)
class Dataset_Custom(Dataset):
def __init__(self, root_path, flag='train', size=None,
features='S', pre_data = None, data_path='ETTh1.csv',
target='OT', scale=True, inverse=False, timeenc=0, freq='h', cols=None):
# size [seq_len, label_len, pred_len]
# info
if size == None:
self.seq_len = 24*4*4
self.label_len = 24*4
self.pred_len = 24*4
else:
self.seq_len = size[0]
self.label_len = size[1]
self.pred_len = size[2]
# init
assert flag in ['train', 'test', 'val']
type_map = {'train':0, 'val':1, 'test':2}
self.set_type = type_map[flag]
self.features = features
self.target = target
self.scale = scale
self.inverse = inverse
self.timeenc = timeenc
self.freq = freq
self.cols = cols
self.root_path = root_path
self.data_path = data_path
self.__read_data__()
def __read_data__(self):
self.scaler = StandardScaler()
df_raw = pd.read_csv(os.path.join(self.root_path,
self.data_path))
'''
df_raw.columns: ['date', ...(other features), target feature]
'''
# cols = list(df_raw.columns);
if self.cols:
cols=self.cols.copy()
cols.remove(self.target)
else:
cols = list
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
本篇博客带大家看的是Informer模型进行时间序列预测的实战案例,它是在2019年被提出并在ICLR 2020上被评为Best Paper,可以说Informer模型在当今的时间序列预测方面还是十分可靠的,Informer模型的实质是注意力机制+Transformer模型,Informer模型的核心思想是将输入序列进行自注意力机制的处理,以捕捉序列中的长期依赖关系,并利用Transformer的编码器-解码器结构进行预测,通过阅读本文你可以学会利用个人数据集训练模型。Informer是一种用于长序列时间序列预测的Transformer模型,但是它与传统的Transformer模型又有些不同点,与传统的Transformer模型相比,Informer具有以下几个独特的特点: 1. ProbSparse自注意力机制:Informer引入了ProbSparse自注意力机制,该机制在时间复杂度和内存使用方面达到了O(Llog L)的水平,能够有效地捕捉序列之间的长期依赖关系。 2. 自注意力蒸馏:通过减少级联层的输入,自注意力蒸馏技术可以有效处理极长的输入序列,提高了模型处理长序列的能力
资源推荐
资源详情
资源评论
收起资源包目录
Informer2020-main.zip (64个子文件)
ETTh1-Test.csv 38KB
ETTh1.csv 2.47MB
data
__init__.py 1B
data_loader.py 13KB
__pycache__
__init__.cpython-39.pyc 153B
data_loader.cpython-39.pyc 9KB
utils
__init__.py 0B
metrics.py 826B
masking.py 851B
timefeatures.py 5KB
tools.py 3KB
__pycache__
timefeatures.cpython-39.pyc 7KB
tools.cpython-39.pyc 3KB
masking.cpython-39.pyc 1KB
__init__.cpython-39.pyc 154B
metrics.cpython-39.pyc 1KB
.idea
Informer2020-main.iml 482B
workspace.xml 6KB
misc.xml 185B
inspectionProfiles
Project_Default.xml 1KB
profiles_settings.xml 174B
modules.xml 293B
.gitignore 50B
aws.xml 304B
exp
__init__.py 0B
exp_informer.py 11KB
exp_basic.py 875B
__pycache__
__init__.cpython-39.pyc 152B
exp_informer.cpython-39.pyc 8KB
exp_basic.cpython-39.pyc 2KB
main_informer.py 7KB
environment.yml 198B
models
__init__.py 0B
attn.py 6KB
model.py 7KB
decoder.py 2KB
embed.py 4KB
encoder.py 3KB
__pycache__
model.cpython-39.pyc 5KB
__init__.cpython-39.pyc 155B
decoder.cpython-39.pyc 2KB
embed.cpython-39.pyc 5KB
encoder.cpython-39.pyc 3KB
attn.cpython-39.pyc 5KB
checkpoints
informer_custom_ftMS_sl126_ll64_pl24_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
checkpoint.pth 62.77MB
informer_custom_ftMS_sl126_ll64_pl4_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
checkpoint.pth 62.77MB
forecsat.csv 265B
results
informer_Sum_ftMS_sl126_ll64_pl4_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
pred.npy 9KB
real_prediction.npy 144B
metrics.npy 148B
true.npy 9KB
informer_custom_ftMS_sl126_ll64_pl24_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
pred.npy 12KB
metrics.npy 148B
true.npy 12KB
informer_Sum_ftM_sl126_ll64_pl4_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
pred.npy 27KB
real_prediction.npy 176B
metrics.npy 148B
true.npy 27KB
informer_custom_ftMS_sl126_ll64_pl4_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_0
pred.npy 2KB
metrics.npy 148B
true.npy 2KB
informer_Sum_ftMS_sl126_ll64_pl4_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_test_1
pred.npy 2KB
metrics.npy 148B
true.npy 2KB
共 64 条
- 1
资源评论
- cccth082024-03-19#内容详尽 感谢
Snu77
- 粉丝: 4w+
- 资源: 18
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功