import os
os.chdir('E:\\code\\homework\\timeseries\\web-traffic-time-series-forecasting')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the Wikipedia web-traffic training set; missing view counts become 0.
train = pd.read_csv('train_1.csv').fillna(0)
train.head()

# Rank pages by their total views over the whole period.
sum_set = pd.DataFrame(train[['Page']])
sum_set['total'] = train.sum(axis=1)
sum_set = sum_set.sort_values('total', ascending=False)
top_pages = sum_set.index[0:10]

# Collect the ten most-visited pages' series in data_list and plot each one.
# (Original indentation was lost; the plot calls are assumed to belong to the
# loop body — TODO confirm against the original notebook.)
data_list = []
for index in top_pages:
    cols = train.columns
    cols = cols[1:-1]  # drop the 'Page' column and the final date column
    data = train.loc[index, cols]
    data_list.append(data)
    days = [x for x in range(len(cols))]
    plt.plot(days, data)
    plt.show()
#信号产生器
import numpy as np
import math
import random
def getSignal(length, n_components=10):
    """Generate a synthetic test signal: a sum of random sinusoids.

    Args:
        length: number of samples; x runs over 1..length inclusive.
        n_components: number of sinusoids summed (default 10, the value
            that was previously hard-coded).

    Returns:
        1-D numpy array of length `length`.
    """
    x = np.linspace(1, length, length)
    # Random amplitude, angular frequency and phase for each component.
    para = np.random.rand(3, n_components)
    A = para[0]
    w = para[1]
    b = para[2]
    y = np.zeros(len(x))
    for i, _ in enumerate(y):
        # Same left-to-right accumulation order as the original loop.
        y[i] = sum(A[j] * math.sin(w[j] * x[i] + b[j]) for j in range(len(A)))
    return y
import keras
class LossHistory(keras.callbacks.Callback):
    """Keras callback that records train/val loss per batch and per epoch,
    and can plot the recorded curves afterwards."""

    def on_train_begin(self, logs=None):
        # Reset the history at the start of every training run.
        self.losses = {'batch': [], 'epoch': []}
        self.val_loss = {'batch': [], 'epoch': []}

    def on_batch_end(self, batch, logs=None):
        # Fixed mutable-default anti-pattern: default is now None, not {}.
        logs = logs or {}
        self.losses['batch'].append(logs.get('loss'))
        self.val_loss['batch'].append(logs.get('val_loss'))

    def on_epoch_end(self, batch, logs=None):
        logs = logs or {}
        self.losses['epoch'].append(logs.get('loss'))
        self.val_loss['epoch'].append(logs.get('val_loss'))

    def loss_plot(self, loss_type):
        """Plot the recorded loss; loss_type is 'batch' or 'epoch'.

        Validation loss is only meaningful per epoch, so the val curve is
        drawn only when loss_type == 'epoch'.
        """
        iters = range(len(self.losses[loss_type]))
        plt.figure()
        # Training loss.
        plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
        if loss_type == 'epoch':
            # Validation loss.
            plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
        plt.grid(True)
        plt.xlabel(loss_type)
        plt.ylabel('acc-loss')
        plt.legend(loc="upper right")
        plt.show()
# Plain (stateless) Keras LSTM
# Keras LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import time
# Hyper-parameters for the stateless LSTM experiment.
split_factor = 0.8
split_num = int(len(data_list[0])*split_factor)
cols = train.columns[1:-1]
hidden_size = 8
output_size = 1
batch_size = 8
window_size = 20
epoch_time = 100
# Replace the first series with a synthetic signal for a controlled test.
data_list[0] = getSignal(500)
def getWindow(data, window_size):
    """Slice `data` into overlapping windows of length `window_size`.

    Args:
        data: array-like sliced along axis 0 (here shaped (T, 1, 1)).
        window_size: number of consecutive samples per window.

    Returns:
        numpy array of shape (len(data) - window_size, 1, window_size).
    """
    x = [data[t:t + window_size] for t in range(len(data) - window_size)]
    x = np.array(x)
    return np.reshape(x, (len(x), 1, window_size))
# Train/evaluate a plain LSTM on the first series only (the loop breaks
# after i == 0).
for i, index in enumerate(data_list):
    if i >= 1:
        break
    begin = time.time()
    data = np.array(data_list[i], 'f')
    # Train/test split: the target is the input shifted one step ahead.
    X_train = data[:split_num]
    y_train = data[1:split_num + 1]
    X_test = data[split_num:-1]
    y_test = data[split_num + 1:]
    # Scale inputs and targets independently to [0, 1].
    xsc = MinMaxScaler()
    ysc = MinMaxScaler()
    X_train = np.reshape(X_train, (-1, 1))
    y_train = np.reshape(y_train, (-1, 1))
    X_train = xsc.fit_transform(X_train)
    y_train = ysc.fit_transform(y_train)
    X_train = np.reshape(X_train, (-1, 1, 1))
    x = getWindow(X_train, window_size)
    y_train = y_train[window_size:]  # align targets with the windowed inputs
    # Stacked 3-layer Keras LSTM.
    history = LossHistory()
    begin = time.time()
    regressor = Sequential()
    regressor.add(LSTM(hidden_size, return_sequences=True, input_shape=(x.shape[1], window_size)))
    regressor.add(LSTM(hidden_size, return_sequences=True))
    regressor.add(LSTM(hidden_size))
    regressor.add(Dense(output_size))
    regressor.compile(loss='mean_squared_error', optimizer='adam')
    # NOTE(review): verbose=0.2 is not a documented Keras value (expects
    # 0/1/2) — confirm the intended verbosity level.
    regressor.fit(x, y_train, batch_size=batch_size, epochs=epoch_time, verbose=0.2, shuffle=False, callbacks=[history])
    # Test phase: scale with the scalers fitted on the training data.
    inputs = X_test
    inputs = np.reshape(inputs, (-1, 1))
    inputs = xsc.transform(inputs)
    inputs = np.reshape(inputs, (-1, 1, 1))
    x = getWindow(inputs, window_size)
    y_pred = regressor.predict(x)
    y_pred = ysc.inverse_transform(y_pred)
    end = time.time()
    # Timing stops here.
    y_test = y_test[window_size:]
    print('total time for {} is {} s'.format(i, end - begin))
    plt.figure()
    plt.plot(y_test, color='red', label='Real Web View')
    plt.plot(y_pred, color='blue', label='Predicted Web View')
    plt.title('Web View Forecasting{}'.format(i))
    plt.xlabel('Number of Days from Start')
    plt.ylabel('Web View')
    plt.legend()
    plt.show()
    history.loss_plot('epoch')
    print(mean_squared_error(y_test, y_pred))
#stateful LSTM
# Keras stateful LSTM experiment
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import time
# Hyper-parameters for the stateful LSTM (batch_size fixed to 1 so state
# can carry across batches).
split_factor = 0.8
split_num = int(len(data_list[0])*split_factor)
cols = train.columns[1:-1]
hidden_size = 8
output_size = 1
batch_size = 1
window_size = 5
# BUG FIX: this was misspelled 'ephch_time', leaving the later
# `epochs=epoch_time` silently reusing the previous section's value.
epoch_time = 100
def getWindow(data, window_size):
    """Return the overlapping length-`window_size` windows of `data`,
    stacked into an array of shape (num_windows, 1, window_size)."""
    windows = []
    for start in range(len(data) - window_size):
        windows.append(data[start:start + window_size])
    arr = np.array(windows)
    return np.reshape(arr, (len(arr), 1, window_size))
# Train/evaluate a stateful LSTM on the first series only (the loop breaks
# after i == 0).
for i, index in enumerate(data_list):
    if i >= 1:
        break
    begin = time.time()
    data = np.array(data_list[i], 'f')
    # Train/test split: the target is the input shifted one step ahead.
    X_train = data[:split_num]
    y_train = data[1:split_num + 1]
    X_test = data[split_num:-1]
    y_test = data[split_num + 1:]
    # Scale inputs and targets independently to [0, 1].
    xsc = MinMaxScaler()
    ysc = MinMaxScaler()
    X_train = np.reshape(X_train, (-1, 1))
    y_train = np.reshape(y_train, (-1, 1))
    X_train = xsc.fit_transform(X_train)
    y_train = ysc.fit_transform(y_train)
    X_train = np.reshape(X_train, (-1, 1, 1))
    x = getWindow(X_train, window_size)
    y_train = y_train[window_size:]  # align targets with the windowed inputs
    # Stacked stateful LSTM: state carries across batches, so shuffle=False
    # keeps the temporal order and batch_size must match at predict time.
    begin = time.time()
    regressor = Sequential()
    regressor.add(LSTM(hidden_size, return_sequences=True, input_shape=(x.shape[1], window_size), batch_size=batch_size, stateful=True))
    regressor.add(LSTM(hidden_size, return_sequences=True, batch_size=batch_size, stateful=True))
    regressor.add(LSTM(hidden_size, batch_size=batch_size, stateful=True))
    regressor.add(Dense(output_size))
    regressor.compile(loss='mean_squared_error', optimizer='adam')
    # NOTE(review): verbose=0.2 is not a documented Keras value (expects
    # 0/1/2) — confirm the intended verbosity level.
    regressor.fit(x, y_train, batch_size=batch_size, epochs=epoch_time, verbose=0.2, shuffle=False)
    # Test phase: reuse the scalers fitted on the training data.
    inputs = X_test
    inputs = np.reshape(inputs, (-1, 1))
    inputs = xsc.transform(inputs)
    inputs = np.reshape(inputs, (-1, 1, 1))
    x = getWindow(inputs, window_size)
    y_pred = regressor.predict(x, batch_size=1)
    y_pred = ysc.inverse_transform(y_pred)
    end = time.time()
    # Timing stops here.
    y_test = y_test[window_size:]
    print('total time for {} is {} s'.format(i, end - begin))
    plt.figure()
    plt.plot(y_test, color='red', label='Real Web View')
    plt.plot(y_pred, color='blue', label='Predicted Web View')
    plt.title('Web View Forecasting{}'.format(i))
    plt.xlabel('Number of Days from Start')
    plt.ylabel('Web View')
    plt.legend()
    plt.show()
    print(mean_squared_error(y_test, y_pred))
# Keras online training (non-stateful). Online training did not perform
# well; possibly an issue with the sliding window.
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import time
# Hyper-parameters for the online-training experiment.
split_factor = 0.8
split_num = int(len(data_list[0])*split_factor)
cols = train.columns[1:-1]
hidden_size = 8
output_size = 1
batch_size = 1
window_size = 20
epoch_time = 20
train_split = 0.4
train_num = int(len(data_list[0])*train_split)
# predict_back is the retraining interval during online training
predict_epoch_time = 10
predict_window = window_size
def getWindow(data,window_size):
x = []
for t in range(len(data)-window_size):
a = data[t:t+win
使用神经网络和传统算法对于给定时间序列数据进行预测.zip
需积分: 5 95 浏览量
2023-12-28
19:58:25
上传
评论
收藏 13KB ZIP 举报
Lei宝啊
- 粉丝: 1979
- 资源: 1330
最新资源
- 《班级管理相关类的实现+射击游戏类的实现(Python)》学生实验(项目)报告
- MSI2301-VB一款SOT23封装P-Channel场效应MOS管
- sap-me-complex- assembly-how-to-guide-en
- sap-me-collaboration-how-to-guide-en
- 达梦数据库-备份与还原-国产数据库-DM8备份与还原.pdf
- MP4946-VB一款SOP8封装2个N-Channel场效应MOS管
- sap-me-basic-routing-how-to-guide-en
- 《图书管理系统(Python)》学生实验(项目)报告
- How To Set Up and Use the SAP ME Barcode Scanning Feature
- LMX2594 硬件参考设计
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈