import os
import numpy as np
from PIL import Image
from torch.utils import data
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm import tqdm
## ------------------- label conversion tools ------------------ ##
def labels2cat(label_encoder, list):
    """Convert a sequence of labels to their categorical codes.

    Args:
        label_encoder: Object exposing ``transform`` (presumably a fitted
            scikit-learn ``LabelEncoder`` -- confirm against callers).
        list: Sequence of labels to encode. The name shadows the builtin
            but is kept for interface compatibility.

    Returns:
        Whatever ``label_encoder.transform`` returns for the given labels.
    """
    categories = label_encoder.transform(list)
    return categories
def labels2onehot(OneHotEncoder, label_encoder, list):
    """Convert a sequence of labels to a dense one-hot array.

    Args:
        OneHotEncoder: Object exposing ``transform`` whose result has a
            ``toarray`` method (presumably a fitted scikit-learn
            ``OneHotEncoder`` -- confirm against callers).
        label_encoder: Object exposing ``transform`` that maps labels to
            an array of categorical codes.
        list: Sequence of labels to encode. The name shadows the builtin
            but is kept for interface compatibility.

    Returns:
        The result of ``toarray()`` on the encoded labels.
    """
    # The one-hot encoder is fed a single column of categorical codes.
    category_column = label_encoder.transform(list).reshape(-1, 1)
    encoded = OneHotEncoder.transform(category_column)
    return encoded.toarray()
def onehot2labels(label_encoder, y_onehot):
    """Convert a one-hot encoded array back to a list of labels.

    Args:
        label_encoder: Object exposing ``inverse_transform`` that maps
            categorical codes back to labels.
        y_onehot: Array with at least two dimensions; the positions along
            the second axis where the value equals 1 are used as the codes.

    Returns:
        list: Labels decoded from the positions of the ones.
    """
    hot_positions = np.where(y_onehot == 1)
    # Index 1 holds the second-axis (column) coordinate of every 1 entry.
    column_indices = hot_positions[1]
    decoded = label_encoder.inverse_transform(column_indices)
    return decoded.tolist()
def cat2labels(label_encoder, y_cat):
    """Convert categorical codes back to a list of labels.

    Args:
        label_encoder: Object exposing ``inverse_transform`` that maps
            categorical codes back to labels.
        y_cat: Categorical codes to decode.

    Returns:
        list: The decoded labels.
    """
    decoded = label_encoder.inverse_transform(y_cat)
    return decoded.tolist()
## ---------------------- Dataloaders ---------------------- ##
# for 3DCNN
class Dataset_3DCNN(data.Dataset):
    """Dataset of grayscale video clips stored as folders of JPEG frames.

    Every sample is read from ``<data_path>/<folder>/frame{i:06d}.jpg`` for
    each ``i`` in ``frames``; each frame is converted to single-channel
    ('L') mode before any transform is applied.

    Args:
        data_path: Root directory containing one sub-folder per sample.
        folders: Sequence of sub-folder names, one per sample.
        labels: Sequence of integer labels aligned with ``folders``.
        frames: Iterable of frame indices loaded for every sample.
        transform: Optional callable applied to each PIL image. It is
            expected to return a tensor; a leading dimension of size 1 is
            squeezed away. When ``None``, each frame is converted to a
            float tensor scaled to [0, 1] instead of failing.
    """

    def __init__(self, data_path, folders, labels, frames, transform=None):
        """Store the dataset configuration; no files are read here."""
        self.data_path = data_path
        self.labels = labels
        self.folders = folders
        self.transform = transform
        self.frames = frames

    def __len__(self):
        """Return the total number of samples (one per folder)."""
        return len(self.folders)

    @staticmethod
    def _to_tensor(image):
        """Convert an 8-bit grayscale image (H, W) to a float tensor (1, H, W) in [0, 1]."""
        pixels = np.asarray(image, dtype=np.float32) / 255.0
        return torch.from_numpy(pixels).unsqueeze(0)

    def read_images(self, path, selected_folder, use_transform):
        """Load the selected frames of one folder and stack them.

        Args:
            path: Root directory of the dataset.
            selected_folder: Sub-folder holding the frames of one sample.
            use_transform: Callable applied to each grayscale PIL image, or
                ``None`` to use the built-in tensor conversion.

        Returns:
            torch.Tensor: The per-frame tensors stacked along a new first
            dimension.
        """
        clip = []
        for i in self.frames:
            frame_path = os.path.join(path, selected_folder, 'frame{:06d}.jpg'.format(i))
            # The context manager closes the file as soon as the pixel data
            # has been copied into the converted image.
            with Image.open(frame_path) as raw:
                gray = raw.convert('L')

            if use_transform is not None:
                frame = use_transform(gray)
            else:
                # Without a transform the frame would stay a PIL image, which
                # has no ``squeeze`` and cannot be stacked.
                frame = self._to_tensor(gray)

            # Drop the singleton channel dimension so frames stack as (T, H, W).
            clip.append(frame.squeeze(0))
        return torch.stack(clip, dim=0)

    def __getitem__(self, index):
        """Generate one sample of data.

        Returns:
            tuple: ``(X, y)`` where ``X`` is the stacked frames with a
            leading singleton dimension added and ``y`` is a one-element
            ``LongTensor`` holding the label.
        """
        # Select sample
        folder = self.folders[index]

        # Load data; the extra leading dimension is the single input channel.
        X = self.read_images(self.data_path, folder, self.transform).unsqueeze(0)
        # LongTensor (int64) because the label is a class index, not a float.
        y = torch.LongTensor([self.labels[index]])
        return X, y
# for CRNN
class Dataset_CRNN(data.Dataset):
    """Dataset of video clips stored as folders of JPEG frames, for the CRNN.

    Every sample is read from ``<data_path>/<folder>/frame{i:06d}.jpg`` for
    each ``i`` in ``frames``. Frames keep the colour mode they were stored
    with; no mode conversion is performed.

    Args:
        data_path: Root directory containing one sub-folder per sample.
        folders: Sequence of sub-folder names, one per sample.
        labels: Sequence of integer labels aligned with ``folders``.
        frames: Iterable of frame indices loaded for every sample.
        transform: Optional callable applied to each PIL image. It is
            expected to return a tensor. When ``None``, each frame is
            converted to a channel-first float tensor scaled to [0, 1]
            instead of failing.
    """

    def __init__(self, data_path, folders, labels, frames, transform=None):
        """Store the dataset configuration; no files are read here."""
        self.data_path = data_path
        self.labels = labels
        self.folders = folders
        self.transform = transform
        self.frames = frames

    def __len__(self):
        """Return the total number of samples (one per folder)."""
        return len(self.folders)

    @staticmethod
    def _to_tensor(image):
        """Convert an 8-bit image (H, W) or (H, W, C) to a float tensor (C, H, W) in [0, 1]."""
        pixels = np.asarray(image, dtype=np.float32) / 255.0
        if pixels.ndim == 2:
            # Grayscale input has no channel axis; add one so the layout is uniform.
            pixels = pixels[:, :, np.newaxis]
        return torch.from_numpy(pixels).permute(2, 0, 1).contiguous()

    def read_images(self, path, selected_folder, use_transform):
        """Load the selected frames of one folder and stack them.

        Args:
            path: Root directory of the dataset.
            selected_folder: Sub-folder holding the frames of one sample.
            use_transform: Callable applied to each PIL image, or ``None``
                to use the built-in tensor conversion.

        Returns:
            torch.Tensor: The per-frame tensors stacked along a new first
            dimension.
        """
        clip = []
        for i in self.frames:
            frame_path = os.path.join(path, selected_folder, 'frame{:06d}.jpg'.format(i))
            # Image.open is lazy, so the conversion happens inside the
            # ``with`` block while the file is still open.
            with Image.open(frame_path) as image:
                if use_transform is not None:
                    frame = use_transform(image)
                else:
                    # Without a transform the frame would stay a PIL image,
                    # which torch.stack cannot handle.
                    frame = self._to_tensor(image)
            clip.append(frame)
        return torch.stack(clip, dim=0)

    def __getitem__(self, index):
        """Generate one sample of data.

        Returns:
            tuple: ``(X, y)`` where ``X`` is the stacked frames and ``y`` is
            a one-element ``LongTensor`` holding the label.
        """
        # Select sample
        folder = self.folders[index]

        # Load data
        X = self.read_images(self.data_path, folder, self.transform)
        # LongTensor (int64) because the label is a class index, not a float.
        y = torch.LongTensor([self.labels[index]])
        return X, y
## ---------------------- end of Dataloaders ---------------------- ##
## -------------------- (reload) model prediction ---------------------- ##
def Conv3d_final_prediction(model, device, loader):
    """Run a model over every batch of a loader and collect predicted classes.

    Args:
        model: Module called on each input batch; its output is reduced
            with an argmax over dimension 1.
        device: Device the input batches are moved to before the forward pass.
        loader: Iterable yielding ``(X, y)`` pairs; ``y`` is ignored.

    Returns:
        list: Predicted class indices (Python ints) in loader order.
    """
    model.eval()

    all_y_pred = []
    with torch.no_grad():
        for X, _ in tqdm(loader):
            # distribute data to device
            X = X.to(device)
            output = model(X)
            # Index of the highest score per sample. argmax over dim 1 always
            # yields a 1-D tensor, so a batch holding a single sample still
            # produces a list (squeezing would collapse it to a bare int and
            # break ``extend``).
            y_pred = output.argmax(dim=1)
            all_y_pred.extend(y_pred.cpu().tolist())

    return all_y_pred
def CRNN_final_prediction(model, device, loader):
    """Run an encoder/decoder pair over a loader and collect predicted classes.

    Args:
        model: Two-element sequence ``(cnn_encoder, rnn_decoder)``; the
            decoder is called on the encoder's output and the result is
            reduced with an argmax over dimension 1.
        device: Device the input batches are moved to before the forward pass.
        loader: Iterable yielding ``(X, y)`` pairs; ``y`` is ignored.

    Returns:
        list: Predicted class indices (Python ints) in loader order.
    """
    cnn_encoder, rnn_decoder = model
    cnn_encoder.eval()
    rnn_decoder.eval()

    all_y_pred = []
    with torch.no_grad():
        for X, _ in tqdm(loader):
            # distribute data to device
            X = X.to(device)
            output = rnn_decoder(cnn_encoder(X))
            # Index of the highest score per sample. argmax over dim 1 always
            # yields a 1-D tensor, so a batch holding a single sample still
            # produces a list (squeezing would collapse it to a bare int and
            # break ``extend``).
            y_pred = output.argmax(dim=1)
            all_y_pred.extend(y_pred.cpu().tolist())

    return all_y_pred
## -------------------- end of model prediction ---------------------- ##
## ------------------------ 3D CNN module ---------------------- ##
def conv3D_output_size(img_size, padding, kernel_size, stride):
    """Compute the output shape of a 3D convolution along its three axes.

    For each axis the size is
    ``floor((size + 2 * padding - (kernel - 1) - 1) / stride + 1)``.

    Args:
        img_size: Input size per axis (first three entries are used).
        padding: Padding per axis.
        kernel_size: Kernel size per axis.
        stride: Stride per axis.

    Returns:
        tuple: Three NumPy integers, one output size per axis.
    """
    sizes = []
    for axis in range(3):
        covered = img_size[axis] + 2 * padding[axis] - (kernel_size[axis] - 1) - 1
        sizes.append(np.floor(covered / stride[axis] + 1).astype(int))
    return tuple(sizes)
class CNN3D(nn.Module):
    """Two-stage 3D convolutional classifier for single-channel clips.

    The first convolution takes one input channel, and the first fully
    connected layer is sized from ``(t_dim, img_x, img_y)``, so inputs are
    expected to be shaped ``(batch, 1, t_dim, img_x, img_y)``.

    Args:
        t_dim: Number of frames per clip.
        img_x: First spatial size of each frame.
        img_y: Second spatial size of each frame.
        drop_p: Dropout probability used after each conv stage and before
            the output layer.
        fc_hidden1: Width of the first fully connected layer.
        fc_hidden2: Width of the second fully connected layer.
        num_classes: Number of output classes.
    """

    def __init__(self, t_dim=120, img_x=90, img_y=120, drop_p=0.2, fc_hidden1=256, fc_hidden2=128, num_classes=50):
        super().__init__()

        # video dimensions
        self.t_dim, self.img_x, self.img_y = t_dim, img_x, img_y

        # fully connected layer widths, dropout and output size
        self.fc_hidden1 = fc_hidden1
        self.fc_hidden2 = fc_hidden2
        self.drop_p = drop_p
        self.num_classes = num_classes

        # convolution hyper-parameters
        self.ch1 = 32
        self.ch2 = 48
        self.k1 = (5, 5, 5)  # 3d kernel sizes
        self.k2 = (3, 3, 3)
        self.s1 = (2, 2, 2)  # 3d strides
        self.s2 = (2, 2, 2)
        self.pd1 = (0, 0, 0)  # 3d padding
        self.pd2 = (0, 0, 0)

        # output shapes of both conv layers, needed to size the first FC layer
        input_shape = (self.t_dim, self.img_x, self.img_y)
        self.conv1_outshape = conv3D_output_size(input_shape, self.pd1, self.k1, self.s1)
        self.conv2_outshape = conv3D_output_size(self.conv1_outshape, self.pd2, self.k2, self.s2)

        # Layers are created in a fixed order so parameter order and seeded
        # initialisation stay stable.
        self.conv1 = nn.Conv3d(
            in_channels=1,
            out_channels=self.ch1,
            kernel_size=self.k1,
            stride=self.s1,
            padding=self.pd1,
        )
        self.bn1 = nn.BatchNorm3d(self.ch1)
        self.conv2 = nn.Conv3d(
            in_channels=self.ch1,
            out_channels=self.ch2,
            kernel_size=self.k2,
            stride=self.s2,
            padding=self.pd2,
        )
        self.bn2 = nn.BatchNorm3d(self.ch2)
        self.relu = nn.ReLU(inplace=True)
        self.drop = nn.Dropout3d(self.drop_p)
        # NOTE: the pooling layer is registered but not used in forward().
        self.pool = nn.MaxPool3d(2)

        out_t, out_x, out_y = self.conv2_outshape
        flat_features = self.ch2 * out_t * out_x * out_y
        self.fc1 = nn.Linear(flat_features, self.fc_hidden1)  # fully connected hidden layer
        self.fc2 = nn.Linear(self.fc_hidden1, self.fc_hidden2)
        self.fc3 = nn.Linear(self.fc_hidden2, self.num_classes)  # output = multi-classes

    def forward(self, x_3d):
        """Return the raw class scores for a batch of clips.

        Args:
            x_3d: Input batch fed to the first 3D convolution.

        Returns:
            torch.Tensor: Output of the last linear layer; no softmax is
            applied.
        """
        x = x_3d
        # Conv 1 and Conv 2: convolution -> batch norm -> ReLU -> dropout
        for conv, bn in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            x = conv(x)
            x = bn(x)
            x = self.relu(x)
            x = self.drop(x)

        # Flatten everything except the batch dimension for the FC layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.dropout(x, p=self.drop_p, training=self.training)
        return self.fc3(x)
## --------------------- end of 3D CNN module ---------------- ##
## ------------------------ CRNN module ---------------------- ##
def conv2D_output_size(img_size, padding, kernel_size, stride):
# compute output shape of conv2D
outshape = (np.
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
CRNN.zip (38个子文件)
CRNN
.DS_Store 6KB
prediction.ipynb 6KB
wrong_predictions.pkl 61KB
check_predictions
.DS_Store 6KB
check_video_predictions.ipynb 815KB
.ipynb_checkpoints
check_video_predictions-checkpoint.ipynb 815KB
check_video_predictions.ipynb 15KB
CRNN_check_prediction.py 4KB
CRNN_epoch_test_loss.npy 1KB
UCF101_CRNN.py 9KB
fig_UCF101_CRNN.png 749KB
main.ipynb 57KB
CRNN_ckpt
CRNN_epoch_test_score.npy 1KB
CRNN_epoch_training_scores.npy 312KB
replot_loss.ipynb 162KB
functions.py 15KB
CRNN_epoch_training_losses.npy 312KB
UCF101_videos_prediction.pkl 667KB
UCF101actions.pkl 2KB
__pycache__
load_data.cpython-36.pyc 3KB
functions.cpython-36.pyc 9KB
functions.cpython-38.pyc 10KB
outputs
.DS_Store 6KB
loss_UCF101_CRNN.png 981KB
CRNN_epoch_test_loss.npy 360B
CRNN_epoch_test_score.npy 360B
CRNN_epoch_training_scores.npy 76KB
replot_loss.ipynb 137KB
CRNN_epoch_training_losses.npy 76KB
.ipynb_checkpoints
loss_UCF101_CRNN-checkpoint.png 981KB
replot_loss-checkpoint.ipynb 137KB
.ipynb_checkpoints
prediction-checkpoint.ipynb 6KB
main-checkpoint.ipynb 30KB
functions-checkpoint.py 15KB
check_video_predictions-checkpoint.ipynb 15KB
replot_loss-checkpoint.ipynb 137KB
UCF101_CRNN-checkpoint.py 9KB
CRNN_check_prediction-checkpoint.py 4KB
共 38 条
- 1
资源评论
重剑DS
- 粉丝: 335
- 资源: 5
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功