# task-start
import pickle
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from tqdm import trange
import warnings
warnings.filterwarnings("ignore")
# Seed every RNG source the script touches (torch CPU, torch CUDA, NumPy,
# stdlib random) so training runs are reproducible across restarts.
SEED = 42
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
np.random.seed(SEED)
random.seed(SEED)
# Force cuDNN to select deterministic kernels (may be slower, but repeatable).
torch.backends.cudnn.deterministic = True
class TextClassifier(nn.Module):
    """Transformer-encoder classifier over integer token-id sequences.

    Expects sequence-first input of shape (seq_len, batch) — the default
    layout of ``nn.TransformerEncoder`` — and produces per-sequence class
    logits from the encoder state at the first token position.
    """

    def __init__(self, vocab_size=1000, embed_dim=128, nhead=4, num_encoder_layers=2, num_classes=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.fc = nn.Linear(embed_dim, num_classes)

    def forward(self, text):
        """Map token ids (seq_len, batch) to logits (batch, num_classes)."""
        hidden = self.transformer_encoder(self.embedding(text))
        # Classify from the encoder output at sequence position 0.
        return self.fc(hidden[0])
def get_data_loaders(batch_size=32, train_fraction=0.8):
    """Load the pickled dataset and return (train_loader, val_loader).

    Reads a ``(data, labels)`` tensor pair from
    ``text_classify_training_data.pkl``, wraps it in a ``TensorDataset``,
    and splits it into a shuffled training loader and a sequential
    validation loader.

    Args:
        batch_size: batch size for both loaders (default 32, as before).
        train_fraction: fraction of the dataset used for training
            (default 0.8, as before).

    Returns:
        (train_loader, val_loader) tuple of DataLoaders.

    NOTE(review): ``pickle.load`` on an untrusted file can execute
    arbitrary code — this assumes the pickle is a trusted local artifact.
    """
    # Use a context manager so the file handle is always closed; the
    # original `pickle.load(open(...))` leaked the handle.
    with open('text_classify_training_data.pkl', 'rb') as f:
        data, labels = pickle.load(f)
    dataset = TensorDataset(data, labels)
    train_size = int(train_fraction * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    return train_loader, val_loader
def train(model, iterator, criterion, optimizer):
    """Run one training epoch over `iterator`; return the mean batch loss."""
    model.train()
    running_loss = 0.0
    for batch_text, batch_labels in iterator:
        optimizer.zero_grad()
        # The model expects sequence-first input: (seq_len, batch).
        logits = model(batch_text.transpose(0, 1))
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / len(iterator)
def evaluate(model, iterator, criterion):
    """Evaluate `model` on `iterator` with class 1 as the positive class.

    Returns:
        (avg_loss, accuracy_percent, precision, recall, f1) where the
        confusion counts are accumulated over the entire iterator.

    Precision, recall, and F1 degrade to 0.0 when their denominator is
    zero (e.g. the model never predicts the positive class, or the set
    contains no positive labels) instead of raising ZeroDivisionError
    as the original did.
    """
    model.eval()
    total_loss = 0
    correct = 0
    total = 0
    true_positives = 0   # accumulated over the whole validation set
    false_positives = 0  # accumulated over the whole validation set
    false_negatives = 0  # accumulated over the whole validation set
    with torch.no_grad():
        for text, label in iterator:
            # Model expects sequence-first input: (seq_len, batch).
            outputs = model(text.transpose(0, 1))
            loss = criterion(outputs, label)
            total_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += label.size(0)
            correct += (predicted == label).sum().item()
            # Per-batch confusion counts for the positive class (label 1).
            true_positives += ((predicted == 1) & (label == 1)).sum().item()
            false_positives += ((predicted == 1) & (label == 0)).sum().item()
            false_negatives += ((predicted == 0) & (label == 1)).sum().item()
    accuracy = correct / total * 100
    # Guard zero denominators so the metrics are defined for degenerate
    # predictions (all-negative model, no positive labels, etc.).
    predicted_pos = true_positives + false_positives
    actual_pos = true_positives + false_negatives
    precision = true_positives / predicted_pos if predicted_pos else 0.0
    recall = true_positives / actual_pos if actual_pos else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    return total_loss / len(iterator), accuracy, precision, recall, f1
def run():
    """Train a TextClassifier for 10 epochs, appending metrics to training.log."""
    classifier = TextClassifier()
    train_loader, val_loader = get_data_loaders()
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(classifier.parameters(), lr=0.001)
    NUM_EPOCHS = 10
    for epoch in trange(NUM_EPOCHS):
        train_loss = train(classifier, train_loader, loss_fn, optimizer)
        val_loss, val_accuracy, precision, recall, f1 = evaluate(classifier, val_loader, loss_fn)
        # Append this epoch's metrics to the log file.
        with open('training.log', 'a') as f:
            f.write("Epoch: {:d}\nTrain Loss: {:.3f}\nVal Loss: {:.3f} | Val Accuracy: {:.2f}% | Precision: {:.3f} | Recall: {:.3f} | F1: {:.3f}\n".format(epoch+1,train_loss,val_loss,val_accuracy,precision,recall,f1))


if __name__ == '__main__':
    run()
# task-end
AI_Code (1).zip
需积分: 0 52 浏览量
2024-05-22
19:06:18
上传
评论
收藏 14KB ZIP 举报
curry-
- 粉丝: 0
- 资源: 1
最新资源
- python-leetcode面试题解之第270题最接近二叉搜索树值.zip
- python-leetcode面试题解之第267题回文排列II.zip
- python-leetcode面试题解之第264题丑数II.zip
- python-leetcode面试题解之第263题丑数.zip
- python-leetcode面试题解之第258题各位相加.zip
- python-leetcode面试题解之第257题二叉树的所有路径.zip
- python-leetcode面试题解之第253题会议室II.zip
- python-leetcode面试题解之第252题会议室.zip
- python-leetcode面试题解之第249题移位字符串分组.zip
- 基于LUT查找表方法的正弦信号产生器FPGA实现,包含testbench,包括程序,注释,操作步骤
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈