自然语言处理NLP自用代码，实现nnLM功能，运用pytorch流行框架，实现代码

共3个文件

py：1个

ipynb：1个

bak：1个

自然语言处理

pytorch

需积分: 5 86 浏览量 2023-08-06 00:42:17 上传评论收藏 4KB RAR 举报

资源推荐

资源详情

资源评论

收起资源包目录

1-1.NNLM.rar （3个子文件）

1-1.NNLM

NNLM.py 2KB

NNLM.ipynb 4KB

NNLM.py.bak 2KB

import torch import torch.nn as nn import torch.optim as optim def make_batch(): input_batch = [] target_batch = [] for sen in sentences: word = sen.split() # space tokenizer input = [word_dict[n] for n in word[:-1]] # create (1~n-1) as input target = word_dict[word[-1]] # create (n) as target, We usually call this 'casual language model' input_batch.append(input) target_batch.append(target) return input_batch, target_batch # Model class NNLM(nn.Module): def __init__(self): super(NNLM, self).__init__() self.C = nn.Embedding(n_class, m) self.H = nn.Linear(n_step * m, n_hidden, bias=False) self.d = nn.Parameter(torch.ones(n_hidden)) self.U = nn.Linear(n_hidden, n_class, bias=False) self.W = nn.Linear(n_step * m, n_class, bias=False) self.b = nn.Parameter(torch.ones(n_class)) def forward(self, X): X = self.C(X) # X : [batch_size, n_step, m] X = X.view(-1, n_step * m) # [batch_size, n_step * m] tanh = torch.tanh(self.d + self.H(X)) # [batch_size, n_hidden] output = self.b + self.W(X) + self.U(tanh) # [batch_size, n_class] return output if __name__ == '__main__': n_step = 2 # number of steps, n-1 in paper n_hidden = 2 # number of hidden size, h in paper m = 2 # embedding size, m in paper sentences = ["i like dog", "i love coffee", "i hate milk"] word_list = " ".join(sentences).split() word_list = list(set(word_list)) word_dict = {w: i for i, w in enumerate(word_list)} number_dict = {i: w for i, w in enumerate(word_list)} n_class = len(word_dict) # number of Vocabulary model = NNLM() criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=0.001) input_batch, target_batch = make_batch() input_batch = torch.LongTensor(input_batch) target_batch = torch.LongTensor(target_batch) # Training for epoch in range(5000): optimizer.zero_grad() output = model(input_batch) # output : [batch_size, n_class], target_batch : [batch_size] loss = criterion(output, target_batch) if (epoch + 1) % 1000 == 0: print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss)) loss.backward() optimizer.step() # Predict predict = model(input_batch).data.max(1, keepdim=True)[1] # Test print([sen.split()[:2] for sen in sentences], '->', [number_dict[n.item()] for n in predict.squeeze()])

评论收藏

内容反馈