import torch
import torch.nn.functional as F
from torch import nn, optim
import torchvision
from torchvision import transforms
from torch.autograd import Variable
# Toy POS-tagging corpus: (tokenized sentence, gold tag sequence) pairs.
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

# word_to_ix:       word -> unique integer index
# character_to_ix:  character -> unique integer index
# words_characters: word index -> list of that word's characters
word_to_ix = {}
character_to_ix = {}
words_characters = {}
for sentence, tag in training_data:
    for word in sentence:
        # BUG FIX: the original tested `word not in word_to_idx`, an
        # undefined name (the dict is called word_to_ix) -> NameError.
        if word not in word_to_ix:
            words_characters[len(word_to_ix)] = list(word)
            word_to_ix[word] = len(word_to_ix)
        for character in word:
            if character not in character_to_ix:
                character_to_ix[character] = len(character_to_ix)

print('--------------------\n')
# word_to_ix: {'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4,
#              'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}
# words_characters: {0: ['T', 'h', 'e'], 1: ['d', 'o', 'g'], ...}
# character_to_ix: 16 distinct characters over this corpus.

tag_to_ix = {"DET": 0, "NN": 1, "V": 2}
ix_to_tag = {0: "DET", 1: "NN", 2: "V"}
####################### Not pass>>>>>>>>>>>>>>>>>>
# for word in training_data[0][0]:
# print(word)
# print(words_characters[word.item()])
# break
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
def prepare_sequence(seq, to_ix):
    """Map a sequence of tokens to a 1-D LongTensor of their indices."""
    return torch.tensor([to_ix[token] for token in seq], dtype=torch.long)
# Same toy corpus as above (the original file re-declared it; the
# re-declaration is kept so the script's behavior is unchanged).
training_data = [
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]

# In practice embeddings would be 32/64-dim; 6 keeps the demo tiny.
EMBEDDING_DIM = 6
HIDDEN_DIM = 6

# NOTE: the original re-defined prepare_sequence here, identical to the
# definition above (all call sites are positional); the duplicate, which
# merely shadowed the first definition, has been removed.
class LSTMTagger(nn.Module):
    """Sequence tagger combining word-level and character-level LSTMs.

    Each word's representation is its word embedding concatenated with the
    final hidden state of a character-level LSTM run over the word's
    characters.  A second LSTM runs over those per-word representations and
    a linear layer maps its outputs to log tag probabilities.

    NOTE(review): the original source was vandalized — __init__ and the
    forward() header were replaced by an advertisement comment.  The
    constructor is reconstructed from the call site
    LSTMTagger(embedding_dim, character_embedding_dim, hidden_dim,
               character_hidden_dim, vocab_size, character_size, tagset_size).
    """

    def __init__(self, embedding_dim, character_embedding_dim, hidden_dim,
                 character_hidden_dim, vocab_size, character_size, tagset_size,
                 char_to_ix=None):
        # char_to_ix: optional character->index map.  When None, forward()
        # falls back to the module-level `character_to_ix` built above
        # (backward compatible with the original global-based behavior).
        super(LSTMTagger, self).__init__()
        self.hidden_dim = hidden_dim
        self.char_to_ix = char_to_ix
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.character_embeddings = nn.Embedding(character_size,
                                                 character_embedding_dim)
        # Character-level LSTM: consumes one word's characters at a time.
        self.lstm_character = nn.LSTM(character_embedding_dim,
                                      character_hidden_dim)
        # Word-level LSTM input = word embedding + char-LSTM final hidden.
        self.tag_lstm = nn.LSTM(embedding_dim + character_hidden_dim,
                                hidden_dim)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)

    def forward(self, sentence, words_characters):
        """Score tags for one sentence.

        sentence:         1-D LongTensor of word indices.
        words_characters: dict mapping word index -> list of characters
                          (see `words_characters` built at module level).
        Returns log-probabilities of shape (len(sentence), tagset_size).
        """
        char_map = (self.char_to_ix if self.char_to_ix is not None
                    else character_to_ix)
        embeds = []
        for word_ix in sentence:
            # Word embedding: shape (embedding_dim,).
            word_embed = self.word_embeddings(word_ix)
            # Characters of this word, mapped to index tensor.
            word_character = words_characters[word_ix.item()]
            word_character_in = torch.tensor(
                [char_map[c] for c in word_character], dtype=torch.long)
            character_embeds = self.character_embeddings(word_character_in)
            # Fresh (zero) initial state per word; hidden_character is
            # (h_n, c_n), h_n shape (1, 1, character_hidden_dim).
            character_lstm_out, self.hidden_character = self.lstm_character(
                character_embeds.view(len(word_character_in), 1, -1))
            # Concatenate word embedding with the char-LSTM final hidden.
            embed = torch.cat((word_embed, self.hidden_character[0].view(-1)))
            embeds.append(embed)
        # (len(sentence), batch=1, embedding_dim + character_hidden_dim)
        embeds = torch.cat(embeds).view(len(sentence), 1, -1)
        lstm_out, self.hidden_tag = self.tag_lstm(embeds)
        tag_space = self.hidden2tag(lstm_out.view(len(sentence), -1))
        tag_scores = F.log_softmax(tag_space, dim=1)
        return tag_scores
# ---------------- Model training setup ----------------
WORD_EMBEDDING_DIM = 6
CHARACTER_EMBEDDING_DIM = 3
HIDDEN_DIM = 6
CHARACTER_HIDDEN_DIM = 3

# LSTMTagger(embedding_dim, character_embedding_dim, hidden_dim,
#            character_hidden_dim, vocab_size, character_size, tagset_size)
# BUG FIX: the original passed len(word_to_idx) — an undefined name; the
# vocabulary dict built above is word_to_ix.
model = LSTMTagger(WORD_EMBEDDING_DIM, CHARACTER_EMBEDDING_DIM, HIDDEN_DIM,
                   CHARACTER_HIDDEN_DIM, len(word_to_ix),
                   len(character_to_ix), len(tag_to_ix))
# NLLLoss pairs with the log_softmax output produced by the model.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001)
print('OK...')
# print(word_to_ix)
# Sanity check before training: run the first sentence through the
# untrained model with gradients disabled.
with torch.no_grad():
    # training_data[0][0] is the word list ['The','dog','ate','the','apple'];
    # prepare_sequence maps it to tensor([0, 1, 2, 3, 4]).
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    print(inputs)
    tag_scores = model(inputs, words_characters)
# ---------------- Training loop ----------------
for epoch in range(50):
    for sentence, tags in training_data:
        # Gradients accumulate by default in PyTorch; clear per example.
        optimizer.zero_grad()
        # Encode words and gold tags as index tensors.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)
        # Forward pass: tag_scores shape (len(sentence), tagset_size).
        tag_scores = model(sentence_in, words_characters)
        # NLLLoss: log-probabilities vs. target class indices (len(sentence),).
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()
# NOTE(review): the original file's indentation was lost; print(loss) is
# placed after the loop (final loss) — confirm the intended placement.
print(loss)
print('----------------OK--------------\n')
# ---------------- Evaluation ----------------
# Predict tags for the first training sentence with gradients disabled.
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs, words_characters)  # shape (5, 3)
    # Most likely tag index for each word.
    _, predict_idx = torch.max(tag_scores, 1)
    # tolist() yields plain Python ints, which are safe ix_to_tag keys
    # (the original went through numpy scalars; same lookups, clearer).
    predict_tag_1 = [ix_to_tag[idx] for idx in predict_idx.tolist()]
    print(predict_tag_1)
# (CSDN download-page boilerplate removed — it was scraped site text, not
# part of the program, and made the file a syntax error.)