from six import iteritems
import json
import os
import multiprocessing
import numpy as np
import random


class file_data_loader:
    def __next__(self):
        raise NotImplementedError

    def next(self):
        return self.__next__()

    def next_batch(self, batch_size):
        raise NotImplementedError


class npy_data_loader(file_data_loader):
    MODE_INSTANCE = 0     # One batch contains batch_size instances.
    MODE_ENTPAIR_BAG = 1  # One batch contains batch_size bags whose instances share the same entity pair (usually for testing).
    MODE_RELFACT_BAG = 2  # One batch contains batch_size bags whose instances share the same relation fact (usually for training).
    # (A small usage sketch, _demo_npy_loader, follows this class definition.)
    def __iter__(self):
        return self

    def __init__(self, data_dir, prefix, mode, word_vec_npy='vec.npy', shuffle=True, max_length=120, batch_size=160):
        if not os.path.isdir(data_dir):
            raise Exception("[ERROR] Data dir doesn't exist!")
        self.mode = mode
        self.shuffle = shuffle
        self.max_length = max_length
        self.batch_size = batch_size
        self.word_vec_mat = np.load(os.path.join(data_dir, word_vec_npy))
        self.data_word = np.load(os.path.join(data_dir, prefix + "_word.npy"))
        self.data_pos1 = np.load(os.path.join(data_dir, prefix + "_pos1.npy"))
        self.data_pos2 = np.load(os.path.join(data_dir, prefix + "_pos2.npy"))
        self.data_mask = np.load(os.path.join(data_dir, prefix + "_mask.npy"))
        self.data_rel = np.load(os.path.join(data_dir, prefix + "_label.npy"))
        self.data_length = np.load(os.path.join(data_dir, prefix + "_len.npy"))
        self.scope = np.load(os.path.join(data_dir, prefix + "_instance_scope.npy"))
        self.triple = np.load(os.path.join(data_dir, prefix + "_instance_triple.npy"))
        self.relfact_tot = len(self.triple)
        for i in range(self.scope.shape[0]):
            self.scope[i][1] += 1  # shift the scope end so it can be used as an exclusive slice bound
        self.instance_tot = self.data_word.shape[0]
        self.rel_tot = 53
        if self.mode == self.MODE_INSTANCE:
            self.order = list(range(self.instance_tot))
        else:
            self.order = list(range(len(self.scope)))
        self.idx = 0
        if self.shuffle:
            random.shuffle(self.order)
        print("Total relation facts: %d" % self.relfact_tot)
    def __next__(self):
        return self.next_batch(self.batch_size)
    def next_batch(self, batch_size):
        if self.idx >= len(self.order):
            # End of an epoch: reset the cursor, reshuffle if needed, and signal iteration end.
            self.idx = 0
            if self.shuffle:
                random.shuffle(self.order)
            raise StopIteration
        batch_data = {}
        if self.mode == self.MODE_INSTANCE:
            idx0 = self.idx
            idx1 = self.idx + batch_size
            if idx1 > len(self.order):
                # Drop the final incomplete batch and start a new epoch.
                self.idx = 0
                if self.shuffle:
                    random.shuffle(self.order)
                raise StopIteration
            self.idx = idx1
            batch_data['word'] = self.data_word[idx0:idx1]
            batch_data['pos1'] = self.data_pos1[idx0:idx1]
            batch_data['pos2'] = self.data_pos2[idx0:idx1]
            batch_data['rel'] = self.data_rel[idx0:idx1]
            batch_data['length'] = self.data_length[idx0:idx1]
            # In instance mode every instance forms its own "bag", so scope i is [i, i + 1).
            batch_data['scope'] = np.stack([list(range(idx1 - idx0)), list(range(1, idx1 - idx0 + 1))], axis=1)
        elif self.mode == self.MODE_ENTPAIR_BAG or self.mode == self.MODE_RELFACT_BAG:
            idx0 = self.idx
            idx1 = self.idx + batch_size
            if idx1 > len(self.order):
                # Drop the final incomplete batch and start a new epoch.
                self.idx = 0
                if self.shuffle:
                    random.shuffle(self.order)
                raise StopIteration
            self.idx = idx1
            _word = []
            _pos1 = []
            _pos2 = []
            _rel = []
            _ins_rel = []
            _multi_rel = []
            _length = []
            _scope = []
            _mask = []
            cur_pos = 0
            for i in range(idx0, idx1):
                # Each selected bag covers the instance range [begin, end) in the flat arrays.
                begin, end = self.scope[self.order[i]][0], self.scope[self.order[i]][1]
                _word.append(self.data_word[begin:end])
                _pos1.append(self.data_pos1[begin:end])
                _pos2.append(self.data_pos2[begin:end])
                _rel.append(self.data_rel[begin])
                _ins_rel.append(self.data_rel[begin:end])
                _length.append(self.data_length[begin:end])
                _mask.append(self.data_mask[begin:end])
                bag_size = end - begin
                _scope.append([cur_pos, cur_pos + bag_size])
                cur_pos = cur_pos + bag_size
                if self.mode == self.MODE_ENTPAIR_BAG:
                    # Multi-hot label vector: an entity-pair bag may express several relations.
                    _one_multi_rel = np.zeros(self.rel_tot, dtype=np.int32)
                    for j in range(begin, end):
                        _one_multi_rel[self.data_rel[j]] = 1
                    _multi_rel.append(_one_multi_rel)
            batch_data['word'] = np.concatenate(_word)
            batch_data['pos1'] = np.concatenate(_pos1)
            batch_data['pos2'] = np.concatenate(_pos2)
            batch_data['rel'] = np.stack(_rel)
            batch_data['ins_rel'] = np.concatenate(_ins_rel)
            if self.mode == self.MODE_ENTPAIR_BAG:
                batch_data['multi_rel'] = np.stack(_multi_rel)
            batch_data['length'] = np.concatenate(_length)
            batch_data['scope'] = np.stack(_scope)
            batch_data['mask'] = np.concatenate(_mask)
        return batch_data
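

def _demo_npy_loader(data_dir='./data', prefix='train'):
    # Usage sketch (illustrative, not part of the original file): iterate one epoch of
    # relation-fact bags. The data_dir/prefix defaults are assumptions; the directory must
    # already contain vec.npy and the prefix_*.npy files that npy_data_loader expects.
    loader = npy_data_loader(data_dir=data_dir, prefix=prefix,
                             mode=npy_data_loader.MODE_RELFACT_BAG,
                             shuffle=True, batch_size=160)
    for batch in loader:  # __iter__/__next__ make the loader directly iterable; one pass = one epoch
        # batch['word'], batch['pos1'], batch['pos2'], batch['mask'] are flat per-instance arrays;
        # batch['scope'][k] gives the [begin, end) row range of bag k inside them.
        print(batch['word'].shape, batch['scope'].shape)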


class json_file_data_loader(file_data_loader):
    MODE_INSTANCE = 0     # One batch contains batch_size instances.
    MODE_ENTPAIR_BAG = 1  # One batch contains batch_size bags whose instances share the same entity pair (usually for testing).
    MODE_RELFACT_BAG = 2  # One batch contains batch_size bags whose instances share the same relation fact (usually for training).
    def _load_preprocessed_file(self):
        name_prefix = '.'.join(self.file_name.split('/')[-1].split('.')[:-1])
        word_vec_name_prefix = '.'.join(self.word_vec_file_name.split('/')[-1].split('.')[:-1])
        processed_data_dir = '_processed_data'
        if not os.path.isdir(processed_data_dir):
            return False
        word_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_word.npy')
        pos1_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_pos1.npy')
        pos2_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_pos2.npy')
        rel_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_rel.npy')
        mask_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_mask.npy')
        length_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_length.npy')
        entpair2scope_file_name = os.path.join(processed_data_dir, name_prefix + '_entpair2scope.json')
        relfact2scope_file_name = os.path.join(processed_data_dir, name_prefix + '_relfact2scope.json')
        word_vec_mat_file_name = os.path.join(processed_data_dir, word_vec_name_prefix + '_mat.npy')
        word2id_file_name = os.path.join(processed_data_dir, word_vec_name_prefix + '_word2id.json')
        if not os.path.exists(word_npy_file_name) or \
                not os.path.exists(pos1_npy_file_name) or \
                not os.path.exists(pos2_npy_file_name) or \
                not os.path.exists(rel_npy_file_name) or \
                not os.path.exists(mask_npy_file_name) or \
                not os.path.exists(length_npy_file_name) or \
                not os.path.exists(entpair2scope_file_name) or \
                not os.path.exists(relfact2scope_file_name) or \
                not os.path.exists(word_vec_mat_file_name) or \
                not os.path.exists(word2id_file_name):
            return False
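        # Minimal sketch of a plausible continuation, assuming the cached files mirror the
        # attributes used by npy_data_loader above (illustrative only; the remainder of this
        # method is not shown in this excerpt):
        #
        #     print("Loading preprocessed files...")
        #     self.data_word = np.load(word_npy_file_name)
        #     self.data_pos1 = np.load(pos1_npy_file_name)
        #     self.data_pos2 = np.load(pos2_npy_file_name)
        #     self.data_rel = np.load(rel_npy_file_name)
        #     self.data_mask = np.load(mask_npy_file_name)
        #     self.data_length = np.load(length_npy_file_name)
        #     self.word_vec_mat = np.load(word_vec_mat_file_name)
        #     with open(word2id_file_name) as f:
        #         self.word2id = json.load(f)
        #     with open(entpair2scope_file_name) as f:
        #         self.entpair2scope = json.load(f)
        #     with open(relfact2scope_file_name) as f:
        #         self.relfact2scope = json.load(f)
        #     return True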