from six import iteritems
import json
import os
import multiprocessing
import numpy as np
import random


class file_data_loader:
    def __next__(self):
        raise NotImplementedError

    def next(self):
        return self.__next__()

    def next_batch(self, batch_size):
        raise NotImplementedError


class npy_data_loader(file_data_loader):
    MODE_INSTANCE = 0     # One batch contains batch_size instances.
    MODE_ENTPAIR_BAG = 1  # One batch contains batch_size bags whose instances share the same entity pair (usually for testing).
    MODE_RELFACT_BAG = 2  # One batch contains batch_size bags whose instances share the same relation fact (usually for training).
    # (A small usage sketch, _demo_npy_loader, follows this class definition.)
    def __iter__(self):
        return self

    def __init__(self, data_dir, prefix, mode, word_vec_npy='vec.npy', shuffle=True, max_length=120, batch_size=160):
        if not os.path.isdir(data_dir):
            raise Exception("[ERROR] Data dir doesn't exist!")
        self.mode = mode
        self.shuffle = shuffle
        self.max_length = max_length
        self.batch_size = batch_size
        self.word_vec_mat = np.load(os.path.join(data_dir, word_vec_npy))
        self.data_word = np.load(os.path.join(data_dir, prefix + "_word.npy"))
        self.data_pos1 = np.load(os.path.join(data_dir, prefix + "_pos1.npy"))
        self.data_pos2 = np.load(os.path.join(data_dir, prefix + "_pos2.npy"))
        self.data_mask = np.load(os.path.join(data_dir, prefix + "_mask.npy"))
        self.data_rel = np.load(os.path.join(data_dir, prefix + "_label.npy"))
        self.data_length = np.load(os.path.join(data_dir, prefix + "_len.npy"))
        self.scope = np.load(os.path.join(data_dir, prefix + "_instance_scope.npy"))
        self.triple = np.load(os.path.join(data_dir, prefix + "_instance_triple.npy"))
        self.relfact_tot = len(self.triple)
        for i in range(self.scope.shape[0]):
            self.scope[i][1] += 1  # shift the scope end so it can be used as an exclusive slice bound
        self.instance_tot = self.data_word.shape[0]
        self.rel_tot = 53
        if self.mode == self.MODE_INSTANCE:
            self.order = list(range(self.instance_tot))
        else:
            self.order = list(range(len(self.scope)))
        self.idx = 0
        if self.shuffle:
            random.shuffle(self.order)
        print("Total relation facts: %d" % self.relfact_tot)
    def __next__(self):
        return self.next_batch(self.batch_size)
    def next_batch(self, batch_size):
        if self.idx >= len(self.order):
            # End of an epoch: reset the cursor, reshuffle if needed, and signal iteration end.
            self.idx = 0
            if self.shuffle:
                random.shuffle(self.order)
            raise StopIteration
        batch_data = {}
        if self.mode == self.MODE_INSTANCE:
            idx0 = self.idx
            idx1 = self.idx + batch_size
            if idx1 > len(self.order):
                # Drop the final incomplete batch and start a new epoch.
                self.idx = 0
                if self.shuffle:
                    random.shuffle(self.order)
                raise StopIteration
            self.idx = idx1
            batch_data['word'] = self.data_word[idx0:idx1]
            batch_data['pos1'] = self.data_pos1[idx0:idx1]
            batch_data['pos2'] = self.data_pos2[idx0:idx1]
            batch_data['rel'] = self.data_rel[idx0:idx1]
            batch_data['length'] = self.data_length[idx0:idx1]
            # In instance mode every instance forms its own "bag", so scope i is [i, i + 1).
            batch_data['scope'] = np.stack([list(range(idx1 - idx0)), list(range(1, idx1 - idx0 + 1))], axis=1)
        elif self.mode == self.MODE_ENTPAIR_BAG or self.mode == self.MODE_RELFACT_BAG:
            idx0 = self.idx
            idx1 = self.idx + batch_size
            if idx1 > len(self.order):
                # Drop the final incomplete batch and start a new epoch.
                self.idx = 0
                if self.shuffle:
                    random.shuffle(self.order)
                raise StopIteration
            self.idx = idx1
            _word = []
            _pos1 = []
            _pos2 = []
            _rel = []
            _ins_rel = []
            _multi_rel = []
            _length = []
            _scope = []
            _mask = []
            cur_pos = 0
            for i in range(idx0, idx1):
                # Each selected bag covers the instance range [begin, end) in the flat arrays.
                begin, end = self.scope[self.order[i]][0], self.scope[self.order[i]][1]
                _word.append(self.data_word[begin:end])
                _pos1.append(self.data_pos1[begin:end])
                _pos2.append(self.data_pos2[begin:end])
                _rel.append(self.data_rel[begin])
                _ins_rel.append(self.data_rel[begin:end])
                _length.append(self.data_length[begin:end])
                _mask.append(self.data_mask[begin:end])
                bag_size = end - begin
                _scope.append([cur_pos, cur_pos + bag_size])
                cur_pos = cur_pos + bag_size
                if self.mode == self.MODE_ENTPAIR_BAG:
                    # Multi-hot label vector: an entity-pair bag may express several relations.
                    _one_multi_rel = np.zeros(self.rel_tot, dtype=np.int32)
                    for j in range(begin, end):
                        _one_multi_rel[self.data_rel[j]] = 1
                    _multi_rel.append(_one_multi_rel)
            batch_data['word'] = np.concatenate(_word)
            batch_data['pos1'] = np.concatenate(_pos1)
            batch_data['pos2'] = np.concatenate(_pos2)
            batch_data['rel'] = np.stack(_rel)
            batch_data['ins_rel'] = np.concatenate(_ins_rel)
            if self.mode == self.MODE_ENTPAIR_BAG:
                batch_data['multi_rel'] = np.stack(_multi_rel)
            batch_data['length'] = np.concatenate(_length)
            batch_data['scope'] = np.stack(_scope)
            batch_data['mask'] = np.concatenate(_mask)
        return batch_data
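

def _demo_npy_loader(data_dir='./data', prefix='train'):
    # Usage sketch (illustrative, not part of the original file): iterate one epoch of
    # relation-fact bags. The data_dir/prefix defaults are assumptions; the directory must
    # already contain vec.npy and the prefix_*.npy files that npy_data_loader expects.
    loader = npy_data_loader(data_dir=data_dir, prefix=prefix,
                             mode=npy_data_loader.MODE_RELFACT_BAG,
                             shuffle=True, batch_size=160)
    for batch in loader:  # __iter__/__next__ make the loader directly iterable; one pass = one epoch
        # batch['word'], batch['pos1'], batch['pos2'], batch['mask'] are flat per-instance arrays;
        # batch['scope'][k] gives the [begin, end) row range of bag k inside them.
        print(batch['word'].shape, batch['scope'].shape)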


class json_file_data_loader(file_data_loader):
    MODE_INSTANCE = 0     # One batch contains batch_size instances.
    MODE_ENTPAIR_BAG = 1  # One batch contains batch_size bags whose instances share the same entity pair (usually for testing).
    MODE_RELFACT_BAG = 2  # One batch contains batch_size bags whose instances share the same relation fact (usually for training).
    def _load_preprocessed_file(self):
        name_prefix = '.'.join(self.file_name.split('/')[-1].split('.')[:-1])
        word_vec_name_prefix = '.'.join(self.word_vec_file_name.split('/')[-1].split('.')[:-1])
        processed_data_dir = '_processed_data'
        if not os.path.isdir(processed_data_dir):
            return False
        word_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_word.npy')
        pos1_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_pos1.npy')
        pos2_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_pos2.npy')
        rel_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_rel.npy')
        mask_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_mask.npy')
        length_npy_file_name = os.path.join(processed_data_dir, name_prefix + '_length.npy')
        entpair2scope_file_name = os.path.join(processed_data_dir, name_prefix + '_entpair2scope.json')
        relfact2scope_file_name = os.path.join(processed_data_dir, name_prefix + '_relfact2scope.json')
        word_vec_mat_file_name = os.path.join(processed_data_dir, word_vec_name_prefix + '_mat.npy')
        word2id_file_name = os.path.join(processed_data_dir, word_vec_name_prefix + '_word2id.json')
        if not os.path.exists(word_npy_file_name) or \
                not os.path.exists(pos1_npy_file_name) or \
                not os.path.exists(pos2_npy_file_name) or \
                not os.path.exists(rel_npy_file_name) or \
                not os.path.exists(mask_npy_file_name) or \
                not os.path.exists(length_npy_file_name) or \
                not os.path.exists(entpair2scope_file_name) or \
                not os.path.exists(relfact2scope_file_name) or \
                not os.path.exists(word_vec_mat_file_name) or \
                not os.path.exists(word2id_file_name):
            return False
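        # Minimal sketch of a plausible continuation, assuming the cached files mirror the
        # attributes used by npy_data_loader above (illustrative only; the remainder of this
        # method is not shown in this excerpt):
        #
        #     print("Loading preprocessed files...")
        #     self.data_word = np.load(word_npy_file_name)
        #     self.data_pos1 = np.load(pos1_npy_file_name)
        #     self.data_pos2 = np.load(pos2_npy_file_name)
        #     self.data_rel = np.load(rel_npy_file_name)
        #     self.data_mask = np.load(mask_npy_file_name)
        #     self.data_length = np.load(length_npy_file_name)
        #     self.word_vec_mat = np.load(word_vec_mat_file_name)
        #     with open(word2id_file_name) as f:
        #         self.word2id = json.load(f)
        #     with open(entpair2scope_file_name) as f:
        #         self.entpair2scope = json.load(f)
        #     with open(relfact2scope_file_name) as f:
        #         self.relfact2scope = json.load(f)
        #     return True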