import tensorflow as tf
from tensorflow.keras import layers
from bert import BertModelLayer, albert_params, params_from_pretrained_ckpt, fetch_brightmart_albert_model
from bert.tokenization import albert_tokenization
import numpy as np
import tensorflow_addons as tf_ad
from transformer import Transformer
# Label inventory for the CRF layer: 'O' for non-entity tokens plus
# start (_s) / middle (_m) / end (_e) positional tags for each of the four
# entity types (attribution, mainPart, property, value) — 13 labels in all,
# matching the size-13 Dense layer and 13x13 transition matrix in Ner below.
tag_map = ['O',
           'attribution_s', 'attribution_m', 'attribution_e',
           'mainPart_s', 'mainPart_m', 'mainPart_e',
           'property_s', 'property_m', 'property_e',
           'value_s', 'value_m', 'value_e']
# The bare string below is an author note (Chinese). Translation:
# "Simplified version — each training step feeds in one passage of text."
"""
简要版
每次输入一段文本进行训练
"""
def split(data):
    """Group consecutive token indices of a predicted tag sequence into spans.

    :param data: list of predicted tag strings (predictions), e.g.
                 ['O', 'property_s', 'property_e', ...]
    :return: (rt, tag) — rt is a list of '_'-joined index strings, one entry
             per span; tag is the label for each span (tags longer than one
             character have their last two characters, the positional
             suffix such as '_s'/'_m'/'_e', stripped; 'O' is kept as-is).

    NOTE(review): the start-of-span test looks for uppercase '_S', while
    tag_map above defines lowercase '_s' suffixes — verify that tags are
    uppercased before reaching this function, otherwise that branch never
    fires. TODO: confirm with the caller.
    """
    rt = []
    tag = []
    cache = []   # indices (as strings) of the span currently being collected
    o_c = 0      # running count of 'O' tags since the last reset
    for index, x in enumerate(data):
        cache.append(str(index))
        if x == 'O':
            o_c += 1
        # Flush the collected span when a new entity starts ('_S' tag) or
        # when the first 'O' arrives while a span is in progress.
        if ('_S' in x and len(cache) > 1) or (x == 'O' and o_c == 1 and len(cache) > 1):
            # Reset the 'O' counter except in the first-'O' flush case.
            if o_c != 1 or not (x == 'O' and o_c == 1 and len(cache) > 1):
                o_c = 0
            rt.append('_'.join(cache[:-1]))
            cache = [cache[-1]]
            # Span label comes from the previous token's tag; strip the
            # two-character positional suffix when the tag is longer than 1.
            if len(data[index-1]) > 1:
                tag.append(data[index-1][:-2])
            else:
                tag.append(data[index-1])
    # Flush the trailing span, labelled by the final tag.
    if len(cache) != 0:
        rt.append('_'.join(cache))
        if len(data[-1]) > 1:
            tag.append(data[-1][:-2])
        else:
            tag.append(data[-1])
    return rt, tag
class Ner(tf.keras.Model):
def __init__(self):
super(Ner, self).__init__()
config ={
"albert_name":"albert_tiny_489k",
"dir":"./data/model",
"hiddent_size":312,
"hiddent_size2": 312*2,
"dropout": 0.5,
"max_sentences":64
}
self.config = config
model_name = config['albert_name']
model_dir = fetch_brightmart_albert_model(model_name, config['dir'])
bert_params = params_from_pretrained_ckpt(model_dir)
bert_model = BertModelLayer.from_params(bert_params, name='bert')
# bert_model.build(input_shape=(None, 256))
self.albert = bert_model
self.bilstm = layers.Bidirectional(layers.LSTM(config['hiddent_size'], return_sequences=True))
self.ner_dense = layers.Dense(13, name='ner_dense')
self.dropout = layers.Dropout(config['dropout'], noise_shape=[None, 1, config['hiddent_size']])
self.transition_params = tf.Variable(tf.random.uniform(shape=(13, 13)), trainable=True, name='transition_params')
self.sentences_pos_embedding = tf.Variable(tf.random.uniform(shape=(config['max_sentences'], config['hiddent_size2'])), trainable=True, name='sentences_pos_embedding')
self.transformer_2 = Transformer(config['hiddent_size2'], encoder_stack=1, name='transformer-2', trainable=True)
# self.transformer_2 = layers.LSTM(config['hiddent_size2'], return_sequences=True)
# self.transformer_2 = layers.Bidirectional(layers.LSTM(config['hiddent_size'], return_sequences=True))
self.type_embedding = tf.Variable(tf.random.uniform(shape=(4, config['hiddent_size2'])), trainable=True, name='type_embedding')
self.no_entity_emedding = tf.Variable(tf.random.uniform(shape=(1, config['hiddent_size2'])), trainable=True, name='no_entity_emedding')
# self.transformer_3 = Transformer(config['hiddent_size2'], encoder_stack=1, name='transformer-3', trainable=True)
# self.transformer_3 = layers.LSTM(config['hiddent_size2'], return_sequences=True)
self.transformer_3 = layers.Bidirectional(layers.LSTM(config['hiddent_size'], return_sequences=True))
self.classifier = [layers.Dense(1, name='classifier_1'),
layers.Dense(1, name='classifier_2'),
layers.Dense(1, name='classifier_3'),
layers.Dense(1, name='classifier_4')
]
self.node_list = ['property'.upper(),'attribution'.upper(),'value'.upper(),'mainPart'.upper()]
def __encode(self, text, mask, training=True):
"""
第一层编码
:param text: tf.int32 [None, seq_length]
:param mask: tf.bool [None, seq_length]
:param training: bool
:return: [None, seq_length, albert_output_size]
"""
em = self.albert(text, mask, training)
em = self.dropout(em, training=training)
em = self.bilstm(em)
return em
def __ner_decode(self, logits, mask, tag=None, training=True):
"""
实体识别部分的解码
:param logits: 序列编码 tf.float32 [None, seq_length, albert_output_size]
:param mask: 序列实际长度 tf.int32 [None]
:param tag: 标签 tf.int32 [None, pos_size]
:return:
"""
# logits = self.dropout(logits, training=training)
# logits = self.bilstm(logits)
# ft = self.ner_dense(ft)
ft = self.ner_dense(logits)
if tag!=None:
log_likelihood, self.transition_params = tf_ad.text.crf.crf_log_likelihood(
ft, tag, mask, transition_params=self.transition_params)
loss = tf.reduce_mean(-log_likelihood)
return ft, loss
else:
return ft
def __sentence_and_entity_embedding(self, logits, entity_index, entity_masks, training=True):
"""
提取实体编码
:param logits: 第一个编码层的输出 tf.float32 [None ,seq_length, albert_output_size]
:param entity_index: 实体的坐标,tf.int32 [entity_count, 3]
:param entity_masks: 实体类型 [entity_count] 例如 [0,0,1,1,0] 连续的数字代表是相同的字段
:param training:
:return:
"""
sentences_embedding = tf.reduce_max(logits, axis=1) \
+ self.sentences_pos_embedding[:tf.shape(logits)[0]]
entity = []
# 连接
# for index in range(entity_index.shape[0]):
for x in entity_index:
# x = entity_index[index]
entity.append(tf.reshape(tf.reduce_max(logits[x[0],x[1]:x[2]], axis=0), [1, self.config['hiddent_size2']]))
entity_embedding = tf.concat(entity, axis=0)
entity_pos_embedding = tf.nn.embedding_lookup(self.sentences_pos_embedding, entity_index[:,0])
entity_embedding = entity_embedding+entity_pos_embedding
all_embedding = tf.reshape(tf.concat([sentences_embedding, entity_embedding], axis=0), [1, -1, self.config['hiddent_size2']])
# 第二层编码
all_embedding = self.transformer_2(all_embedding,
tf.convert_to_tensor(
np.zeros(tf.shape(all_embedding)[:-1]), dtype=tf.bool), training)
# all_embedding = self.transformer_2(all_embedding)
all_embedding = tf.reshape(all_embedding, [-1, self.config['hiddent_size2']])
entity_size = tf.shape(entity_embedding)[0]
entity_embedding = all_embedding[-entity_size:]
sentences_embedding = all_embedding[:-entity_size]
document_embedding = tf.reduce_max(sentences_embedding, axis=0)
# 合并相同的实体
entity_embedding_m = []
cache = []
last = entity_masks[0]
for index, x in enumerate(entity_embedding):
# for index in range(entity_size):
x = entity_embedding[index]
if entity_masks[index]==last:
cache.append(tf.reshape(x, [1, -1]))
else:
entity_embedding_m.append(tf.reshape(
tf.reduce_max(
tf.concat(cache, axis=0),
axis=0, keepdims=True),
[1, -1]))
cache = [tf.reshape(
没有合适的资源?快使用搜索试试~ 我知道了~
model_ner_ner_源码
![preview](https://csdnimg.cn/release/downloadcmsfe/public/img/white-bg.ca8570fa.png)
共1个文件
py:1个
![preview-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/scale.ab9e0183.png)
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 171 浏览量
2021-10-04
01:45:56
上传
评论
收藏 6KB ZIP 举报
温馨提示
基于tf2.x 实现,只是模型结果代码,无训练代码也没训练数据,效果尚可
资源推荐
资源详情
资源评论
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
收起资源包目录
![package](https://csdnimg.cn/release/downloadcmsfe/public/img/package.f3fc750b.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
共 1 条
- 1
资源评论
![avatar-default](https://csdnimg.cn/release/downloadcmsfe/public/img/lazyLogo2.1882d7f4.png)
![avatar](https://profile-avatar.csdnimg.cn/e6e0941d327e4e3b957168e61141d8c5_weixin_42676824.jpg!1)
周玉坤举重
- 粉丝: 63
- 资源: 4779
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助
![voice](https://csdnimg.cn/release/downloadcmsfe/public/img/voice.245cc511.png)
![center-task](https://csdnimg.cn/release/downloadcmsfe/public/img/center-task.c2eda91a.png)
安全验证
文档复制为VIP权益,开通VIP直接复制
![dialog-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/green-success.6a4acb44.png)