# -*- coding:utf8 -*-
# ==============================================================================
# Copyright 2017 Baidu.com, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
This module computes evaluation metrics for the DuReader dataset.
"""
import argparse
import json
import sys
import zipfile
from collections import Counter
# from .bleu_metric.bleu import Bleu
from .rouge import Rouge
# Empty-string constant; where it is used is not visible in this part of the file.
EMPTY = ''
# Set of yes/no labels (presumably the values allowed in 'yesno_answers' -- confirm).
YESNO_LABELS = {'Yes', 'No', 'Depends'}
def normalize(s):
    """
    Rewrite every string as its non-whitespace characters joined by spaces.

    Args:
        s: a list of strings.
    Returns:
        A new list holding one normalized string per input string. A falsy
        `s` (for example None or an empty list) is returned unchanged.
    """
    if s:
        # Whitespace characters strip to '' and are dropped; every remaining
        # character becomes its own space-separated token.
        return [' '.join([ch for ch in text if ch.strip()]) for text in s]
    return s
def data_check(obj, task):
    """
    Check that a parsed record carries the fields this module relies on.

    Args:
        obj: a dict-like record. It must contain 'question_id',
            'question_type', 'yesno_answers' (a list) and 'entity_answers'
            (a non-empty list).
        task: not used by any of the checks; kept so callers need not change.
    Raises:
        AssertionError: when a required field is missing or malformed.
    """
    assert 'question_id' in obj, "Missing 'question_id' field."
    # The message must be built from obj['question_id']: obj['question_type']
    # is absent exactly when this assertion fails, so formatting it would
    # raise KeyError instead of the intended AssertionError.
    assert 'question_type' in obj, \
        "Missing 'question_type' field. question_id: {}".format(obj['question_id'])
    assert 'yesno_answers' in obj, \
        "Missing 'yesno_answers' field. question_id: {}".format(obj['question_id'])
    assert isinstance(obj['yesno_answers'], list), (
        "'yesno_answers' field must be a list, if the 'question_type' is not\n"
        "'YES_NO', then this field should be an empty list.\n"
        "question_id: {}"
    ).format(obj['question_id'])
    assert 'entity_answers' in obj, \
        "Missing 'entity_answers' field. question_id: {}".format(obj['question_id'])
    assert isinstance(obj['entity_answers'], list) \
        and len(obj['entity_answers']) > 0, (
            "'entity_answers' field must be a list, and has at least one element,\n"
            "which can be a empty list. question_id: {}"
        ).format(obj['question_id'])
def read_file(file_name, task, is_ref=False):
    """
    Read predicted answers or reference answers from a file.

    The input is either a plain file or a '.zip' archive (detected from the
    file name suffix); every member of an archive is read. Each line must be
    a JSON object that passes `data_check`.

    Args:
        file_name: the name of the file containing the predicted result or
            the reference result.
        task: passed through unchanged to `data_check`.
        is_ref: when True, the 'source' field is copied from each record too.
    Returns:
        A dictionary mapping question_id to the result information. The
        result information is itself a dictionary with these keys, copied
        from the record:
        - question_type
        - yesno_answers
        - answers
        - entity_answers
        - source (only when `is_ref` is True)
    Raises:
        ValueError: when a line is not valid JSON.
        AssertionError: when a record fails `data_check` or a question_id
            appears more than once.
        KeyError: when a record lacks one of the copied keys.
    """
    keys = ['answers', 'yesno_answers', 'entity_answers', 'question_type']
    if is_ref:
        keys += ['source']
    results = {}

    def _load(handle):
        # Parse one open file (plain file or zip member) into `results`.
        for line in handle:
            try:
                obj = json.loads(line.strip())
            except ValueError:
                raise ValueError("Every line of data should be legal json")
            data_check(obj, task)
            qid = obj['question_id']
            assert qid not in results, "Duplicate question_id: {}".format(qid)
            results[qid] = {k: obj[k] for k in keys}

    # Context managers close the archive and every opened file, including
    # when a malformed line aborts the read part-way through.
    if file_name.endswith('.zip'):
        with zipfile.ZipFile(file_name, 'r') as archive:
            for member in archive.namelist():
                with archive.open(member, 'r') as handle:
                    _load(handle)
    else:
        with open(file_name, 'r') as handle:
            _load(handle)
    return results
def compute_bleu_rouge(pred_dict, ref_dict, bleu_order=4):
    """
    Compute the Rouge-L score for predictions against references.

    BLEU scoring is currently disabled in this function, so `bleu_order`
    is accepted but not used.

    Args:
        pred_dict: predictions keyed by question id.
        ref_dict: references keyed by question id; must have exactly the
            same keys as `pred_dict`.
        bleu_order: unused while BLEU scoring is disabled.
    Returns:
        A dict with the single key 'Rouge-L', holding the first value
        returned by `Rouge().compute_score(ref_dict, pred_dict)`.
    Raises:
        AssertionError: when the two dicts do not share the same key set.
    """
    pred_ids = set(pred_dict.keys())
    ref_ids = set(ref_dict.keys())
    assert pred_ids == ref_ids, "missing keys: {}".format(ref_ids - pred_ids)
    rouge_l, _ = Rouge().compute_score(ref_dict, pred_dict)
    return {'Rouge-L': rouge_l}
def local_prf(pred_list, ref_list):
    """
    Score a single prediction list against a single reference list.

    Items are matched as multisets: an item counts as shared at most as
    many times as it occurs in both lists.

    Returns:
        A (precision, recall, f1) tuple, or (0, 0, 0) when the two lists
        share no item.
    """
    overlap = sum((Counter(pred_list) & Counter(ref_list)).values())
    if not overlap:
        return 0, 0, 0
    precision = 1.0 * overlap / len(pred_list)
    recall = 1.0 * overlap / len(ref_list)
    return precision, recall, (2 * precision * recall) / (precision + recall)
def compute_prf(pred_dict, ref_dict):
    """
    Compute entity precision, recall and F1 over all reference questions.

    For every question in `ref_dict`, the predicted entity list is compared
    with the reference entity list that yields the highest `local_prf` F1.
    When no reference overlaps the prediction, the shortest reference list
    is used (or an empty one if there are no references). Counts of distinct
    entities are then accumulated across questions.

    Args:
        pred_dict: maps question_id to a list holding exactly one entity
            list. A question missing from it counts as an empty prediction.
        ref_dict: maps question_id to a list of reference entity lists.
    Returns:
        A dict with keys 'Precision', 'Recall' and 'F1'. All three are 0
        when no predicted entity is correct.
    Raises:
        AssertionError: when a prediction does not hold exactly one entity
            list.
    """
    correct_preds, total_correct, total_preds = 0, 0, 0
    for question_id, all_ref_entity_lists in ref_dict.items():
        pred_entity_list = pred_dict.get(question_id, [[]])
        assert len(pred_entity_list) == 1, \
            'the number of entity list for question_id {} is not 1.'.format(question_id)
        pred_entity_list = pred_entity_list[0]

        # Keep the first reference with the strictly highest local F1.
        best_local_f1 = 0
        best_ref_entity_list = None
        for ref_entity_list in all_ref_entity_lists:
            local_f1 = local_prf(pred_entity_list, ref_entity_list)[2]
            if local_f1 > best_local_f1:
                best_ref_entity_list = ref_entity_list
                best_local_f1 = local_f1
        if best_ref_entity_list is None:
            # Nothing overlapped: fall back to the shortest reference. min()
            # returns the first minimal item, matching a stable sort's head.
            if all_ref_entity_lists:
                best_ref_entity_list = min(all_ref_entity_lists, key=len)
            else:
                best_ref_entity_list = []

        # Entities are counted as sets, so duplicates count once.
        gold_entities = set(best_ref_entity_list)
        pred_entities = set(pred_entity_list)
        correct_preds += len(gold_entities & pred_entities)
        total_preds += len(pred_entities)
        total_correct += len(gold_entities)

    # correct_preds > 0 implies both totals are non-zero, so no zero division.
    p = float(correct_preds) / total_preds if correct_preds > 0 else 0
    r = float(correct_preds) / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    return {'Precision': p, 'Recall': r, 'F1': f1}
def prepare_prf(pred_dict, ref_dict):
    """
    Pull the 'entity_answers' field out of every prediction and reference.

    Returns:
        A (preds, refs) pair of dicts, each mapping a question id to the
        'entity_answers' value of the corresponding entry.
    """
    def _entity_answers(result_dict):
        return {qid: info['entity_answers'] for qid, info in result_dict.items()}

    return _entity_answers(pred_dict), _entity_answers(ref_dict)
def filter_dict(result_dict, key_tag):
    """
    Select the entries of `result_dict` whose key ends with `key_tag`.

    Returns:
        A new dict with the matching key/value pairs; `result_dict` itself
        is not modified.
    """
    return {
        key: value
        for key, value in result_dict.items()
        if key.endswith(key_tag)
    }
def get_metrics(pred_result, ref_result, task, source):
"""
Computes metrics.
"""
metrics = {}
ref_result_filtered = {}
pred_result_filtered = {}
if source == 'both':
ref_result_filtered = ref_result
pred_result_filtered = pred_result
else:
for question_id, info in ref_result.items():
if info['source'] == source:
ref_result_filtered[question_id] = info
if question_id in pred_result:
pred_result_filtered[question_id] = pred_result[question_id]
if task == 'main' or task == 'all' \
没有合适的资源?快使用搜索试试~ 我知道了~
数据集.zip数据集.zip数据集.zip
![preview](https://csdnimg.cn/release/downloadcmsfe/public/img/white-bg.ca8570fa.png)
共38个文件
py:20个
pyc:15个
json:3个
![preview-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/scale.ab9e0183.png)
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 198 浏览量
2023-01-13
13:16:21
上传
评论
收藏 9.99MB ZIP 举报
温馨提示
数据集.zip
资源推荐
资源详情
资源评论
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![7z](https://img-home.csdnimg.cn/images/20210720083312.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
收起资源包目录
![package](https://csdnimg.cn/release/downloadcmsfe/public/img/package.f3fc750b.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
共 38 条
- 1
资源评论
![avatar-default](https://csdnimg.cn/release/downloadcmsfe/public/img/lazyLogo2.1882d7f4.png)
![avatar](https://profile-avatar.csdnimg.cn/8026cd45bb2448efa9c5fd5e435b208c_qq_38735017.jpg!1)
甜辣uu
- 粉丝: 8906
- 资源: 1101
![benefits](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-1.c8e153b4.png)
下载权益
![privilege](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-2.ec46750a.png)
C知道特权
![article](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-3.fc5e5fb6.png)
VIP文章
![course-privilege](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-4.320a6894.png)
课程特权
![rights](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-icon.fe0226a8.png)
开通VIP
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助
![voice](https://csdnimg.cn/release/downloadcmsfe/public/img/voice.245cc511.png)
![center-task](https://csdnimg.cn/release/downloadcmsfe/public/img/center-task.c2eda91a.png)
安全验证
文档复制为VIP权益,开通VIP直接复制
![dialog-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/green-success.6a4acb44.png)