#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ===============================================================================
#
# Copyright (c) 2020-2022 Chatopera Inc. <https://www.chatopera.com> All Rights Reserved
#
#
# File: /Users/hain/chatopera/efaqa-corpus-zh/app/sample.py
# Author: Hai Liang Wang
# Date: 2020-04-22:09:40:24
#
# ===============================================================================
"""
Loader for the Emotional First Aid Dataset (efaqa-corpus-zh).

Importing this module prints a sponsorship/licensing banner and then reads the
corpus file ``data/efaqa-corpus-zh.utf8.gz`` (located next to this module) into
a module-level list, decoding one JSON document per line. When the data file
does not exist it is first fetched through
``chatoperastore.download_licensedfile`` using the license found in the
``EFAQA_DL_LICENSE`` environment variable.
"""
__copyright__ = "Copyright (c) 2020 Chatopera Inc <https://chatopera.com>. All Rights Reserved"
__author__ = "Hai Liang Wang"
__date__ = "2020-04-22:09:40:24"
# Dataset/package version; interpolated into the banner printed at import time.
__version__ = 1.1
import gzip
import logging
import json
import os
import sys
# Directory that contains this module. It is pushed to the front of sys.path so
# that modules located next to this file can be imported.
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, curdir)

# Refuse to run on Python 2: fail fast with an explicit error.
if sys.version_info[0] < 3:
    raise RuntimeError("Must be using Python 3")

# Python 2 style alias, kept so code referring to `unicode` keeps working.
unicode = str
from chatoperastore import download_licensedfile
# Runtime configuration.
# License key used to download the corpus; None when the variable is unset.
EFAQA_DL_LICENSE = os.environ.get("EFAQA_DL_LICENSE")
# Default location of the corpus file, inside the "data" directory next to
# this module.
CORPUS_DATA_PATH = os.path.join(curdir, "data", "efaqa-corpus-zh.utf8.gz")
# Sponsorship and licensing banner, written to stdout when the module is
# imported. The lines are joined and emitted with a single print call, which
# produces the same output as printing them one by one.
print("\n".join((
    "\n Emotional First Aid Dataset: v%s, Project home: %s" % (
        __version__, "https://github.com/chatopera/efaqa-corpus-zh"),
    "\n Project Sponsored by Chatopera",
    "\n deliver your chatbots with Chatopera Cloud Services --> https://bot.chatopera.com\n",
    "\n Module file path: %s" % __file__,
    "\n ************ NOTICE ************",
    " Require license to download model package, purchase from https://store.chatopera.com/product/efaqa001",
    " ********************************\n",
)))
try:
from smart_open import smart_open
except ImportError:
print(
"smart_open library not found; falling back to local-filesystem-only")
def make_closing(base, **attrs):
"""
Add support for `with Base(attrs) as fout:` to the base class if it's missing.
The base class' `close()` method will be called on context exit, to always close the file properly.
This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
raise "AttributeError: GzipFile instance has no attribute '__exit__'".
"""
if not hasattr(base, '__enter__'):
attrs['__enter__'] = lambda self: self
if not hasattr(base, '__exit__'):
attrs['__exit__'] = lambda self, type, value, traceback: self.close()
return type('Closing' + base.__name__, (base, object), attrs)
def smart_open(fname, mode='rb'):
_, ext = os.path.splitext(fname)
if ext == '.bz2':
from bz2 import BZ2File
return make_closing(BZ2File)(fname, mode)
if ext == '.gz':
from gzip import GzipFile
return make_closing(GzipFile)(fname, mode)
return open(fname, mode)
def load(data_path=CORPUS_DATA_PATH):
    """
    Load the dataset, yielding one decoded JSON document per line of the file.

    This is a generator: nothing happens (including the existence check and
    the download) until the first item is requested.

    :param data_path: path of the corpus file; defaults to CORPUS_DATA_PATH.
    :return: generator of the objects produced by `json.loads` for each line.
    :raises Exception: any error raised while opening, reading or decoding the
        file is printed and then re-raised unchanged.
    """
    if not os.path.exists(data_path):
        # Corpus not available locally: fetch it with the configured license.
        # NOTE(review): EFAQA_DL_LICENSE is None when the environment variable
        # is unset; how download_licensedfile handles that is not visible here.
        download_licensedfile(EFAQA_DL_LICENSE, data_path)

    try:
        with smart_open(data_path) as f:
            for line in f:
                yield json.loads(line)
    except Exception as e:
        # Catch Exception rather than BaseException so that GeneratorExit
        # (raised when the consumer stops iterating early) and
        # KeyboardInterrupt pass through without being printed.
        print(e)
        raise
# Eagerly read the whole corpus into the module-level list `l` (this runs when
# the module is imported or executed), then report how many records it holds.
l = list(load())
print("[Emotional First Aid Dataset] Loaded data corpus size: %s" % len(l))
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
人工智能-项目实践-问答系统-Emotional First Aid Dataset, 心理咨询问答、聊天机器人语料库 心理咨询问答语料库(以下也称为“数据集”,“语料库”)是为应用人工智能技术于心理咨询领域制作的语料。据我们所知,这是心理咨询领域首个开放的 QA 语料库,包括 20,000 条心理咨询数据,也是迄今公开的最大的中文心理咨询对话语料(发稿日期 2022-04-07)。数据集内容丰富,不但具备多轮对话内容,也有分类等信息,制作过程耗费大量时间和精力,比如标注过程是面向多轮对话,平均每条标记耗时超过 1 分钟。
资源推荐
资源详情
资源评论
收起资源包目录
人工智能-项目实践-问答系统-Emotional First Aid Dataset, 心理咨询问答、聊天机器人语料库.zip (8个子文件)
efaqa-corpus-zh-master
setup.py 2KB
assets
2.jpg 83KB
1.jpg 136KB
efaqa_corpus_zh
__init__.py 3KB
data
__init__.py 13B
.gitignore 26B
publish.sh 415B
demo.py 2KB
共 8 条
- 1
资源评论
- judent 2024-03-13 这个资源总结的也太全面了吧,内容详实,对我帮助很大。
博士僧小星
- 粉丝: 1774
- 资源: 5875
下载权益
C知道特权
VIP文章
课程特权
开通VIP
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功