/*
* <author>Han He</author>
* <email>me@hankcs.com</email>
* <create-date>2020-12-26 11:54 PM</create-date>
*
* <copyright file="HanLPClient.java">
* Copyright (c) 2020, Han He. All Rights Reserved, http://www.hankcs.com/
* See LICENSE file in the project root for full license information.
* </copyright>
*/
package com.hankcs.hanlp.restful;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.hankcs.hanlp.restful.mrp.MeaningRepresentation;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;
/**
* A RESTful client implementing the data format specification of HanLP.
*
* @author hankcs
* @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
*/
public class HanLPClient
{
private String url;
private String auth;
private String language;
private int timeout;
private ObjectMapper mapper;
/**
* @param url An API endpoint to a service provider.
* @param auth An auth key licenced by a service provider.
* @param language The language this client will be expecting. Contact the service provider for the list of
* languages supported. Conventionally, zh is used for Chinese and mul for multilingual.
* Leave null to use the default language on server.
* @param timeout Maximum waiting time in seconds for a request.
*/
public HanLPClient(String url, String auth, String language, int timeout)
{
if (auth == null)
{
auth = System.getenv().getOrDefault("HANLP_AUTH", null);
}
this.url = url;
this.auth = auth;
this.language = language;
this.timeout = timeout * 1000;
this.mapper = new ObjectMapper();
}
/**
* Creates a client with no explicit language and a timeout of 5, by delegating to
* {@link #HanLPClient(String, String, String, int)}.
*
* @param url An API endpoint to a service provider.
* @param auth An auth key licensed by a service provider.
*/
public HanLPClient(String url, String auth)
{
// language = null, timeout = 5 (the four-argument constructor documents the unit as seconds)
this(url, auth, null, 5);
}
/**
 * Parse a raw document by posting it to the {@code /parse} endpoint and decoding the reply
 * into a map with the Jackson mapper.
 *
 * @param text      Document content which can have multiple sentences.
 * @param tasks     Tasks to perform.
 * @param skipTasks Tasks to skip.
 * @return Parsed annotations.
 * @throws IOException HTTP exception.
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public Map<String, List> parse(String text, String[] tasks, String[] skipTasks) throws IOException
{
    // The client's configured language travels with every request.
    final DocumentInput request = new DocumentInput(text, tasks, skipTasks, language);
    //noinspection unchecked
    Map<String, List> annotations = mapper.readValue(post("/parse", request), Map.class);
    return annotations;
}
/**
* Parse a raw document without naming any tasks. Delegates to
* {@link #parse(String, String[], String[])} with {@code null} for both {@code tasks} and
* {@code skipTasks}.
*
* @param text Document content which can have multiple sentences.
* @return Parsed annotations.
* @throws IOException HTTP exception.
* @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
*/
public Map<String, List> parse(String text) throws IOException
{
return parse(text, null, null);
}
/**
 * Parse an array of sentences by posting them to the {@code /parse} endpoint and decoding the
 * reply into a map with the Jackson mapper.
 *
 * @param sentences Multiple sentences to parse.
 * @param tasks     Tasks to perform.
 * @param skipTasks Tasks to skip.
 * @return Parsed annotations.
 * @throws IOException HTTP exception.
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public Map<String, List> parse(String[] sentences, String[] tasks, String[] skipTasks) throws IOException
{
    // The client's configured language travels with every request.
    final SentenceInput request = new SentenceInput(sentences, tasks, skipTasks, language);
    //noinspection unchecked
    Map<String, List> annotations = mapper.readValue(post("/parse", request), Map.class);
    return annotations;
}
/**
* Parse an array of sentences without naming any tasks. Delegates to
* {@link #parse(String[], String[], String[])} with {@code null} for both {@code tasks} and
* {@code skipTasks}.
*
* @param sentences Multiple sentences to parse.
* @return Parsed annotations.
* @throws IOException HTTP exception.
* @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
*/
public Map<String, List> parse(String[] sentences) throws IOException
{
return parse(sentences, null, null);
}
/**
 * Parse an array of pre-tokenized sentences by posting them to the {@code /parse} endpoint and
 * decoding the reply into a map with the Jackson mapper.
 *
 * @param tokens    Multiple pre-tokenized sentences to parse.
 * @param tasks     Tasks to perform.
 * @param skipTasks Tasks to skip.
 * @return Parsed annotations.
 * @throws IOException HTTP exception.
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public Map<String, List> parse(String[][] tokens, String[] tasks, String[] skipTasks) throws IOException
{
    // The client's configured language travels with every request.
    final TokenInput request = new TokenInput(tokens, tasks, skipTasks, language);
    //noinspection unchecked
    Map<String, List> annotations = mapper.readValue(post("/parse", request), Map.class);
    return annotations;
}
/**
* Parse an array of pre-tokenized sentences without naming any tasks. Delegates to
* {@link #parse(String[][], String[], String[])} with {@code null} for both {@code tasks} and
* {@code skipTasks}.
*
* @param tokens Multiple pre-tokenized sentences to parse.
* @return Parsed annotations.
* @throws IOException HTTP exception.
* @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
*/
public Map<String, List> parse(String[][] tokens) throws IOException
{
return parse(tokens, null, null);
}
/**
 * Split a document into sentences and tokenize them.
 * <p>
 * Exactly one task is requested from {@link #parse(String, String[], String[])}:
 * {@code "tok/coarse"} when {@code coarse} is true, {@code "tok/fine"} when it is false, and
 * {@code "tok"} when it is {@code null}. The first value of the returned annotation map is
 * handed back to the caller.
 *
 * @param text   A document.
 * @param coarse Whether to perform coarse-grained or fine-grained tokenization; {@code null}
 *               requests the plain {@code "tok"} task.
 * @return A list of tokenized sentences.
 * @throws IOException HTTP exception.
 */
public List<List<String>> tokenize(String text, Boolean coarse) throws IOException
{
    final String task;
    if (coarse == null)
    {
        task = "tok";
    }
    else
    {
        task = coarse ? "tok/coarse" : "tok/fine";
    }

    Map<String, List> annotations = parse(text, new String[]{task}, null);
    //noinspection unchecked
    return annotations.values().iterator().next();
}
/**
* Split a document into sentences and tokenize them using fine-grained standard.
* <p>
* Delegates to {@link #tokenize(String, Boolean)} with {@code coarse = null}, which makes that
* method request the {@code "tok"} task rather than {@code "tok/fine"}.
* NOTE(review): presumably the server treats plain {@code "tok"} as fine-grained, which would
* match the summary above - confirm against the service.
*
* @param text A document.
* @return A list of tokenized sentences.
* @throws IOException HTTP exception.
*/
public List<List<String>> tokenize(String text) throws IOException
{
return tokenize(text, null);
}
/**
 * Text style transfer aims to change the style of the input text to the target style while preserving its content.
 * <p>
 * This overload sends a list of texts to the {@code /text_style_transfer} endpoint together with
 * the client's configured language, and decodes the reply as a list.
 *
 * @param text        Source texts.
 * @param targetStyle Target style.
 * @return Texts of the target style.
 * @throws IOException HTTP exception.
 */
public List<String> textStyleTransfer(List<String> text, String targetStyle) throws IOException
{
    Map<String, Object> payload = new HashMap<>();
    payload.put("text", text);
    payload.put("target_style", targetStyle);
    payload.put("language", language);

    //noinspection unchecked
    List<String> transferred = mapper.readValue(post("/text_style_transfer", payload), List.class);
    return transferred;
}
/**
 * Text style transfer aims to change the style of the input text to the target style while preserving its content.
 * <p>
 * This overload sends a single text to the {@code /text_style_transfer} endpoint together with
 * the client's configured language, and decodes the reply as a string.
 *
 * @param text        Source text.
 * @param targetStyle Target style.
 * @return Text of the target style.
 * @throws IOException HTTP exception.
 */
public String textStyleTransfer(String text, String targetStyle) throws IOException
{
    Map<String, Object> payload = new HashMap<>();
    payload.put("text", text);
    payload.put("target_style", targetStyle);
    payload.put("language", language);

    String transferred = mapper.readValue(post("/text_style_transfer", payload), String.class);
    return transferred;
}
/**
* Grammatical Error Correction (GEC) is the task of correcting different kinds of errors in text such as
* spelling, punctuation, grammatical, and word choice errors.
*
* @param text Text potentially containing diffe
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
本项目为hanLP自然语言处理框架的设计源码,主要采用Python编写,辅以Java进行跨平台开发。项目文件共计660个,其中包括475个Python源文件、157个Markdown文档、13个Java源文件、8个reStructuredText文件、2个YAML配置文件、1个Git忽略规则文件、1个软件许可证文件、1个Makefile构建文件、1个BibLaTeX参考文献文件、1个XML文件。该源码库旨在为自然语言处理提供高效、可靠的解决方案。
资源推荐
资源详情
资源评论
收起资源包目录
基于Python和Java的hanLP自然语言处理设计源码 (654个子文件)
references.bib 42KB
.gitignore 4KB
HanLPClient.java 24KB
HanLPClientTest.java 9KB
MeaningRepresentationTest.java 3KB
Span.java 1KB
CoreferenceResolutionOutput.java 802B
BaseInput.java 674B
TokenInput.java 635B
SentenceInput.java 632B
MeaningRepresentation.java 626B
DocumentInput.java 624B
Node.java 531B
Edge.java 469B
Anchor.java 448B
LICENSE 11KB
Makefile 634B
msr.md 135KB
ctb.md 46KB
863.md 21KB
pku.md 19KB
semeval16.md 16KB
ud.md 9KB
ctb.md 9KB
pku.md 8KB
ptb.md 7KB
npcmj.md 6KB
ctb.md 6KB
data_format.md 5KB
tutorial.md 5KB
install.md 5KB
sd_zh.md 5KB
sd_en.md 4KB
pmt.md 3KB
msra.md 3KB
cpb.md 3KB
configure.md 3KB
npcmj.md 3KB
resources.md 3KB
propbank.md 2KB
ontonotes.md 2KB
word2vec.md 2KB
index.md 2KB
ud.md 2KB
contributing.md 2KB
pull_request_template.md 1KB
mlm.md 1KB
constituency.md 1KB
amr2text.md 1KB
dataset.md 1KB
pos.md 1KB
tok.md 1KB
bug_report.md 1KB
amr.md 1KB
resources.md 996B
resources.md 971B
restful_java.md 963B
sts.md 931B
srl.md 918B
resources.md 914B
index.md 839B
restful_golang.md 836B
feature_request.md 664B
resources.md 493B
index.md 400B
resources.md 356B
ud.md 303B
multi_criteria.md 292B
biaffine_ner.md 270B
rank_srl.md 265B
bio_srl.md 264B
tag_ner.md 264B
embedding.md 257B
ud_parser.md 256B
sdp.md 245B
transformer.md 243B
constituency.md 236B
transformer_ner.md 232B
biaffine_ner.md 231B
dep.md 228B
pos.md 225B
tok.md 224B
mtl.md 223B
lem.md 222B
crf_constituency_parser.md 222B
transformer.md 218B
biaffine_sdp.md 207B
transformer_tagger.md 206B
rnn_ner.md 200B
biaffine_dep.md 199B
pipeline.md 198B
fasttext.md 195B
word2vec.md 193B
vocab.md 192B
structure.md 186B
mcws_dataset.md 184B
task.md 183B
classifiers.md 183B
dictionary.md 183B
index.md 181B
共 654 条
- 1
- 2
- 3
- 4
- 5
- 6
- 7
资源评论
lly202406
- 粉丝: 2475
- 资源: 5407
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功