/*
* <author>Han He</author>
* <email>me@hankcs.com</email>
* <create-date>2020-12-26 11:54 PM</create-date>
*
* <copyright file="HanLPClient.java">
* Copyright (c) 2020, Han He. All Rights Reserved, http://www.hankcs.com/
* See LICENSE file in the project root for full license information.
* </copyright>
*/
package com.hankcs.hanlp.restful;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
/**
 * A RESTful client implementing the data format specification of HanLP.
 * <p>
 * Every {@code parse} overload serializes its input to JSON, POSTs it to the {@code /parse} API of the
 * configured endpoint and deserializes the JSON response into a map.
 *
 * @author hankcs
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public class HanLPClient
{
    /** Base endpoint; the API path (e.g. {@code /parse}) is appended verbatim. */
    private final String url;
    /** Auth key sent as {@code Authorization: Basic <auth>}; no header is sent when null. */
    private final String auth;
    /** Language forwarded with every request; may be null. */
    private final String language;
    /** Timeout in milliseconds, applied to both connecting and reading. */
    private final int timeout;
    private final ObjectMapper mapper;

    /**
     * @param url      An API endpoint to a service provider.
     * @param auth     An auth key licenced by a service provider. When null, the value of the
     *                 {@code HANLP_AUTH} environment variable is used instead (which may itself be unset).
     * @param language The language this client will be expecting. Contact the service provider for the list of
     *                 languages supported. Conventionally, zh is used for Chinese and mul for multilingual.
     *                 Leave null to use the default language on server.
     * @param timeout  Maximum waiting time in seconds for a request. It is applied separately to establishing
     *                 the connection and to reading the response.
     */
    public HanLPClient(String url, String auth, String language, int timeout)
    {
        if (auth == null)
        {
            // System.getenv(name) already returns null for an unset variable.
            auth = System.getenv("HANLP_AUTH");
        }
        this.url = url;
        this.auth = auth;
        this.language = language;
        this.timeout = secondsToMillis(timeout);
        this.mapper = new ObjectMapper();
    }

    /**
     * Creates a client using the server's default language and a 5 second timeout.
     *
     * @param url  An API endpoint to a service provider.
     * @param auth An auth key licenced by a service provider.
     */
    public HanLPClient(String url, String auth)
    {
        this(url, auth, null, 5);
    }

    /**
     * Parse a raw document.
     *
     * @param text      Document content which can have multiple sentences.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String text, String[] tasks, String[] skipTasks) throws IOException
    {
        return parseInput(new DocumentInput(text, tasks, skipTasks, language));
    }

    /**
     * Parse a raw document.
     *
     * @param text Document content which can have multiple sentences.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String text) throws IOException
    {
        return parse(text, null, null);
    }

    /**
     * Parse an array of sentences.
     *
     * @param sentences Multiple sentences to parse.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[] sentences, String[] tasks, String[] skipTasks) throws IOException
    {
        return parseInput(new SentenceInput(sentences, tasks, skipTasks, language));
    }

    /**
     * Parse an array of sentences.
     *
     * @param sentences Multiple sentences to parse.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[] sentences) throws IOException
    {
        return parse(sentences, null, null);
    }

    /**
     * Parse an array of pre-tokenized sentences.
     *
     * @param tokens    Multiple pre-tokenized sentences to parse.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[][] tokens, String[] tasks, String[] skipTasks) throws IOException
    {
        return parseInput(new TokenInput(tokens, tasks, skipTasks, language));
    }

    /**
     * Parse an array of pre-tokenized sentences.
     *
     * @param tokens Multiple pre-tokenized sentences to parse.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[][] tokens) throws IOException
    {
        return parse(tokens, null, null);
    }

    /**
     * Sends {@code input} to the {@code /parse} API and deserializes the JSON response.
     *
     * @param input Request payload shared by all {@code parse} overloads.
     * @return The response deserialized as a map.
     * @throws IOException If the request fails or the response cannot be deserialized.
     */
    @SuppressWarnings("unchecked") // readValue(..., Map.class) yields a raw Map; the public API exposes Map<String, List>.
    private Map<String, List> parseInput(BaseInput input) throws IOException
    {
        return mapper.readValue(post("/parse", input), Map.class);
    }

    /**
     * POSTs {@code input_} as UTF-8 encoded JSON to {@code this.url + api} and returns the response body.
     *
     * @param api    API path appended to the endpoint, e.g. {@code /parse}.
     * @param input_ Payload to serialize as the request body.
     * @return The response body.
     * @throws IOException If serialization or the connection fails, or the server answers with a status code
     *                     other than 200. In the latter case the message carries the status code and the
     *                     server's error body (or the HTTP reason phrase when no body is available).
     */
    private String post(String api, BaseInput input_) throws IOException
    {
        // Serialize before touching the network so a bad payload fails without opening a connection.
        byte[] payload = mapper.writeValueAsString(input_).getBytes(StandardCharsets.UTF_8);

        HttpURLConnection con = (HttpURLConnection) new URL(this.url + api).openConnection();
        con.setRequestMethod("POST");
        if (auth != null)
        {
            con.setRequestProperty("Authorization", "Basic " + auth);
        }
        // Media type parameters must be name=value pairs, hence "charset=utf-8" rather than a bare "utf-8".
        con.setRequestProperty("Content-Type", "application/json; charset=utf-8");
        con.setRequestProperty("Accept", "application/json");
        con.setDoOutput(true);
        con.setConnectTimeout(timeout);
        con.setReadTimeout(timeout);

        try (OutputStream os = con.getOutputStream())
        {
            os.write(payload, 0, payload.length);
        }

        int code = con.getResponseCode();
        if (code != 200)
        {
            throw new IOException(String.format("Request failed, status code = %d, error = %s", code, describeError(con)));
        }
        return readBody(con.getInputStream());
    }

    /**
     * Builds the error detail of a failed request: the server's error body when it has one, otherwise the HTTP
     * reason phrase. Reading the error stream to its end and closing it also releases the response body.
     */
    private static String describeError(HttpURLConnection con) throws IOException
    {
        String detail = con.getResponseMessage();
        InputStream errorStream = con.getErrorStream(); // null when the server sent no error body
        if (errorStream != null)
        {
            try
            {
                String body = readBody(errorStream);
                if (!body.isEmpty())
                {
                    detail = body;
                }
            }
            catch (IOException ignored)
            {
                // Best effort only: the status code and reason phrase are still reported to the caller.
            }
        }
        return detail;
    }

    /**
     * Reads {@code stream} to its end as UTF-8 text, trimming each line and joining the lines without a
     * separator, then closes it. For valid JSON this only drops insignificant whitespace, because JSON strings
     * cannot contain raw line breaks.
     */
    private static String readBody(InputStream stream) throws IOException
    {
        StringBuilder body = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                body.append(line.trim());
            }
        }
        return body.toString();
    }

    /**
     * Converts seconds to milliseconds, clamping to the {@code int} range instead of overflowing. Negative
     * values stay negative so that {@link HttpURLConnection#setConnectTimeout(int)} still rejects them.
     */
    private static int secondsToMillis(int seconds)
    {
        long millis = seconds * 1000L;
        if (millis > Integer.MAX_VALUE)
        {
            return Integer.MAX_VALUE;
        }
        if (millis < Integer.MIN_VALUE)
        {
            return Integer.MIN_VALUE;
        }
        return (int) millis;
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
本项目为基于Python和Java开发的HanLP自然语言处理源码集,总计包含570个文件,其中Python源文件408个,文档类文件139个,Java源文件6个。功能涵盖中文分词、词性标注、命名实体识别、依存句法分析、语义依存分析、新词发现、关键词短语提取、自动摘要、文本分类聚类、拼音简繁转换等自然语言处理关键技术,旨在提供全面的自然语言处理解决方案。
资源推荐
资源详情
资源评论
收起资源包目录
基于Python和Java的HanLP自然语言处理设计源码 (571个子文件)
references.bib 20KB
.gitignore 4KB
hanlp.go 6KB
def.go 3KB
option.go 1KB
main_test.go 607B
HanLPClient.java 7KB
HanLPClientTest.java 2KB
BaseInput.java 674B
TokenInput.java 635B
SentenceInput.java 632B
DocumentInput.java 624B
LICENSE 11KB
Makefile 634B
pku.md 19KB
ctb.md 8KB
semeval16.md 8KB
sd.md 8KB
pku.md 8KB
ptb.md 7KB
863.md 6KB
ctb.md 6KB
data_format.md 5KB
tutorial.md 4KB
install.md 4KB
ud.md 4KB
msra.md 3KB
cpb.md 3KB
configure.md 3KB
resources.md 2KB
propbank.md 2KB
ontonotes.md 2KB
index.md 2KB
ud.md 2KB
contributing.md 2KB
bug_report.md 1KB
dataset.md 1KB
resources.md 971B
restful_java.md 959B
resources.md 951B
resources.md 914B
index.md 839B
feature_request.md 826B
resources.md 493B
index.md 362B
resources.md 356B
ud.md 303B
multi_criteria.md 292B
biaffine_ner.md 270B
rank_srl.md 265B
bio_srl.md 264B
tag_ner.md 264B
embedding.md 257B
ud_parser.md 256B
sdp.md 245B
transformer.md 243B
constituency.md 236B
transformer_ner.md 232B
biaffine_ner.md 231B
dep.md 228B
pos.md 225B
tok.md 224B
mtl.md 223B
lem.md 222B
crf_constituency_parser.md 222B
transformer.md 218B
biaffine_sdp.md 207B
transformer_tagger.md 206B
rnn_ner.md 200B
biaffine_dep.md 199B
fasttext.md 195B
word2vec.md 193B
vocab.md 192B
structure.md 186B
task.md 183B
classifiers.md 183B
dictionary.md 183B
char_rnn.md 177B
char_cnn.md 177B
index.md 173B
span_bio.md 172B
span_rank.md 172B
mcws_dataset.md 167B
pas.md 166B
rnn_tagger.md 161B
dm.md 159B
torch_component.md 157B
biaffine_ner.md 154B
trie.md 152B
psd.md 152B
index.md 151B
eos.md 150B
constituency_dataset.md 149B
index.md 142B
conll_dataset.md 133B
lemmatizer.md 129B
eos.md 128B
tokenizer.md 127B
txt.md 126B
transform.md 124B
共 571 条
- 1
- 2
- 3
- 4
- 5
- 6
资源评论
csbysj2020
- 粉丝: 2752
- 资源: 5567
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- NSKeyValueObservationException如何解决.md
- 基于Java的环境保护与宣传网站论文.doc
- 前端开发中的JS快速排序算法原理及实现方法
- 常见排序算法概述及其性能比较
- 形状分类31-YOLO(v5至v11)、COCO、CreateML、Darknet、Paligemma、VOC数据集合集.rar
- 2018年最新 ECshop母婴用品商城新版系统(微商城+微分销+微信支付)
- BookShopTuto.zip
- 论文复现:结合 CNN 和 LSTM 的滚动轴承剩余使用寿命预测方法
- MySQL中的数据库管理语句-ALTER USER.pdf
- 冒泡排序算法解析及优化.md
- 2024年智算云市场发展与生态分析报告
- qwewq23132131231
- 《木兰诗》教学设计.docx
- 《台阶》教学设计.docx
- 《卖油翁》文言文教学方案.docx
- 《老王》教学设计方案.docx
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功