/*
* <author>Han He</author>
* <email>me@hankcs.com</email>
* <create-date>2020-12-26 11:54 PM</create-date>
*
* <copyright file="HanLPClient.java">
* Copyright (c) 2020, Han He. All Rights Reserved, http://www.hankcs.com/
* See LICENSE file in the project root for full license information.
* </copyright>
*/
package com.hankcs.hanlp.restful;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
/**
 * A RESTful client implementing the data format specification of HanLP.
 * <p>
 * Every {@code parse} overload serializes its input to JSON, POSTs it to the {@code /parse} API of the configured
 * endpoint and deserializes the JSON response body into a map.
 *
 * @author hankcs
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public class HanLPClient
{
    /**
     * Path of the parsing API, appended to the endpoint URL.
     */
    private static final String PARSE_API = "/parse";

    private final String url;
    private final String auth;
    private final String language;
    /**
     * Connect/read timeout in milliseconds, as expected by {@link HttpURLConnection}.
     */
    private final int timeoutMillis;
    private final ObjectMapper mapper;

    /**
     * @param url      An API endpoint to a service provider. Trailing slashes are ignored.
     * @param auth     An auth key licensed by a service provider. When null, the value of the environment variable
     *                 {@code HANLP_AUTH} is used instead (which may itself be absent).
     * @param language The language this client will be expecting. Contact the service provider for the list of
     *                 languages supported. Conventionally, zh is used for Chinese and mul for multilingual.
     *                 Leave null to use the default language on server.
     * @param timeout  Maximum waiting time in seconds for a request. Must not be negative.
     * @throws IllegalArgumentException If {@code timeout} is negative.
     */
    public HanLPClient(String url, String auth, String language, int timeout)
    {
        if (timeout < 0)
        {
            throw new IllegalArgumentException("timeout must not be negative: " + timeout);
        }
        if (auth == null)
        {
            auth = System.getenv("HANLP_AUTH");
        }
        this.url = stripTrailingSlashes(url);
        this.auth = auth;
        this.language = language;
        // Multiply as long and clamp so that a large number of seconds cannot overflow into a bogus timeout.
        this.timeoutMillis = (int) Math.min(timeout * 1000L, Integer.MAX_VALUE);
        this.mapper = new ObjectMapper();
    }

    /**
     * Create a client using the server's default language and a timeout of 5 seconds.
     *
     * @param url  An API endpoint to a service provider.
     * @param auth An auth key licensed by a service provider.
     */
    public HanLPClient(String url, String auth)
    {
        this(url, auth, null, 5);
    }

    /**
     * Parse a raw document.
     *
     * @param text      Document content which can have multiple sentences.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String text, String[] tasks, String[] skipTasks) throws IOException
    {
        return request(new DocumentInput(text, tasks, skipTasks, language));
    }

    /**
     * Parse a raw document.
     *
     * @param text Document content which can have multiple sentences.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String text) throws IOException
    {
        return parse(text, null, null);
    }

    /**
     * Parse an array of sentences.
     *
     * @param sentences Multiple sentences to parse.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[] sentences, String[] tasks, String[] skipTasks) throws IOException
    {
        return request(new SentenceInput(sentences, tasks, skipTasks, language));
    }

    /**
     * Parse an array of sentences.
     *
     * @param sentences Multiple sentences to parse.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[] sentences) throws IOException
    {
        return parse(sentences, null, null);
    }

    /**
     * Parse an array of pre-tokenized sentences.
     *
     * @param tokens    Multiple pre-tokenized sentences to parse.
     * @param tasks     Tasks to perform.
     * @param skipTasks Tasks to skip.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[][] tokens, String[] tasks, String[] skipTasks) throws IOException
    {
        return request(new TokenInput(tokens, tasks, skipTasks, language));
    }

    /**
     * Parse an array of pre-tokenized sentences.
     *
     * @param tokens Multiple pre-tokenized sentences to parse.
     * @return Parsed annotations.
     * @throws IOException HTTP exception.
     * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
     */
    public Map<String, List> parse(String[][] tokens) throws IOException
    {
        return parse(tokens, null, null);
    }

    /**
     * Send {@code input} to the parse API and deserialize the JSON response into a map.
     *
     * @param input The request payload.
     * @return The response body as a map.
     * @throws IOException If the request fails or the response cannot be read as JSON.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private Map<String, List> request(BaseInput input) throws IOException
    {
        return mapper.readValue(post(PARSE_API, input), Map.class);
    }

    /**
     * POST {@code input_} as JSON to {@code api} and return the response body.
     *
     * @param api    API path appended to the endpoint URL, e.g. {@code /parse}.
     * @param input_ The payload to serialize as the request body.
     * @return The raw response body decoded as UTF-8.
     * @throws IOException If the URL is malformed, the connection fails, or the status code is not 200.
     */
    private String post(String api, BaseInput input_) throws IOException
    {
        URL endpoint = new URL(this.url + api);
        HttpURLConnection con = (HttpURLConnection) endpoint.openConnection();
        con.setRequestMethod("POST");
        if (auth != null)
        {
            con.setRequestProperty("Authorization", "Basic " + auth);
        }
        // A media type parameter must be a name=value pair, hence "charset=utf-8" rather than a bare "utf-8".
        con.setRequestProperty("Content-Type", "application/json; charset=utf-8");
        con.setRequestProperty("Accept", "application/json");
        con.setDoOutput(true);
        con.setConnectTimeout(timeoutMillis);
        con.setReadTimeout(timeoutMillis);

        byte[] payload = mapper.writeValueAsString(input_).getBytes(StandardCharsets.UTF_8);
        try (OutputStream os = con.getOutputStream())
        {
            os.write(payload, 0, payload.length);
        }

        int code = con.getResponseCode();
        if (code != 200)
        {
            String reason = con.getResponseMessage();
            // Consume and close the error body so the underlying connection can be released for reuse.
            drainQuietly(con.getErrorStream());
            throw new IOException(String.format("Request failed, status code = %d, error = %s", code, reason));
        }

        // The body is returned verbatim; the JSON parser tolerates whitespace between tokens.
        StringBuilder response = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8)))
        {
            char[] buffer = new char[4096];
            int read;
            while ((read = br.read(buffer)) != -1)
            {
                response.append(buffer, 0, read);
            }
        }
        return response.toString();
    }

    /**
     * Remove trailing slashes so that appending an API path such as {@code /parse} never yields a double slash.
     *
     * @param url The endpoint URL, possibly null.
     * @return The URL without trailing slashes, or null if {@code url} is null.
     */
    private static String stripTrailingSlashes(String url)
    {
        if (url == null)
        {
            return null;
        }
        int end = url.length();
        while (end > 0 && url.charAt(end - 1) == '/')
        {
            end--;
        }
        return url.substring(0, end);
    }

    /**
     * Read {@code stream} to its end and close it, discarding the content.
     *
     * @param stream The stream to consume; null is ignored.
     */
    private static void drainQuietly(InputStream stream)
    {
        if (stream == null)
        {
            return;
        }
        try (InputStream in = stream)
        {
            byte[] buffer = new byte[4096];
            while (in.read(buffer) != -1)
            {
                // Discard: only reaching the end of the stream matters.
            }
        }
        catch (IOException ignored)
        {
            // Best effort: failing to drain only costs connection reuse and must not mask the status-code error.
        }
    }
}
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
本项目为基于Python和Java开发的HanLP自然语言处理源码集,总计包含570个文件,其中Python源文件408个,文档类文件139个,Java源文件6个。功能涵盖中文分词、词性标注、命名实体识别、依存句法分析、语义依存分析、新词发现、关键词短语提取、自动摘要、文本分类聚类、拼音简繁转换等自然语言处理关键技术,旨在提供全面的自然语言处理解决方案。
资源推荐
资源详情
资源评论
收起资源包目录
基于Python和Java的HanLP自然语言处理设计源码 (571个子文件)
references.bib 20KB
.gitignore 4KB
hanlp.go 6KB
def.go 3KB
option.go 1KB
main_test.go 607B
HanLPClient.java 7KB
HanLPClientTest.java 2KB
BaseInput.java 674B
TokenInput.java 635B
SentenceInput.java 632B
DocumentInput.java 624B
LICENSE 11KB
Makefile 634B
pku.md 19KB
ctb.md 8KB
semeval16.md 8KB
sd.md 8KB
pku.md 8KB
ptb.md 7KB
863.md 6KB
ctb.md 6KB
data_format.md 5KB
tutorial.md 4KB
install.md 4KB
ud.md 4KB
msra.md 3KB
cpb.md 3KB
configure.md 3KB
resources.md 2KB
propbank.md 2KB
ontonotes.md 2KB
index.md 2KB
ud.md 2KB
contributing.md 2KB
bug_report.md 1KB
dataset.md 1KB
resources.md 971B
restful_java.md 959B
resources.md 951B
resources.md 914B
index.md 839B
feature_request.md 826B
resources.md 493B
index.md 362B
resources.md 356B
ud.md 303B
multi_criteria.md 292B
biaffine_ner.md 270B
rank_srl.md 265B
bio_srl.md 264B
tag_ner.md 264B
embedding.md 257B
ud_parser.md 256B
sdp.md 245B
transformer.md 243B
constituency.md 236B
transformer_ner.md 232B
biaffine_ner.md 231B
dep.md 228B
pos.md 225B
tok.md 224B
mtl.md 223B
lem.md 222B
crf_constituency_parser.md 222B
transformer.md 218B
biaffine_sdp.md 207B
transformer_tagger.md 206B
rnn_ner.md 200B
biaffine_dep.md 199B
fasttext.md 195B
word2vec.md 193B
vocab.md 192B
structure.md 186B
task.md 183B
classifiers.md 183B
dictionary.md 183B
char_rnn.md 177B
char_cnn.md 177B
index.md 173B
span_bio.md 172B
span_rank.md 172B
mcws_dataset.md 167B
pas.md 166B
rnn_tagger.md 161B
dm.md 159B
torch_component.md 157B
biaffine_ner.md 154B
trie.md 152B
psd.md 152B
index.md 151B
eos.md 150B
constituency_dataset.md 149B
index.md 142B
conll_dataset.md 133B
lemmatizer.md 129B
eos.md 128B
tokenizer.md 127B
txt.md 126B
transform.md 124B
共 571 条
- 1
- 2
- 3
- 4
- 5
- 6
资源评论
csbysj2020
- 粉丝: 2092
- 资源: 3620
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- Python基础语法-类(一).pdf
- 【Godot4自学手册】第四十九节创建刮风效果,利用GPUParticles2D实现粒子特效,国风效果
- Aurora混合协议 8B10B发送,6466接受数据
- 基于uds的bootloader开发,是已完成的项目,包括14229 uds诊断层协议栈,15765-2网络层协议栈,瑞萨RH8
- 神经网络从入门到精通教程
- notumor(无肿瘤)标注数据集,共549张数据集,包括图片和手动标注肿瘤的xml文件,可以直接使用labelimg打开
- 基于Java + HTTPClient 4.0,采用MySQL存储爬取数据,支持多进程并发执行的新浪微博爬虫
- abaqus曲线轨道有砟道床参振质量法,轮轨耦合,谐响应,五参数法
- 基于SSH(Struts2+Spring+Hibernate)搭建的失物招领平台,进行简单修改即可用于各高校失物招领
- 三相PWM整流器闭环仿真,电压电流双闭环控制,输出直流电压做外环 模型中包含主电路,坐标变,电压电流双环PI控制器,SVPWM控
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功