基于Python和Java的hanLP自然语言处理设计源码资源-CSDN文库

共654个文件

py：475个

md：150个

java：13个

版权申诉

Python

自然语言处理

Java

hanLP

源码

38 浏览量 2024-10-03 04:08:10 上传评论收藏 2.91MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于Python和Java的hanLP自然语言处理设计源码（654个子文件）

references.bib 42KB

.gitignore 4KB

HanLPClient.java 24KB

HanLPClientTest.java 9KB

MeaningRepresentationTest.java 3KB

Span.java 1KB

CoreferenceResolutionOutput.java 802B

BaseInput.java 674B

TokenInput.java 635B

SentenceInput.java 632B

MeaningRepresentation.java 626B

DocumentInput.java 624B

Node.java 531B

Edge.java 469B

Anchor.java 448B

LICENSE 11KB

Makefile 634B

msr.md 135KB

ctb.md 46KB

863.md 21KB

pku.md 19KB

semeval16.md 16KB

ud.md 9KB

ctb.md 9KB

pku.md 8KB

ptb.md 7KB

npcmj.md 6KB

ctb.md 6KB

data_format.md 5KB

tutorial.md 5KB

install.md 5KB

sd_zh.md 5KB

sd_en.md 4KB

pmt.md 3KB

msra.md 3KB

cpb.md 3KB

configure.md 3KB

npcmj.md 3KB

resources.md 3KB

propbank.md 2KB

ontonotes.md 2KB

word2vec.md 2KB

index.md 2KB

ud.md 2KB

contributing.md 2KB

pull_request_template.md 1KB

mlm.md 1KB

constituency.md 1KB

amr2text.md 1KB

dataset.md 1KB

pos.md 1KB

tok.md 1KB

bug_report.md 1KB

amr.md 1KB

resources.md 996B

resources.md 971B

restful_java.md 963B

sts.md 931B

srl.md 918B

resources.md 914B

index.md 839B

restful_golang.md 836B

feature_request.md 664B

resources.md 493B

index.md 400B

resources.md 356B

ud.md 303B

multi_criteria.md 292B

biaffine_ner.md 270B

rank_srl.md 265B

bio_srl.md 264B

tag_ner.md 264B

embedding.md 257B

ud_parser.md 256B

sdp.md 245B

transformer.md 243B

constituency.md 236B

transformer_ner.md 232B

biaffine_ner.md 231B

dep.md 228B

pos.md 225B

tok.md 224B

mtl.md 223B

lem.md 222B

crf_constituency_parser.md 222B

transformer.md 218B

biaffine_sdp.md 207B

transformer_tagger.md 206B

rnn_ner.md 200B

biaffine_dep.md 199B

pipeline.md 198B

fasttext.md 195B

word2vec.md 193B

vocab.md 192B

structure.md 186B

mcws_dataset.md 184B

task.md 183B

classifiers.md 183B

dictionary.md 183B

index.md 181B

共 654 条

/*
 * <author>Han He</author>
 * <email>me@hankcs.com</email>
 * <create-date>2020-12-26 11:54 PM</create-date>
 *
 * <copyright file="HanLPClient.java">
 * Copyright (c) 2020, Han He. All Rights Reserved, http://www.hankcs.com/
 * See LICENSE file in the project root for full license information.
 * </copyright>
 */
package com.hankcs.hanlp.restful;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.hankcs.hanlp.restful.mrp.MeaningRepresentation;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.*;

/**
 * A RESTful client implementing the data format specification of HanLP.
 *
 * @author hankcs
 * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a>
 */
public class HanLPClient {
    private String url;
    private String auth;
    private String language;
    private int timeout;
    private ObjectMapper mapper;

    /**
     * Creates a client bound to one endpoint.
     *
     * @param url      An API endpoint to a service provider.
     * @param auth     An auth key licenced by a service provider. When {@code null}, the value of the
     *                 {@code HANLP_AUTH} environment variable is used instead (which may itself be absent).
     * @param language The language this client will be expecting. Contact the service provider for the list of
     *                 languages supported. Conventionally, zh is used for Chinese and mul for multilingual.
     *                 Leave null to use the default language on server.
     * @param timeout  Maximum waiting time in seconds for a request; stored internally multiplied by 1000.
     */
    public HanLPClient(String url, String auth, String language, int timeout) {
        this.url = url;
        // An explicit key wins; otherwise consult the environment.
        this.auth = (auth != null) ? auth : System.getenv().get("HANLP_AUTH");
        this.language = language;
        this.timeout = timeout * 1000;
        this.mapper = new ObjectMapper();
    }

    /**
     * Creates a client with no explicit language and a timeout of 5 seconds.
     *
     * @param url  An API endpoint to a service provider.
     * @param auth An auth key licenced by a service provider.
     */
    public HanLPClient(String url, String auth) {
        this(url, auth, null, 5);
    }

    /**
     * Parse a raw document.
* * @param text Document content which can have multiple sentences. * @param tasks Tasks to perform. * @param skipTasks Tasks to skip. * @return Parsed annotations. * @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String text, String[] tasks, String[] skipTasks) throws IOException { //noinspection unchecked return mapper.readValue(post("/parse", new DocumentInput(text, tasks, skipTasks, language)), Map.class); } /** * Parse a raw document. * * @param text Document content which can have multiple sentences. * @return Parsed annotations. * @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String text) throws IOException { return parse(text, null, null); } /** * Parse an array of sentences. * * @param sentences Multiple sentences to parse. * @param tasks Tasks to perform. * @param skipTasks Tasks to skip. * @return Parsed annotations. * @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String[] sentences, String[] tasks, String[] skipTasks) throws IOException { //noinspection unchecked return mapper.readValue(post("/parse", new SentenceInput(sentences, tasks, skipTasks, language)), Map.class); } /** * Parse an array of sentences. * * @param sentences Multiple sentences to parse. * @return Parsed annotations. * @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String[] sentences) throws IOException { return parse(sentences, null, null); } /** * Parse an array of pre-tokenized sentences. * * @param tokens Multiple pre-tokenized sentences to parse. * @param tasks Tasks to perform. * @param skipTasks Tasks to skip. * @return Parsed annotations. 
* @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String[][] tokens, String[] tasks, String[] skipTasks) throws IOException { //noinspection unchecked return mapper.readValue(post("/parse", new TokenInput(tokens, tasks, skipTasks, language)), Map.class); } /** * Parse an array of pre-tokenized sentences. * * @param tokens Multiple pre-tokenized sentences to parse. * @return Parsed annotations. * @throws IOException HTTP exception. * @see <a href="https://hanlp.hankcs.com/docs/data_format.html">Data Format</a> */ public Map<String, List> parse(String[][] tokens) throws IOException { return parse(tokens, null, null); } /** * Split a document into sentences and tokenize them. * * @param text A document. * @param coarse Whether to perform coarse-grained or fine-grained tokenization. * @return A list of tokenized sentences. * @throws IOException HTTP exception. */ public List<List<String>> tokenize(String text, Boolean coarse) throws IOException { String[] tasks; if (coarse != null) { if (coarse) tasks = new String[]{"tok/coarse"}; else tasks = new String[]{"tok/fine"}; } else tasks = new String[]{"tok"}; Map<String, List> doc = parse(text, tasks, null); //noinspection unchecked return doc.values().iterator().next(); } /** * Split a document into sentences and tokenize them using fine-grained standard. * * @param text A document. * @return A list of tokenized sentences. * @throws IOException HTTP exception. */ public List<List<String>> tokenize(String text) throws IOException { return tokenize(text, null); } /** * Text style transfer aims to change the style of the input text to the target style while preserving its content. * * @param text Source text. * @param targetStyle Target style. * @return Text of the target style. 
*/ public List<String> textStyleTransfer(List<String> text, String targetStyle) throws IOException { Map<String, Object> input = new HashMap<>(); input.put("text", text); input.put("target_style", targetStyle); input.put("language", language); //noinspection unchecked return mapper.readValue(post("/text_style_transfer", input), List.class); } /** * Text style transfer aims to change the style of the input text to the target style while preserving its content. * * @param text Source text. * @param targetStyle Target style. * @return Text of the target style. */ public String textStyleTransfer(String text, String targetStyle) throws IOException { Map<String, Object> input = new HashMap<>(); input.put("text", text); input.put("target_style", targetStyle); input.put("language", language); return mapper.readValue(post("/text_style_transfer", input), String.class); } /** * Grammatical Error Correction (GEC) is the task of correcting different kinds of errors in text such as * spelling, punctuation, grammatical, and word choice errors. * * @param text Text potentially containing diffe

评论收藏

内容反馈

版权申诉