package com.wlld.myjecs.tools;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.wlld.myjecs.bean.BeanMangerOnly;
import com.wlld.myjecs.config.Config;
import com.wlld.myjecs.entity.business.*;
import org.wlld.entity.KeyWordForSentence;
import org.wlld.entity.SentenceModel;
import org.wlld.entity.WordTwoVectorModel;
import org.wlld.naturalLanguage.languageCreator.CatchKeyWord;
import org.wlld.naturalLanguage.word.MyKeyWord;
import org.wlld.naturalLanguage.word.WordEmbedding;
import org.wlld.rnnJumpNerveCenter.RRNerveManager;
import org.wlld.rnnJumpNerveCenter.RandomModel;
import org.wlld.rnnNerveCenter.ModelParameter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
public class Tools {
/**
 * Trains or restores the per-keyword-type {@link MyKeyWord} models stored in the map
 * returned by {@code beanMangerOnly.getMyKeyWord()}.
 *
 * <p>When the file at {@code Config.onlyKeyWord} does not exist, or when {@code init} is
 * {@code true}, the sentences are grouped by keyword type id, every model in the map is
 * trained through {@code study(...)}, and the resulting parameters are serialized to JSON
 * and handed to {@code writeModel}. Otherwise the parameters returned by
 * {@code readModelParameter()} are inserted into freshly created models.
 *
 * @param beanMangerOnly holder of the model map, the config and the word embedding
 * @param sentences      training sentences; only read in the training branch
 * @param init           {@code true} forces retraining even if the model file exists
 * @throws Exception propagated from the training, writing or reading steps
 */
private void haveKeyWord(BeanMangerOnly beanMangerOnly, List<MySentence> sentences, boolean init) throws Exception {
    File file = new File(Config.onlyKeyWord);
    Map<Integer, MyKeyWord> haveKeyWords = beanMangerOnly.getMyKeyWord();
    if (!file.exists() || init) { // model file missing (or retraining forced): learn again
        Map<Integer, List<KeyWordForSentence>> keyWordForSentenceMap = new HashMap<>();
        for (MySentence sentence : sentences) {
            List<MyKeywordStudy> myKeywordStudyList = sentence.getMyKeywordStudyList(); // keyword collection
            String word = sentence.getWord();
            for (MyKeywordStudy myKeywordStudy : myKeywordStudyList) {
                int keyword_type_id = myKeywordStudy.getKeyword_type_id(); // keyword type id
                // The sample list and the model live in two different maps, so each one is
                // looked up on its own. Deciding from haveKeyWords alone left the list null
                // (NullPointerException on add) whenever the model map was already populated.
                List<KeyWordForSentence> keyWordForSentenceList = keyWordForSentenceMap.get(keyword_type_id);
                if (keyWordForSentenceList == null) {
                    keyWordForSentenceList = new ArrayList<>();
                    keyWordForSentenceMap.put(keyword_type_id, keyWordForSentenceList);
                }
                if (!haveKeyWords.containsKey(keyword_type_id)) {
                    haveKeyWords.put(keyword_type_id,
                            new MyKeyWord(beanMangerOnly.getConfig(), beanMangerOnly.getWordEmbedding()));
                }
                if (word != null) {
                    KeyWordForSentence keyWordForSentence = new KeyWordForSentence();
                    keyWordForSentence.setSentence(word);
                    // A missing keyword is still a training sample: the purpose of this model
                    // is to sniff out keyword sensitivity.
                    keyWordForSentence.setKeyWord(myKeywordStudy.getKeyword());
                    keyWordForSentenceList.add(keyWordForSentence);
                }
            }
        }
        HaveAllKeyWord haveAllKeyWord = new HaveAllKeyWord();
        List<HaveKey> haveKeyList = new ArrayList<>();
        haveAllKeyWord.setHaveKeyList(haveKeyList);
        for (Map.Entry<Integer, MyKeyWord> entry : haveKeyWords.entrySet()) {
            HaveKey haveKey = new HaveKey();
            int key = entry.getKey();
            // NOTE(review): a type id that was already in the model map but has no sample in
            // `sentences` still reaches study(null) here, exactly as before - confirm how
            // study() treats a null list.
            ModelParameter modelParameter = entry.getValue().study(keyWordForSentenceMap.get(key));
            haveKey.setKey(key);
            haveKey.setModelParameter(modelParameter);
            haveKeyList.add(haveKey);
        }
        writeModel(JSONObject.toJSONString(haveAllKeyWord), Config.onlyKeyWord);
    } else { // model file exists: read it directly
        List<HaveKey> haveKeyList = readModelParameter().getHaveKeyList();
        for (HaveKey haveKey : haveKeyList) {
            MyKeyWord myKeyWord = new MyKeyWord(beanMangerOnly.getConfig(), beanMangerOnly.getWordEmbedding());
            myKeyWord.insertModel(haveKey.getModelParameter());
            haveKeyWords.put(haveKey.getKey(), myKeyWord);
        }
    }
}
/**
 * Fills the map returned by {@code beanMangerOnly.catchKeyWord()} with one
 * {@link CatchKeyWord} per keyword type id.
 *
 * <p>If the file at {@code Config.KeyWordModelUrl} exists, its content is parsed as a
 * {@code MyWordModel} and each stored model is inserted into a new extractor. Otherwise
 * the sentences are grouped by keyword type id, one extractor is trained per group, and
 * the collected models are serialized to JSON and passed to {@code writeModel}.
 *
 * @param beanMangerOnly holder of the extractor map
 * @param sentences      training sentences; only read when no model file exists
 * @throws IOException as declared; propagated from the read/write helpers
 */
private void keyWord(BeanMangerOnly beanMangerOnly, List<MySentence> sentences) throws IOException {
    File modelFile = new File(Config.KeyWordModelUrl);
    Map<Integer, CatchKeyWord> extractors = beanMangerOnly.catchKeyWord();

    if (modelFile.exists()) {
        // TODO: read the model
        MyWordModel stored = JSONObject.parseObject(readPaper(modelFile), MyWordModel.class);
        List<KeyWordModelMapping> storedMappings = stored.getKeyWordModelMappings();
        for (KeyWordModelMapping mapping : storedMappings) {
            int typeId = mapping.getKey();
            CatchKeyWord extractor = new CatchKeyWord();
            extractors.put(typeId, extractor);
            extractor.insertModel(mapping.getKeyWordModel());
        }
        return;
    }

    // No model file: learn again.
    List<KeyWordModelMapping> trainedMappings = new ArrayList<>();
    Map<Integer, List<KeySentence>> samplesByType = new HashMap<>();

    // Step 1: bucket every (sentence, keyword) pair under its keyword type id.
    for (MySentence sentence : sentences) {
        List<MyKeywordStudy> studies = sentence.getMyKeywordStudyList();
        String text = sentence.getWord();
        for (MyKeywordStudy study : studies) {
            String keyword = study.getKeyword();
            int typeId = study.getKeyword_type_id();
            if (text == null || keyword == null) {
                continue; // only complete pairs are used for training
            }
            KeySentence sample = new KeySentence();
            sample.setWord(text);
            sample.setKeyword(keyword);
            sample.setKeyword_type_id(typeId);

            List<KeySentence> bucket = samplesByType.get(typeId);
            if (bucket == null) {
                bucket = new ArrayList<>();
                samplesByType.put(typeId, bucket);
            }
            bucket.add(sample);
        }
    }

    // Step 2: train one extractor per keyword type and remember its model.
    for (Map.Entry<Integer, List<KeySentence>> group : samplesByType.entrySet()) {
        List<KeySentence> groupSamples = group.getValue();
        int typeId = group.getKey();
        List<KeyWordForSentence> trainingSet = new ArrayList<>();
        CatchKeyWord extractor = new CatchKeyWord();
        extractors.put(typeId, extractor); // TODO: memory hungry
        System.out.println("key:" + typeId);
        for (KeySentence sample : groupSamples) {
            KeyWordForSentence trainingItem = new KeyWordForSentence();
            trainingItem.setSentence(sample.getWord());
            trainingItem.setKeyWord(sample.getKeyword());
            trainingSet.add(trainingItem);
        }
        extractor.study(trainingSet); // time-consuming step

        KeyWordModelMapping mapping = new KeyWordModelMapping();
        mapping.setKey(typeId);
        mapping.setKeyWordModel(extractor.getModel());
        trainedMappings.add(mapping);
    }

    // Step 3: write the model out.
    MyWordModel model = new MyWordModel();
    model.setKeyWordModelMappings(trainedMappings);
    writeModel(JSONObject.toJSONString(model), Config.KeyWordModelUrl);
}
private void allKeyWord(BeanMangerOnly beanMangerOnly, List<MySentence> sentences) throws IOException {
File file = new File(Config.keyWordIndex);//关键词
AllKeyWords allKeyWords = beanMangerOnly.getAllKeyWords();
if (file.exists()) {
List<KeyWord> keyWords = JSONObject.parseObject(readPaper(file), AllKeyWords.class
Java程序员-张凯
- 粉丝: 1w+
- 资源: 7527
最新资源
- TongWeb V7.0 快速使用手册
- ZZU物联网工程专业数电实验整合
- C++、MFC实现类图的绘制功能,包含:图元的创建及对齐、图元的移动、图元的删除、图元的文字编辑.zip
- TongWeb V7.0 服务配置指南
- 新手运营源码 手机自适应二开骰子网站源码+免公众号+免签支付即时到账
- TongWeb V7.0 应用管理指南
- C++职工信息管理系统,基于MFC界面设计,系大一下学期课程设计.zip
- 一个 JavaScript 编写的可拖拽任务列表,通过监听 dragstart、dragend、dragover 和 drop 等事件,实现了任务项的拖拽和重新排序功能
- ZZU物联网工程专业Linux实验整合
- TongWeb V7.0 工具使用指南
- TongWeb V7.0 Commandstool使用指南
- YOLO摔倒检测ppt
- TongWeb V7.0 应用开发手册
- 数字图像处理与分析期末复习笔记
- 慧荣量产工具1,SM2258AB-MPQ0719A-FWQ0621A-Samsung
- TongWeb V7.0 等级保护指南
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈