人工智能-项目实践-信息检索-专利检索系统Spring

共215个文件

java：78个

js：35个

class：22个

版权申诉

spring

人工智能

信息检索

JavaScript

40 浏览量 · 2024-02-29 21:05:04 上传 · 评论 · 收藏 · 3.33MB · ZIP · 举报

资源推荐

资源详情

资源评论

收起资源包目录

人工智能-项目实践-信息检索-专利检索系统Spring （215个子文件）

SearchController.class 7KB

PDFHelper.class 7KB

TechnicalDataChartController.class 7KB

Nlpir.class 4KB

ChartController.class 3KB

StringHelper.class 3KB

CheckHelper.class 3KB

IndexController.class 2KB

ClusterController.class 2KB

Constants.class 2KB

ReadConfigUtil.class 2KB

FileHelper.class 2KB

ICTCLASTokenizer.class 2KB

HibernateUtil.class 1KB

ICTCLASAnalyzer.class 1KB

Nlpir$CLibrary.class 901B

IndicatorData.class 662B

SystemParas.class 530B

ClusterValueItem.class 506B

IndicatorValueItem.class 472B

IndicatorParam.class 418B

PatentMatrix.class 396B

.classpath 1KB

org.eclipse.wst.common.component 651B

org.eclipse.wst.jsdt.ui.superType.container 49B

jquery-ui.min.css 29KB

jquery.fancybox.css 5KB

all.css 1KB

index.css 1KB

search.css 615B

administrator.css 492B

indicator.css 448B

result.css 398B

client.css 357B

showFile.css 192B

.gitignore 31B

IKAnalyzer2012FF_u1.jar 1.11MB

jai_imageio-1.1-alpha.jar 729KB

paoding-analysis-4.4.0.jar 116KB

Cluster.java 24KB

GetPatentsFromNetwork.java 13KB

AccountController.java 13KB

Search.java 11KB

PatentService.java 9KB

SearchController.java 7KB

PDFHelper.java 7KB

Indicator.java 6KB

TrizDao.java 6KB

TechnicalDataChartController.java 6KB

SaveHtmlPatentsDataToMysql.java 5KB

Nlpir.java 5KB

PatentDao.java 4KB

Patent.java 4KB

ICTCLASTokenizer.java 4KB

ClassifyController.java 3KB

Index.java 3KB

DatabaseHelper.java 3KB

PatentWordTfDfDao.java 3KB

ClusterImpl.java 3KB

Constants.java 3KB

ClassificationDao.java 2KB

StringHelper.java 2KB

TotalDao.java 2KB

ChartController.java 2KB

ClassificationService.java 2KB

ICTCLASAnalyzer.java 2KB

PatentFeatureWordDao.java 2KB

PatentWordTfDfService.java 2KB

Account.java 2KB

CheckHelper.java 2KB

IndexController.java 2KB

ClusterController.java 2KB

PatentsAfterWordDivide.java 2KB

ReadConfigUtil.java 1KB

AccountDao.java 1KB

TrizController.java 1KB

PatentFeatureWord.java 1KB

PatentClassification.java 1KB

AccountService.java 1KB

PatentsAfterWordDivideDao.java 1KB

GetPatentsFromNetworkImpl.java 1KB

HibernateUtil.java 1KB

PatentFeatureWordService.java 1KB

FileHelper.java 1KB

PatentClusterDao.java 1KB

PatentWordTfDf.java 1KB

PatentsAfterWordDivideService.java 1KB

TrizService.java 1KB

SearchImpl.java 1KB

DataDao.java 1KB

SettingDao.java 1KB

WordInfoDao.java 1000B

PatentClusterService.java 862B

SettingService.java 816B

SaveHtmlPatentsDataToMysqlImpl.java 812B

DataService.java 718B

WordInfoService.java 707B

WordSmark.java 673B

PatentCluster.java 670B

WordSmarkDao.java 637B

共 215 条

package cn.edu.scut.patent.prework; import java.sql.Connection; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import ICTCLAS2014.Nlpir; import cn.edu.scut.patent.model.Data; import cn.edu.scut.patent.model.Patent; import cn.edu.scut.patent.model.PatentCluster; import cn.edu.scut.patent.model.PatentFeatureWord; import cn.edu.scut.patent.model.PatentMatrix; import cn.edu.scut.patent.model.PatentWordTfDf; import cn.edu.scut.patent.model.PatentsAfterWordDivide; import cn.edu.scut.patent.model.WordInfo; import cn.edu.scut.patent.prework.impl.ClusterImpl; import cn.edu.scut.patent.service.DataService; import cn.edu.scut.patent.service.PatentClusterService; import cn.edu.scut.patent.service.PatentFeatureWordService; import cn.edu.scut.patent.service.PatentService; import cn.edu.scut.patent.service.PatentWordTfDfService; import cn.edu.scut.patent.service.PatentsAfterWordDivideService; import cn.edu.scut.patent.service.StopwordService; import cn.edu.scut.patent.service.WordInfoService; import cn.edu.scut.patent.service.WordSmarkService; import cn.edu.scut.patent.util.Constants; import cn.edu.scut.patent.util.DatabaseHelper; import cn.edu.scut.patent.util.StringHelper; /** * 对专利文本进行K-means文本聚类处理 * * @author CJX * */ public class Cluster implements ClusterImpl { public static Connection con; public Map<String, PatentWordTfDf> titleWordDic; public Map<String, PatentWordTfDf> abstractWordDic; public Map<String, PatentWordTfDf> contentWordDic; int count = 0; public Cluster() { try { con = DatabaseHelper.getConnection(); } catch (Exception e) { e.printStackTrace(); } } public void doCluster() { long totalStartTime = new Date().getTime();// 总的开始的时间 String result = ""; Cluster cluster = new Cluster(); // 如果数据表PATENT_WORD_TF_DF不为空，则跳过下述函数，不再浪费资源重复计算。 if (PatentWordTfDfService.isEmpty()) { PatentsAfterWordDivideService.cleanTable(); StopwordService.cleanTable(); 
WordSmarkService.cleanTable(); long startTime = new Date().getTime();// 开始的时间 System.out.println("将名称和摘要分词并存入patent_word_after_divide中"); // 将名称和摘要分词并存入patent_word_after_divide cluster.divideWordToDb(); String timeConsume1 = "1.花费了" + StringHelper.timer(startTime) + "完成专利名称和专利摘要的分词和过滤！"; System.out.println(timeConsume1); result += timeConsume1 + "\n"; } // 如果数据表T_WORD_INFO不为空，则跳过下述函数，不再浪费资源重复计算。 if (WordInfoService.isEmpty()) { PatentWordTfDfService.cleanTable(); long startTime = new Date().getTime();// 开始的时间 // 计算TF，存入Map中 cluster.countTF(); // 将Map中的数据(即TF)存入到patent_word_tf_df cluster.saveWordDicToDatabase(); // 更新patent_word_tf_df中的DF值 cluster.countDF(); String timeConsume2 = "2.花费了" + StringHelper.timer(startTime) + "完成统计在名称和摘要中某个词语出现的频率、存入数据库、和计算文档频数DF！"; System.out.println(timeConsume2); result += timeConsume2 + "\n"; } // 如果数据表PATENT_FEATURE_WORD不为空，则跳过下述函数，不再浪费资源重复计算。 if (PatentFeatureWordService.isEmpty()) { WordInfoService.cleanTable(); long startTime = new Date().getTime();// 开始的时间 // 保存（word,maxTF,DF）值到t_word_info cluster.extractFeatureWord(); String timeConsume3 = "3.花费了" + StringHelper.timer(startTime) + "完成计算所有词权重MaxTf，根据权重提取特证词并保存到数据表 T_WORD_INFO中！"; System.out.println(timeConsume3); result += timeConsume3 + "\n"; } // 如果数据表PATENT_CLUSTER不为空，则跳过下述函数，不再浪费资源重复计算。 if (PatentClusterService.isEmpty()) { PatentFeatureWordService.cleanTable(); long startTime = new Date().getTime();// 开始的时间 cluster.countAndSaveToDb(20); cluster.countStandardTFIDF(); String timeConsume4 = "4.花费了" + StringHelper.timer(startTime) + "完成计算并规范化TF-IDF值，提取前20位存入数据表PATENT_FEATURE_WORD中！"; System.out.println(timeConsume4); result += timeConsume4 + "\n"; } // 如果数据表PATENT_CLUSTER不为空，则跳过下述函数，不再浪费资源重复计算。 if (PatentClusterService.isEmpty()) { long startTime = new Date().getTime();// 开始的时间 cluster.clusterByQYJ(20); String timeConsume5 = "5.花费了" + StringHelper.timer(startTime) + "完成聚类过程！"; System.out.println(timeConsume5); result += timeConsume5 + "\n"; } String timeConsumeTotal = "总共花费了" + 
StringHelper.timer(totalStartTime) + "完成整个过程，great！"; result += timeConsumeTotal + "\n"; System.out.println(result); } public void divideWordToDb() { List<Patent> list = new PatentService().getPatentsKey(); PatentsAfterWordDivide pttAWDM; int count = 0; for (Patent patent : list) { pttAWDM = new PatentsAfterWordDivide(); pttAWDM.setPttNum(patent.getPttNum()); pttAWDM.setPttDate(patent.getPttDate()); pttAWDM.setClassNumG06Q(patent.getClassNumG06Q()); // 将专利名称和摘要分词 pttAWDM.setPttNameDivided(Nlpir.doNlpirString(patent.getPttName(), 0, null, null)); pttAWDM.setPttAbstractDivided(Nlpir.doNlpirString( patent.getPttAbstract(), 0, null, null)); // 存入到patents_after_word_divide数据表 new PatentsAfterWordDivideService().save(pttAWDM); count++; // ************************************* // 用于限制聚类的个数 if (Constants.CLUSTER_LIMIT > 0) { if (count >= Constants.CLUSTER_LIMIT) { break; } } // ************************************* } } public void countTF() { List<PatentsAfterWordDivide> list = new PatentsAfterWordDivideService() .getAllPatentsAfterWordDivide(); PatentsAfterWordDivide pawd; String[] titleArr; String[] abstractArr; String[] contentArr; titleWordDic = new HashMap<String, PatentWordTfDf>(); abstractWordDic = new HashMap<String, PatentWordTfDf>(); contentWordDic = new HashMap<String, PatentWordTfDf>(); // 数据集循环 for (PatentsAfterWordDivide patentsAfterWordDivide : list) { pawd = patentsAfterWordDivide; String tempStr; // 以空格切开专利名词（折扣/n 卡/n 系统/n ） titleArr = pawd.getPttNameDivided().split(" "); for (int i = 0; i < titleArr.length; i++) { tempStr = titleArr[i]; PatentWordTfDf p; // 以word_专利名称为格式判断是否唯一，如果是唯一的tf为1 if (titleWordDic.get(tempStr + "_" + pawd.getPttNum()) == null) { p = new PatentWordTfDf(); p.setFlag(1); // 1为标题 p.setWord(tempStr); p.setPttNum(pawd.getPttNum()); titleWordDic.put(tempStr + "_" + pawd.getPttNum(), p); } // 如果不唯一，tf+1 else { p = titleWordDic.get(tempStr + "_" + pawd.getPttNum()); p.setTf(p.getTf() + 1); } } // 以空格切开专利摘要 abstractArr = 
pawd.getPttAbstractDivided().split(" "); for (int j = 0; j < abstractArr.length; j++) { tempStr = abstractArr[j]; if (tempStr.lastIndexOf("/") != -1) tempStr = tempStr.substring(0, tempStr.lastIndexOf("/")); PatentWordTfDf p; if (abstractWordDic.get(tempStr + "_" + pawd.getPttNum()) == null) { p = new PatentWordTfDf(); p.setFlag(0); // 0为摘要 p.setWord(tempStr); p.setPttNum(pawd.getPttNum()); abstractWordDic.put(tempStr + "_" + pawd.getPttNum(), p); } else { p = abstractWordDic.get(tempStr + "_" + pawd.getPttNum()); p.setTf(p.getTf() + 1); } } // 以空格切开专利说明书 contentArr = pawd.getPtt

评论 · 收藏

内容反馈

版权申诉