package test;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.io.*;
import java.util.*;
import java.util.regex.Pattern;
import static java.lang.Math.log;
import static java.lang.Math.pow;
/**
* Created by yizhou on 14-4-9.
*/
public class TezhenXuanze_副本 {
private static Map<String,Integer> WordList = new HashMap<String, Integer>(); // all words: token -> vocabulary index, assigned in discovery order
private static Map<String,Integer> ClassList = new HashMap<String,Integer>(); // all classes: class directory name -> class index
// private static Map<String,Map<Integer,Double>> WordFeatureValue = new HashMap<String, Map<Integer, Double>>(); // (superseded) per-class feature scores ("mutual information of all classes")
private static Map<String,Double> WordFeatureValue = new HashMap<String, Double>(); // feature score per word (per the commented-out predecessor, presumably mutual information — confirm)
private static List<String> KeyWord = new ArrayList<String>(); // selected feature/key words
private static int wordnum=0; // next vocabulary index to assign == current size of WordList (intended)
private static int classnum=0; // number of classes (subdirectories of the base path)
private static Pattern pattern = Pattern.compile("[0-9]{6}"); // matches a 6-digit sequence (presumably a stock code — confirm)
private static Pattern pattern1 = Pattern.compile("[0-9]{1,10}"); // matches any digit run; used to skip numeric tokens
// basefilepath is the root directory containing one subdirectory per class (e.g. a "bullish" directory,
// a "bearish" directory, ...); each filepath[i] subdirectory holds all documents of that class.
// Input documents are raw, unsegmented text.
// Tokenizes every training document under basefilepath/<class>/ into a
// "word matrix" file of the same name under writepath/<class>/: one
// tab-separated line of tokens per surviving input line, terminated by a
// running global line counter.
// Side effects: fills ClassList (class name -> index), WordList
// (token -> vocabulary index) and updates classnum/wordnum.
// Throws IOException on any read/write failure.
public static void Product_WordMatrix(File basefilepath, File writepath) throws IOException {
    IKAnalyzer analyzer = new IKAnalyzer(true); // true = smart (coarse-grained) segmentation
    File[] filepath = basefilepath.listFiles();
    Integer count = 0; // global running line id, appended to every output line
    classnum = filepath.length;
    for (int i = 0; i < classnum; i++) {
        ClassList.put(filepath[i].getName(), i);
        File[] files = filepath[i].listFiles(); // all documents of this class
        // File.separator instead of a hard-coded "\\" so the code also works off Windows
        File path = new File(writepath.getPath() + File.separator + filepath[i].getName()); // per-class output dir
        if (!path.exists()) path.mkdirs();
        int dayux00 = 0; // number of lines dropped by the shaixuan() filter
        for (File filename : files) {
            File writefile = new File(path.getPath() + File.separator + filename.getName());
            // try-with-resources: the original leaked writer/reader when an
            // exception occurred between open and close.
            // NOTE(review): platform-default charset is used for both read and
            // write (Chinese text, presumably GBK or UTF-8) — confirm and
            // consider passing an explicit Charset.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(writefile));
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(new FileInputStream(filename)))) {
                String line = br.readLine();
                while (line != null) {
                    count++;
                    line = shaixuan(line, filename.getName());
                    if (line == null) { dayux00++; line = br.readLine(); continue; }
                    // NOTE(review): newer Lucene versions require tokenStream.reset()
                    // before incrementToken() and end()/close() afterwards — confirm
                    // against the IKAnalyzer/Lucene version in use.
                    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(line));
                    tokenStream.addAttribute(CharTermAttribute.class);
                    while (tokenStream.incrementToken()) {
                        String word = tokenStream.getAttribute(CharTermAttribute.class).toString();
                        writer.write(word + "\t");
                        // Skip tokens containing digits; register a word only once so
                        // vocabulary indices stay dense (the original re-put every
                        // occurrence, overwriting the index and bumping wordnum each
                        // time, which left gaps and shifted indices).
                        if (!pattern1.matcher(word).find() && !WordList.containsKey(word)) {
                            WordList.put(word, wordnum);
                            wordnum++;
                        }
                    }
                    writer.write(count.toString());
                    writer.write("\r\n");
                    writer.flush();
                    line = br.readLine();
                }
            }
        }
        System.out.println("类别:" + filepath[i].getName() + "过滤数目:" + dayux00 + ":" + count);
    }
}
// Filters/normalizes one raw line of a report document.
// Returns the trimmed line, or null when the line should be discarded
// (research-report boilerplate, announcement-date lines, or lines carrying a
// 6-digit code that does not belong to this file's name).
public static String shaixuan(String line, String filename) {
    // strip noisy symbols (the class contains duplicates; harmless)
    line = line.replaceAll("[+~$`^=|<>~`$^+=|<>¥×]", "");
    if (line.indexOf("评级") != -1) { // "rating"
        line = line.replaceAll("[“”\"的]", "");
        int idx = line.indexOf("评级"); // recomputed after the replaceAll above
        if (idx > 5)
            line = line.substring(idx - 6, idx); // keep the 6 chars preceding "评级"
        else
            // BUG FIX: the original computed this substring but discarded the
            // result (missing assignment), leaving the line unfiltered
            line = line.substring(0, idx);
    } else if (line.indexOf("投资建议") != -1) { // "investment advice"
        if (line.indexOf("风险提示") != -1) // cut before "risk warning"
            line = line.substring(line.indexOf("投资建议"), line.indexOf("风险提示"));
        else
            line = line.substring(line.indexOf("投资建议"));
    } else if (line.indexOf("研报") != -1 && line.indexOf("评级") == -1) {
        // "research report" mentioned without a rating: drop the line
        line = null;
    } else if (line.indexOf("公告日期") != -1
            || (pattern.matcher(line).find()
                && line.indexOf(filename.substring(0, Math.min(5, filename.length()))) == -1)) {
        // Drop announcement-date lines, and lines with a 6-digit code whose
        // filename prefix does not appear in them. Math.min guards against the
        // StringIndexOutOfBoundsException the original threw for file names
        // shorter than 5 characters.
        line = null;
    }
    return line;
}
// Like Product_WordMatrix, but for the test split: tokenizes every document
// under basefilepath/<class>/ into writepath/<class>/ WITHOUT touching the
// global vocabulary (WordList/wordnum) or the class registry (ClassList).
// Throws IOException on any read/write failure.
public static void Product_WordMatrix_Test(File basefilepath, File writepath) throws IOException {
    IKAnalyzer analyzer = new IKAnalyzer(true); // true = smart (coarse-grained) segmentation
    File[] filepath = basefilepath.listFiles();
    Integer count = 0; // global running line id, appended to every output line
    for (int i = 0; i < filepath.length; i++) {
        File[] files = filepath[i].listFiles(); // all documents of this class
        // File.separator instead of a hard-coded "\\" so the code also works off Windows
        File path = new File(writepath.getPath() + File.separator + filepath[i].getName()); // per-class output dir
        if (!path.exists()) path.mkdirs();
        int dayux00 = 0; // number of lines dropped by the shaixuan() filter
        for (File filename : files) {
            File writefile = new File(path.getPath() + File.separator + filename.getName());
            // try-with-resources: the original leaked writer/reader when an
            // exception occurred between open and close.
            // NOTE(review): platform-default charset is used for both read and
            // write — confirm and consider passing an explicit Charset.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(writefile));
                 BufferedReader br = new BufferedReader(
                         new InputStreamReader(new FileInputStream(filename)))) {
                String line = br.readLine();
                while (line != null) {
                    count++;
                    line = shaixuan(line, filename.getName());
                    if (line == null) { dayux00++; line = br.readLine(); continue; }
                    // NOTE(review): newer Lucene versions require tokenStream.reset()
                    // before incrementToken() and end()/close() afterwards — confirm
                    // against the IKAnalyzer/Lucene version in use.
                    TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(line));
                    tokenStream.addAttribute(CharTermAttribute.class);
                    while (tokenStream.incrementToken()) {
                        String word = tokenStream.getAttribute(CharTermAttribute.class).toString();
                        writer.write(word + "\t");
                    }
                    writer.write(count.toString());
                    writer.write("\r\n");
                    writer.flush();
                    line = br.readLine();
                }
            }
        }
        System.out.println("测试类别:" + filepath[i].getName() + "\t过滤数目:" + dayux00);
    }
}
public static void Product_ZiMatrix(File basefilepath,File writepath) throws IOException {
File [] filepath = basefilepath.listFiles();
classnum = filepath.length;
for(int i=0;i<classnum;i++){
ClassList.put(filepath[i].getName(),i);
File []files = filepath[i].listFiles();//存储子目录下所有文件
File path = new File(writepath.getPath()+"\\"+filepath[i].getName(
没有合适的资源?快使用搜索试试~ 我知道了~
股吧情绪倾向量化DEMO(毕业设计+机器学习课程project).zip
共93个文件
java:23个
xml:19个
jsp:11个
0 下载量 10 浏览量
2024-01-05
15:45:34
上传
评论
收藏 1.07MB ZIP 举报
温馨提示
股吧情绪倾向量化DEMO(毕业设计+机器学习课程project).zip
资源推荐
资源详情
资源评论
收起资源包目录
股吧情绪倾向量化DEMO(毕业设计+机器学习课程project).zip (93个子文件)
haah
.gitattributes 378B
src
cidian
stopword.dic 129B
guba.dic 7KB
股票词汇大全.dic 139KB
股票基金2.dic 169KB
股票基金.dic 80KB
IKAnalyzer.cfg.xml 536B
test
Spider.java 4KB
TxtValue.java 487B
CiPing.java 2KB
jisuan.java 2KB
DataClassify_old.java 6KB
IKAnalyzerTest.java 15KB
DataPreProcess_old.java 14KB
AttributeSelectionTest.java 4KB
TezhenXuanze.java 9KB
WebSpider_old.java 5KB
TezhenXuanze_副本.java 21KB
TezhenXuanze_old.java 20KB
Servlet
ResultServlet.java 647B
PaquServlet.java 1KB
LiangHuaServlet.java 1KB
TrainServlet.java 1KB
product
WebSpider.java 5KB
DataPreProcess.java 10KB
DataClassify.java 6KB
DataClassify_old.java 6KB
DataSample.java 1KB
Quantization.java 4KB
cidianfa.java 2KB
exportToHTML
股吧情绪倾向分析平台.iml.html 6KB
index.html 186B
out
artifacts
bishe
fonts.css 19KB
WEB-INF
web.xml 1KB
index.jsp 2KB
tbzs.jsp 2KB
default.css 10KB
ichart.js 95KB
bootstrap.css 123KB
sjxl.jsp 2KB
images
pic02.jpg 2KB
logo.jpg 8KB
pic01.jpg 81KB
result.jsp 9KB
fonts
FontAwesome.otf 62KB
fontawesome-webfont.eot 37KB
fontawesome-webfont.woff 43KB
fontawesome-webfont.svg 198KB
fontawesome-webfont.ttf 79KB
qxlh.jsp 2KB
web_war_exploded
WEB-INF
web.xml 313B
index.jsp 175B
production
股吧情绪倾向分析平台
cidian
stopword.dic 129B
guba.dic 7KB
股票词汇大全.dic 139KB
股票基金2.dic 169KB
股票基金.dic 80KB
IKAnalyzer.cfg.xml 536B
web
fonts.css 19KB
WEB-INF
web.xml 1KB
index.jsp 2KB
tbzs.jsp 2KB
default.css 10KB
ichart.js 95KB
bootstrap.css 123KB
sjxl.jsp 2KB
images
pic02.jpg 2KB
logo.jpg 8KB
pic01.jpg 81KB
result.jsp 9KB
fonts
FontAwesome.otf 62KB
fontawesome-webfont.eot 37KB
fontawesome-webfont.woff 43KB
fontawesome-webfont.svg 198KB
fontawesome-webfont.ttf 79KB
qxlh.jsp 2KB
.idea
project-template.xml 186B
description.html 36B
uiDesigner.xml 9KB
artifacts
bishe.xml 776B
scopes
scope_settings.xml 139B
libraries
jsoup_1_7_2.xml 217B
weka.xml 449B
IKAnalyzer2012.xml 319B
vcs.xml 166B
workspace.xml 58KB
misc.xml 5KB
compiler.xml 711B
modules.xml 302B
encodings.xml 264B
copyright
profiles_settings.xml 111B
.gitignore 803B
股吧情绪倾向分析平台.iml 1KB
共 93 条
- 1
资源评论
Lei宝啊
- 粉丝: 2051
- 资源: 1330
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功