package com.beibei.webapp.search.index;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.ResultSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.htmlparser.Parser;
import com.beibei.webapp.model.BbProduct;
/**
* <p>
* ClassName : IndexManager.java
* </p>
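* <p>
* Title : Lucene index manager for products
* </p>
* <p>
* Description : Builds and incrementally extends the Lucene product index, and
* persists a product-ID checkpoint in a disk file so that later runs add only
* products with a higher ID.
* </p>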
* <p>
* Copyright : beibei.
* </p>
* <p>
* CreateTime : MAR 8, 2010 10:35:42 AM
*
* @author : minami
* @version : 1.0
*/
public class IndexManager {
/**
* Set to {@code true} by {@code addIndex} once it has added at least one
* document to the index; {@code addIndex} checks it to decide whether the
* product ID should be written to the store-ID file.
*/
boolean indexFlag = false;
/**
 * Writes the given ID to the file at {@code path}, replacing any previous
 * content. The file is created if it does not exist yet.
 *
 * @param path    location of the store-ID file
 * @param storeId ID text to persist; must not be {@code null}
 * @return {@code true} if the ID was written and the file was closed without
 *         an I/O error, {@code false} otherwise
 * @throws NullPointerException if {@code storeId} is {@code null}
 */
public static boolean writeStoreId(String path, String storeId) {
    FileWriter fw = null;
    try {
        // FileWriter creates the file when it is missing and truncates it
        // otherwise, so no separate createNewFile() call is needed.
        fw = new FileWriter(path);
        // Write through the FileWriter directly: a PrintWriter swallows
        // IOExceptions, which would make a failed write look successful.
        fw.write(storeId);
        // Close inside the try so that a failing flush-on-close is reported
        // as a failed write instead of being ignored.
        fw.close();
        fw = null;
        return true;
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        // Only reached with a non-null writer when the write or close above
        // failed; release the file handle on a best-effort basis.
        if (fw != null) {
            try {
                fw.close();
            } catch (IOException ignored) {
                // The original failure has already been reported.
            }
        }
    }
}
/**
 * Reads the stored ID from the first line of the file at {@code path}.
 * The file is created (empty) if it does not exist yet.
 *
 * @param path location of the store-ID file
 * @return the first line of the file; {@code "0"} if the file is empty or its
 *         first line is empty; the empty string if the file could not be
 *         created or read (the error is printed, not thrown)
 */
public static String getStoreId(String path) {
    String storeId = "";
    BufferedReader br = null;
    try {
        File file = new File(path);
        if (!file.exists()) {
            file.createNewFile();
        }
        br = new BufferedReader(new FileReader(file));
        storeId = br.readLine();
        // Compare by content: readLine() never returns the interned ""
        // literal, so a reference comparison would miss a blank first line.
        if (storeId == null || storeId.length() == 0) {
            storeId = "0";
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Closing the BufferedReader also closes the wrapped FileReader.
        if (br != null) {
            try {
                br.close();
            } catch (IOException closeError) {
                closeError.printStackTrace();
            }
        }
    }
    return storeId;
}
/**
 * Builds or extends the Lucene index from a list of products.
 * <p>
 * Opens an {@link IndexWriter} on {@code directory} with the given
 * {@code analyzer} and unlimited field length, sets merge factor and
 * max-buffered-docs to 100, and delegates to {@code addIndex} to add the
 * products. The writer's "create" flag is the result of
 * {@code isEmpty(storeIdPath)}. If the writer was opened, it is optimized
 * and closed before this method returns, whether or not indexing succeeded.
 * <p>
 * No exception is propagated: failures are printed and reported through the
 * return value.
 *
 * @param directory   Lucene directory that holds the index
 * @param storeIdPath path of the file that stores the product-ID checkpoint
 * @param productList products to index; passed on to {@code addIndex}
 * @param analyzer    analyzer used by the writer to tokenize field text
 * @param map         lookup passed on to {@code addIndex}
 * @return {@code true} if the writer was opened and {@code addIndex}
 *         returned without throwing; {@code false} if an exception reached
 *         this method. Errors handled inside {@code addIndex} itself are not
 *         reflected here.
 */
@SuppressWarnings("unchecked")
public boolean createIndex(Directory directory, String storeIdPath,
        List productList, Analyzer analyzer, Map<Integer, String> map) {
    IndexWriter iwriter = null;
    try {
        boolean empty = isEmpty(storeIdPath);
        iwriter = new IndexWriter(directory, analyzer, empty,
                IndexWriter.MaxFieldLength.UNLIMITED);
        iwriter.setMergeFactor(100);
        iwriter.setMaxBufferedDocs(100);
        addIndex(iwriter, analyzer, productList, storeIdPath, map);
        return true;
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("出错了" + e.getClass() + "\n 错误信息为: "
                + e.getMessage());
        return false;
    } finally {
        // The writer is null when its constructor failed; nothing to release.
        if (iwriter != null) {
            try {
                iwriter.optimize();
            } catch (Exception optimizeError) {
                optimizeError.printStackTrace();
            } finally {
                // Always close, even if optimize() failed, so the index
                // write lock is not left behind.
                try {
                    iwriter.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }
}
/**
 * Builds or extends the Lucene index from a JDBC result set of products.
 * <p>
 * Opens an {@link IndexWriter} on {@code directory} with the given
 * {@code analyzer}, sets merge factor and max-buffered-docs to 100, and
 * delegates to {@code addIndex} to add the rows. The writer's "create" flag
 * is the result of {@code isEmpty(storeIdPath)} (also printed to standard
 * output). Before returning, the writer (if it was opened) is optimized and
 * closed, and {@code rs} is closed.
 * <p>
 * No exception is propagated: failures are printed and reported through the
 * return value.
 *
 * @param directory   Lucene directory that holds the index
 * @param storeIdPath path of the file that stores the product-ID checkpoint
 * @param rs          product rows to index; closed by this method
 * @param analyzer    analyzer used by the writer to tokenize field text
 * @param map         lookup passed on to {@code addIndex}
 * @return {@code true} if the writer was opened and {@code addIndex}
 *         returned without throwing; {@code false} if an exception reached
 *         this method. {@code addIndex} catches its own exceptions, so
 *         per-row failures are not reflected here.
 */
@SuppressWarnings("unchecked")
public boolean createIndex(Directory directory, String storeIdPath,
        ResultSet rs, Analyzer analyzer, Map<String, String> map) {
    IndexWriter iwriter = null;
    try {
        boolean empty = isEmpty(storeIdPath);
        System.out.println(empty);
        // NOTE(review): this overload caps the field length at 100 while the
        // List-based overload uses UNLIMITED - confirm this is intentional.
        iwriter = new IndexWriter(directory, analyzer, empty,
                new IndexWriter.MaxFieldLength(100));
        iwriter.setMergeFactor(100);
        iwriter.setMaxBufferedDocs(100);
        addIndex(iwriter, analyzer, rs, storeIdPath, map);
        return true;
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("出错了" + e.getClass() + "\n 错误信息为: "
                + e.getMessage());
        return false;
    } finally {
        // Release each resource independently so that a failure in one step
        // does not prevent the remaining ones from being released.
        if (iwriter != null) {
            try {
                iwriter.optimize();
            } catch (Exception optimizeError) {
                optimizeError.printStackTrace();
            } finally {
                try {
                    iwriter.close();
                } catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
        if (rs != null) {
            try {
                rs.close();
            } catch (Exception closeError) {
                closeError.printStackTrace();
            }
        }
    }
}
/**
 * Adds one Lucene document per not-yet-indexed product row to the index.
 * <p>
 * A row is indexed only when its {@code PRODUCTID} is greater than the
 * checkpoint ID read from the file at {@code storeIdPath} at the start of
 * the call. If at least one row was indexed, the highest indexed
 * {@code PRODUCTID} is written back to that file.
 * <p>
 * Any exception is printed and ends the call; in that case the checkpoint
 * file is not updated.
 *
 * @param iwriter     open writer the documents are added to
 * @param analyzer    not used by this method
 * @param rs          product rows; read columns are PRODUCTID,
 *                    PRODUCTDESCRIBE, PRODUCTNAME, MEMBERID, CATEID, PRICE,
 *                    OUTDATE, LEASTORDER, AREA, TOTAL, IMAGE and UNIT
 * @param storeIdPath path of the file that stores the product-ID checkpoint
 * @param map         member name keyed by the MEMBERID column value
 */
@SuppressWarnings("unchecked")
public void addIndex(IndexWriter iwriter, Analyzer analyzer, ResultSet rs,
        String storeIdPath, Map<String, String> map) {
    try {
        // Read the checkpoint from the same file it is written back to.
        int checkpoint = Integer.parseInt(getStoreId(storeIdPath));
        int maxIndexedId = checkpoint;
        // Local flag: the shared indexFlag field is never reset, so it
        // cannot tell whether THIS call indexed anything.
        boolean indexedAny = false;

        while (rs.next()) {
            String productId = rs.getString("PRODUCTID");
            int currentId = Integer.parseInt(productId);
            if (currentId <= checkpoint) {
                continue;
            }

            Parser parser = new Parser();
            parser.setInputHTML("" + rs.getString("PRODUCTDESCRIBE"));
            String name = rs.getString("PRODUCTNAME");
            if (null == name) {
                name = "wrong-product-name";
            }
            // Only the plain text of the first parsed node is indexed.
            String describe = parser.parse(null).elementAt(0)
                    .toPlainTextString().trim();
            String memberName = map.get(rs.getString("MEMBERID"));
            if (memberName == null) {
                // Field rejects null values; an unknown member must not
                // abort the whole batch.
                memberName = "";
            }

            Document d = new Document();
            d.add(new Field("id", productId, Field.Store.YES,
                    Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));
            d.add(new Field("name", name, Field.Store.YES,
                    Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));
            d.add(new Field("describe", describe, Field.Store.NO,
                    Field.Index.ANALYZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));
            d.add(storedOnlyField("cateId", rs.getString("CATEID")));
            d.add(storedOnlyField("memberName", memberName));
            d.add(storedOnlyField("price", rs.getString("PRICE")));
            d.add(storedOnlyField("outDate", rs.getString("OUTDATE")));
            d.add(storedOnlyField("leastOrder", rs.getString("LEASTORDER")));
            d.add(storedOnlyField("area", rs.getString("AREA")));
            d.add(storedOnlyField("total", rs.getString("TOTAL")));
            d.add(storedOnlyField("image", rs.getString("IMAGE")));
            d.add(storedOnlyField("unit", rs.getString("UNIT")));
            iwriter.addDocument(d);

            indexedAny = true;
            indexFlag = true; // kept for any other code that reads the field
            // Track the highest ID actually indexed, so rows that arrive out
            // of order cannot move the checkpoint backwards.
            maxIndexedId = Math.max(maxIndexedId, currentId);
        }

        if (indexedAny) {
            // Persist the checkpoint for the next incremental run.
            writeStoreId(storeIdPath, String.valueOf(maxIndexedId));
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Builds a field that is stored with the document but is neither indexed nor
 * given term vectors. A {@code null} value is stored as the text "null".
 */
private static Field storedOnlyField(String fieldName, String value) {
    return new Field(fieldName, String.valueOf((Object) value),
            Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
}
public void addIndex(IndexWriter iwriter, Analyzer analyzer,
List<BbProduct> productList, String storeIdPath,
Map<Integer, String> map) {
try {
String storeId = getStoreId("/index/storeId.txt");
int id = Integer.parseInt(storeId);
for (BbProduct bp : productList) {
Document d = new Document();
if (bp.getProductId() > id) {
Parser parser = new Parser();
parser.setInputHTML("" + bp.getProductDescribe());
String name = bp.getProductName();
if (null == name) {
name = "wrong-product-name";
}
String strings = parser.parse(null).elementAt(0)
.toPlainTextString().trim();
String memberName = map.get(bp.getMemberId());
- 1
- 2
- 3
前往页