package com.sl.bbs.util;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.google.common.primitives.Bytes;
public class Util {
// Class-wide SLF4J logger.
private static Logger logger = LoggerFactory.getLogger(Util.class);
// HDFS path of the primary term index file (term -> "secondaryIndexFile offset"). TODO: make configurable.
public static String filePath = "hdfs://hdp4:9000/index/part-r-00000";
// HDFS path of the merged source file that holds the actual result lines.
public static String sourcePath = "hdfs://hdp4:9000//tmp/1413181894365/part-r-00000";
// Shared Hadoop configuration used by every query method.
public static Configuration hdfsConf = new Configuration();
// term -> "secondaryIndexFile offset", loaded once by the static initializer below.
public static Map<String, String> indexMap = new HashMap<String, String>();
// query string -> ranked result map (rank -> offset), intended to avoid recomputing repeated queries.
public static Map<String, Map<Integer, String>> cacheMap = new HashMap<String, Map<Integer, String>>();
// Page size: number of results returned per result page.
public static final int count = 5;
static {
	// Load the whole term index into memory once at class-load time.
	// Each line of the index file is "<term>\t<secondaryIndexFile offset>".
	try {
		FileSystem fs = FileSystem.get(new Configuration());
		Path path = new Path(filePath);
		if (fs.exists(path)) {
			FileStatus status = fs.getFileStatus(path);
			// Index file must fit in a single byte array (< 2 GB).
			byte[] buffer = new byte[(int) status.getLen()];
			FSDataInputStream fsIn = fs.open(path);
			try {
				fsIn.readFully(0, buffer);
			} finally {
				// Close the stream even if readFully fails (original leaked it).
				fsIn.close();
			}
			// NOTE: do NOT fs.close() here — FileSystem.get() returns a JVM-wide
			// cached instance; closing it would break every later HDFS call.
			for (String line : new String(buffer, "utf-8").split("\n")) {
				String[] indexs = line.split("\t");
				// Skip blank or malformed lines; the original threw
				// ArrayIndexOutOfBoundsException here when the file was
				// missing/empty, killing the class with ExceptionInInitializerError.
				if (indexs.length >= 2) {
					indexMap.put(indexs[0], indexs[1]);
				}
			}
		}
	} catch (IOException e) {
		// Log at error level — a missing index makes every query return nothing.
		logger.error("failed to load index file " + filePath, e);
	}
}
/**
 * Ad-hoc smoke test: runs a sample query against the HDFS-backed index and
 * prints the matching source lines to stdout.
 */
public static void main(String[] args) {
	try {
		for (String str : queryDAR("二炮人次漂亮黑车")) {
			System.out.println(str);
		}
	} catch (IOException e) {
		// Use the class logger instead of printStackTrace() so the failure
		// ends up in the configured log output with its stack trace.
		logger.error("query failed", e);
	}
}
/**
 * Public search entry point; currently a straight alias for
 * {@link #queryDAR(String)}.
 *
 * @param key raw, unsegmented query string
 * @return the first page of matching source lines
 * @throws IOException if the HDFS index or source file cannot be read
 */
public static List<String> query(String key) throws IOException {
	List<String> results = queryDAR(key);
	return results;
}
/**
 * Segments the query, looks each term up in the in-memory index, accumulates
 * per-document relevance from the secondary index files, ranks the documents,
 * and returns the first page of source lines.
 *
 * @param keywords raw query string (segmented with IKAnalyzer)
 * @return first page (up to {@link #count}) of matching source lines
 * @throws IOException if HDFS reads fail
 */
public static List<String> queryDAR(String keywords) throws IOException {
	Map<Integer, String> newDrMap = cacheMap.get(keywords);
	if (newDrMap == null) {
		List<String> keyList = participles(keywords);
		// Group the offsets to read by secondary-index file: file -> offsets.
		Map<String, List<Long>> secIdexs = new HashMap<String, List<Long>>();
		for (String key : keyList) {
			String index = indexMap.get(key);
			if (index != null) {
				// Index entry layout: "<secondaryIndexFile> <offset>".
				String[] value = index.split(" ");
				List<Long> offList = secIdexs.get(value[0]);
				if (offList == null) {
					offList = new ArrayList<Long>();
					secIdexs.put(value[0], offList);
				}
				offList.add(Long.parseLong(value[1]));
			}
		}
		Map<String, Float> drMap = new HashMap<String, Float>();
		getDAR(secIdexs, drMap);
		newDrMap = sortMap(drMap);
		// Bug fix: the original computed the ranking but never stored it, so
		// the cache lookup above could never hit and every query recomputed.
		cacheMap.put(keywords, newDrMap);
	}
	// Always return the first page here; callers wanting other pages use
	// getSourceFile(map, page) directly.
	return getSourceFile(newDrMap, 1);
}
/**
 * Reads one source line per entry of {@code newDrMap}, seeking to the byte
 * offset stored in each entry's value within the merged source file.
 *
 * @param newDrMap rank -> byte-offset (as decimal string) into {@link #sourcePath}
 * @return the source lines in map iteration order
 * @throws IOException if the source file cannot be opened or read
 */
public static List<String> getSourceFile(Map<Integer, String> newDrMap)
		throws IOException {
	List<String> list = new ArrayList<String>();
	FileSystem fs = FileSystem.get(hdfsConf);
	Path path = new Path(sourcePath);
	if (fs.exists(path)) {
		FSDataInputStream fsIn = fs.open(path);
		try {
			for (Map.Entry<Integer, String> entry : newDrMap.entrySet()) {
				fsIn.seek(Long.parseLong(entry.getValue()));
				list.add(getLine(fsIn));
			}
		} finally {
			// Close the stream even on seek/read failure (original leaked it).
			fsIn.close();
		}
		// NOTE: no fs.close() — FileSystem.get() returns a JVM-wide cached
		// instance; closing it breaks every subsequent HDFS call.
	}
	return list;
}
/**
 * Paged variant of {@link #getSourceFile(Map)}: reads the source lines for one
 * result page. Pages are 1-based and {@link #count} entries long; a page past
 * the end yields an empty list.
 *
 * @param newDrMap  rank (0-based) -> byte-offset (as decimal string)
 * @param pageCount 1-based page number to fetch
 * @return up to {@code count} source lines for the requested page
 * @throws IOException if the source file cannot be opened or read
 */
public static List<String> getSourceFile(Map<Integer, String> newDrMap,
		int pageCount) throws IOException {
	List<String> list = new ArrayList<String>();
	FileSystem fs = FileSystem.get(hdfsConf);
	Path path = new Path(sourcePath);
	if (fs.exists(path)) {
		FSDataInputStream fsIn = fs.open(path);
		try {
			int start = (pageCount - 1) * count;
			// Math.min collapses the two duplicated branches of the original
			// (full page vs. partial last page) into one loop bound.
			int end = Math.min(pageCount * count, newDrMap.size());
			for (int i = start; i < end; i++) {
				fsIn.seek(Long.parseLong(newDrMap.get(i)));
				list.add(getLine(fsIn));
			}
		} finally {
			// Close the stream even on seek/read failure (original leaked it).
			fsIn.close();
		}
		// NOTE: no fs.close() — the cached FileSystem must stay open.
	}
	return list;
}
/**
 * Accumulates document relevance from the secondary index files.
 * Each secondary-index line read at a given offset has the layout
 * {@code "<term>\t<doc>:<weight>;<doc>:<weight>;..."}; weights for the same
 * doc across terms are summed into {@code drMap}.
 *
 * @param secIdexs secondary-index file path -> offsets to read in that file
 * @param drMap    out-parameter: doc key -> accumulated relevance weight
 * @throws IOException if a secondary index file cannot be read
 */
public static void getDAR(Map<String, List<Long>> secIdexs,
		Map<String, Float> drMap) throws IOException {
	// Hoisted out of the loop: FileSystem.get() is loop-invariant (and cached).
	FileSystem fs = FileSystem.get(hdfsConf);
	for (Map.Entry<String, List<Long>> mapEntry : secIdexs.entrySet()) {
		Path path = new Path(mapEntry.getKey());
		if (fs.exists(path)) {
			FSDataInputStream fsIn = fs.open(path);
			try {
				for (Long off : mapEntry.getValue()) {
					fsIn.seek(off);
					String[] kis = fsIn.readLine().split("\t");
					for (String str : kis[1].split(";")) {
						String[] drs = str.split(":");
						Float old = drMap.get(drs[0]);
						float w = Float.parseFloat(drs[1]);
						drMap.put(drs[0], old == null ? w : old + w);
					}
				}
			} finally {
				// Close the stream even on read failure (original leaked it).
				fsIn.close();
			}
		}
	}
}
/**
 * Segments a query string into terms using IKAnalyzer (smart mode).
 *
 * @param keywords raw query string
 * @return the segmented terms in stream order
 * @throws IOException if the analyzer fails
 */
public static List<String> participles(String keywords) throws IOException {
	List<String> keyList = new ArrayList<String>();
	Analyzer anal = new IKAnalyzer(true);
	StringReader reader = new StringReader(keywords);
	TokenStream ts = anal.tokenStream("", reader);
	CharTermAttribute term = ts.getAttribute(CharTermAttribute.class);
	try {
		// The Lucene 4.x TokenStream contract requires reset() before the
		// first incrementToken() — without it this throws at runtime.
		ts.reset();
		while (ts.incrementToken()) {
			keyList.add(term.toString());
		}
		ts.end();
	} finally {
		// Close the stream (and reader) per the workflow contract so the
		// analyzer can be reused.
		ts.close();
		reader.close();
	}
	return keyList;
}
/**
 * Ranks a doc -> relevance map by relevance descending and re-keys the result
 * by rank: 0 -> best doc key, 1 -> second best, and so on (insertion-ordered).
 *
 * @param oldMap doc key -> accumulated relevance weight
 * @return insertion-ordered map of rank -> doc key, best first
 */
public static Map<Integer, String> sortMap(Map<String, Float> oldMap) {
	List<Map.Entry<String, Float>> list =
			new ArrayList<Map.Entry<String, Float>>(oldMap.entrySet());
	Collections.sort(list, new Comparator<Map.Entry<String, Float>>() {
		public int compare(Map.Entry<String, Float> arg0,
				Map.Entry<String, Float> arg1) {
			// Reversed operands -> descending by weight.
			return arg1.getValue().compareTo(arg0.getValue());
		}
	});
	Map<Integer, String> newMap = new LinkedHashMap<Integer, String>();
	for (int i = 0; i < list.size(); i++) {
		newMap.put(i, list.get(i).getKey());
	}
	return newMap;
}
/**
 * Re-keys a rank -> offset map so the offsets are in ascending numeric order
 * (so the source file can be read with forward-only seeks): 0 -> smallest
 * offset, 1 -> next, and so on.
 *
 * @param oldMap rank -> byte offset as a decimal string
 * @return insertion-ordered map of index -> offset string, numerically ascending
 */
public static Map<Integer, String> sortMapOffs(Map<Integer, String> oldMap) {
	List<Map.Entry<Integer, String>> list =
			new ArrayList<Map.Entry<Integer, String>>(oldMap.entrySet());
	Collections.sort(list, new Comparator<Map.Entry<Integer, String>>() {
		public int compare(Map.Entry<Integer, String> arg0,
				Map.Entry<Integer, String> arg1) {
			// Compare the offsets numerically, not lexicographically.
			return Long.valueOf(arg0.getValue())
					.compareTo(Long.valueOf(arg1.getValue()));
		}
	});
	Map<Integer, String> newMap = new LinkedHashMap<Integer, String>();
	for (int i = 0; i < list.size(); i++) {
		newMap.put(i, list.get(i).getValue());
	}
	return newMap;
}
public static String getLine(FSDataInputStream fsIn) throws IOException {
List<Byte> list = new ArrayList<Byte>();
int count = 0;
int c = 0;
while ((c = fsIn.read()) != -1) {
if (c == 10 || c == 13) {
break;
}
list.add(Byte.valueO
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
BBSOnline.zip (49个子文件)
BBSOnline
.project 2KB
.settings
.jsdtscope 500B
org.eclipse.core.resources.prefs 66B
org.eclipse.wst.common.component 465B
org.eclipse.jdt.core.prefs 364B
org.eclipse.wst.common.project.facet.core.xml 252B
org.eclipse.wst.jsdt.ui.superType.container 49B
org.eclipse.wst.jsdt.ui.superType.name 6B
.mymetadata 300B
WebRoot
WEB-INF
web.xml 404B
lib
hadoop-hdfs-2.2.0.jar 5MB
guava-12.0.1.jar 1.71MB
commons-lang-2.6.jar 278KB
hbase-protocol-0.98.0-hadoop2.jar 3.14MB
hbase-client-0.98.0-hadoop2.jar 873KB
htrace-core-2.04.jar 31KB
commons-io-2.1.jar 159KB
commons-configuration-1.6.jar 292KB
slf4j-log4j12-1.7.5.jar 9KB
hadoop-common-2.2.0.jar 2.55MB
log4j-1.2.17.jar 478KB
hbase-common-0.98.0-hadoop2.jar 422KB
commons-cli-1.2.jar 40KB
hadoop-auth-2.2.0.jar 49KB
commons-el-1.0.jar 110KB
hbase-server-0.98.0-hadoop2.jar 3.17MB
lucene-core-4.3.0.jar 2.11MB
protobuf-java-2.5.0.jar 521KB
lucene-queryparser-4.3.0.jar 377KB
lucene-analyzers-common-4.3.0.jar 1.49MB
slf4j-api-1.7.5.jar 25KB
commons-logging-1.1.1.jar 59KB
hadoop-client-2.2.0.jar 2KB
IKAnalyzer2012FF_u1.jar 1.11MB
gson-2.2.4.jar 186KB
classes
com
sl
bbs
util
Util$1.class 1KB
Util$2.class 1KB
Util.class 10KB
BBS.class 1KB
core-site.xml 1KB
hdfs-site.xml 1KB
index.jsp 2KB
META-INF
MANIFEST.MF 36B
resultList.jsp 2KB
.myeclipse
src
com
sl
bbs
util
BBS.java 1KB
Util.java 8KB
core-site.xml 1KB
hdfs-site.xml 1KB
.classpath 489B
共 49 条
- 1
long1657
- 粉丝: 27
- 资源: 4
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
前往页