/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org/
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is CompressingMetaIndex.java
*
* The Original Code is Copyright (C) 2004-2014 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald <craigm{a.}dcs.gla.ac.uk> (original contributor)
*/
package org.terrier.structures;
import gnu.trove.TObjectIntHashMap;
import java.io.BufferedReader;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.terrier.sorting.HeapSortInt;
import org.terrier.structures.collections.FSOrderedMapFile;
import org.terrier.structures.collections.OrderedMap;
import org.terrier.structures.seralization.FixedSizeIntWritableFactory;
import org.terrier.structures.seralization.FixedSizeTextFactory;
import org.terrier.structures.seralization.FixedSizeWriteableFactory;
import org.terrier.utility.ApplicationSetup;
import org.terrier.utility.ArrayUtils;
import org.terrier.utility.Files;
import org.terrier.utility.TerrierTimer;
import org.terrier.utility.Wrapper;
import org.terrier.utility.io.HadoopUtility;
import org.terrier.utility.io.RandomDataInput;
import org.terrier.utility.io.RandomDataInputMemory;
import org.terrier.utility.io.WrappedIOException;
/** A {@link MetaIndex} implementation that compresses contents.
 * Each value has a fixed maximum length; the values for a document are
 * concatenated into one blob, stored compressed, and decompressed on
 * access using java.util.zip.Inflater.
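 * <p>Lookup sketch (illustrative; assumes an already-opened index whose
 * meta structure contains a "docno" key):
 * <pre>{@code
 * MetaIndex meta = index.getMetaIndex();
 * String docno = meta.getItem("docno", 10); //value of key "docno" for docid 10
 * String[] all = meta.getAllItems(10); //values of all keys for docid 10
 * }</pre>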
 * @author Craig Macdonald &amp; Vassilis Plachouras
* @since 3.0
*/
@SuppressWarnings("deprecation")
public class CompressingMetaIndex implements MetaIndex {
private final static Pattern SPLIT_SPACE = Pattern.compile("\\s+");
/** logger to be used in this class */
private static Logger logger = Logger.getLogger(CompressingMetaIndex.class);
/**
	 * A Hadoop input format for a compressing meta index (allows the reading
	 * of a meta index as input to a MapReduce job).
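	 * <p>A minimal configuration sketch (illustrative; "meta" is the usual
	 * name of the meta structure):
	 * <pre>{@code
	 * JobConf jc = new JobConf();
	 * CompressingMetaIndexInputFormat.setStructure(jc, "meta");
	 * jc.setInputFormat(CompressingMetaIndexInputFormat.class);
	 * }</pre>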
*/
public static class CompressingMetaIndexInputFormat implements InputFormat<IntWritable, Wrapper<String[]>>
{
static String STRUCTURE_NAME_JC_KEY = "MetaIndexInputStreamRecordReader.structureName";
		/**
		 * Sets in the JobConf the name of the meta index structure to be read.
		 * @param jc the job configuration
		 * @param metaStructureName the name of the meta structure, normally "meta"
		 */
public static void setStructure(JobConf jc, String metaStructureName)
{
jc.set(STRUCTURE_NAME_JC_KEY, metaStructureName);
}
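		/** A {@link FileSplit} that additionally records the first and last
		 * document ids covered by its byte range. */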
static class MetaIndexSplit extends FileSplit
{
int startId;
int endId;
			//no-argument constructor, required for Writable deserialization
			public MetaIndexSplit(){
				super(null, (long)0, (long)0, new String[0]);
			}
public MetaIndexSplit(Path file, long start, long length, String[] hosts, int _startId, int _endId) {
super(file, start, length, hosts);
startId = _startId;
endId = _endId;
}
public void readFields(DataInput in) throws IOException {
super.readFields(in);
startId = in.readInt();
endId = in.readInt();
}
public void write(DataOutput out) throws IOException {
super.write(out);
out.writeInt(startId);
out.writeInt(endId);
}
public String toString()
{
StringBuilder rtr = new StringBuilder();
rtr.append("MetaIndexSplit: BlockSize=").append(this.getLength());
rtr.append(" startAt=").append(+this.getStart());
try{
rtr.append(" hosts=");
rtr.append(ArrayUtils.join(this.getLocations(), ","));
}
catch (IOException ioe ) {
logger.warn("Problem getting locations", ioe);
}
rtr.append(" ids=["+startId+","+endId +"]");
return rtr.toString();
}
}
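		/** A RecordReader that, for each document in a split, emits the docid
		 * as key and that document's metadata values as value. */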
static class MetaIndexInputStreamRecordReader implements RecordReader<IntWritable, Wrapper<String[]>>
{
			final InputStream in; //the nested CompressingMetaIndex.InputStream, not java.io.InputStream
final int startID;
final int endID;
public MetaIndexInputStreamRecordReader(IndexOnDisk index, String structureName, int startingDocID, int endingID)
throws IOException
{
in = new InputStream(index, structureName, startingDocID, endingID);
startID = startingDocID;
endID = endingID;
}
public void close() throws IOException {
in.close();
}
public IntWritable createKey() {
return new IntWritable();
}
public Wrapper<String[]> createValue() {
return new Wrapper<String[]>();
}
			public long getPos() throws IOException {
				return 0; //byte position is not tracked; progress comes from getProgress()
			}
public float getProgress() throws IOException {
return (float)(in.getIndex() - startID)/(float)(endID - startID);
}
public boolean next(IntWritable docid, Wrapper<String[]> values)
throws IOException
{
if (! in.hasNext())
return false;
				//these calls MUST stay in this order: in.next() advances the
				//stream, and in.getIndex() then reports the docid just read
				values.setObject(in.next());
				docid.set(in.getIndex());
return true;
}
}
/**
* {@inheritDoc}
*/
public RecordReader<IntWritable, Wrapper<String[]>> getRecordReader(
InputSplit _split, JobConf jc, Reporter reporter)
throws IOException
{
HadoopUtility.loadTerrierJob(jc);
//load the index
Index.setIndexLoadingProfileAsRetrieval(false);
IndexOnDisk index = HadoopUtility.fromHConfiguration(jc);
if (index == null)
throw new IOException("Index could not be loaded from JobConf: " + Index.getLastIndexLoadError() );
//determine the structure to work on
String structureName = jc.get(STRUCTURE_NAME_JC_KEY);
if (structureName == null)
throw new IOException("JobConf property "+STRUCTURE_NAME_JC_KEY+" not specified");
//get the split
MetaIndexSplit s = (MetaIndexSplit)_split;
return new MetaIndexInputStreamRecordReader(index, structureName, s.startId, s.endId);
}
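		/** Returns the distinct hosts holding any block of the given byte
		 * range of a file, used as locality hints when creating splits. */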
private static String[] getHosts(FileStatus fs, FileSystem f, long start, long len) throws IOException
{
BlockLocation[] bs = f.getFileBlockLocations(fs, start, len);
Set<String> hosts = new HashSet<String>();
for(BlockLocation b : bs)
{
for(String host : b.getHosts())
{
hosts.add(host);
}
}
return hosts.toArray(new String[0]);
}
/**
* {@inheritDoc}
*/
public InputSplit[] getSplits(JobConf jc, int advisedNumberOfSplits)
throws IOException
{
logger.setLevel(Level.DEBUG);
HadoopUtility.loadTerrierJob(jc);
List<InputSplit> splits = new ArrayList<InputSplit>(advisedNumberOfSplits);
IndexOnDisk index = HadoopUtility.fromHConfiguration(jc);
String structureName = jc.get(STRUCTURE_NAME_JC_KEY);
			final String dataFilename = index.getPath() + ApplicationSetup.FILE_SEPARATOR + index.getPrefix() + "." + structureName + ".zdata";
			//...