/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
package cc.mallet.fst;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.text.DecimalFormat;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureInducer;
import cc.mallet.types.FeatureSelection;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.IndexedSparseVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.MatrixOps;
import cc.mallet.types.RankedFeatureVector;
import cc.mallet.types.Sequence;
import cc.mallet.types.SparseVector;
import cc.mallet.pipe.Noop;
import cc.mallet.pipe.Pipe;
import cc.mallet.util.ArrayUtils;
import cc.mallet.util.MalletLogger;
import cc.mallet.util.Maths;
/* There are several different kinds of numeric values:
"weights" range from -Inf to Inf. High weights make a path more
likely. These don't appear directly in Transducer.java, but appear
as parameters to many subclasses, such as CRFs. Weights are also
often summed, or combined in a dot product with feature vectors.
"unnormalized costs" range from -Inf to Inf. High costs make a
path less likely. Unnormalized costs can be obtained from negated
weights or negated sums of weights. These are often returned by a
TransitionIterator's getValue() method. The LatticeNode.alpha
values are unnormalized costs.
"normalized costs" range from 0 to Inf. High costs make a path
less likely. Normalized costs can safely be considered as the
-log(probability) of some event. They can be obtained by
subtracting a (negative) normalizer from unnormalized costs, for
example, subtracting the total cost of a lattice. Typically
initialCosts and finalCosts are examples of normalized costs, but
they are also allowed to be unnormalized costs. The gammas[][],
stateGammas[], and transitionXis[][] are all normalized costs, as
well as the return value of Lattice.getValue().
"probabilities" range from 0 to 1. High probabilities make a path
more likely. They are obtained from normalized costs by negating and
exponentiating: p = exp(-cost), since a normalized cost is -log(probability).
"sums of probabilities" range from 0 to positive numbers. They are
the sum of several probabilities. These are passed to the
incrementCount() methods.
*/
/**
* Represents a CRF model.
*/
public class CRF extends Transducer implements Serializable
{
// Logger for training/inference diagnostics from this class.
private static Logger logger = MalletLogger.getLogger(CRF.class.getName());
// Separator used when several label names are joined into one compound name.
static final String LABEL_SEPARATOR = ",";
// Alphabet over input features; indexes the dimensions of transition feature vectors.
protected Alphabet inputAlphabet;
// Alphabet over output labels emitted by this transducer.
protected Alphabet outputAlphabet;
// All states of the finite-state machine, indexed by state index.
protected ArrayList<State> states = new ArrayList<State> ();
// Subset of states that may begin a path (presumably maintained by addState(); not visible in this chunk).
protected ArrayList<State> initialStates = new ArrayList<State> ();
// Lookup from a state's name to its State object.
protected HashMap<String,State> name2state = new HashMap<String,State> ();
// All trainable parameters: transition, default, initial and final weights. See Factors below.
protected Factors parameters = new Factors ();
//SparseVector[] weights;
//double[] defaultWeights; // parameters for default feature
//Alphabet weightAlphabet = new Alphabet ();
//boolean[] weightsFrozen;
// FeatureInduction can fill this in
protected FeatureSelection globalFeatureSelection;
// "featureSelections" is on a per- weights[i] basis, and over-rides
// (permanently disabling) FeatureInducer's and
// setWeightsDimensionsAsIn() from using these features on these transitions
protected FeatureSelection[] featureSelections;
// Store here the induced feature conjunctions so that these conjunctions can be added to test instances before transduction
protected ArrayList<FeatureInducer> featureInducers = new ArrayList<FeatureInducer>();
// Incremented each time this CRF's parameter *values* change; lets cached
// computations cheaply detect staleness.
protected int weightsValueChangeStamp = 0;
// Incremented each time this CRF's parameter *structure* changes.
protected int weightsStructureChangeStamp = 0;
protected int cachedNumParametersStamp = -1; // A copy of weightsStructureChangeStamp the last time numParameters was calculated
// Cached parameter count; valid only while cachedNumParametersStamp matches weightsStructureChangeStamp.
protected int numParameters;
/** A simple, transparent container to hold the parameters or sufficient statistics for the CRF. */
public static class Factors implements Serializable {
public Alphabet weightAlphabet; // maps weight-set names to their "weight index"
public SparseVector[] weights; // parameters on transitions, indexed by "weight index"
public double[] defaultWeights;// parameters for default features, indexed by "weight index"
public boolean[] weightsFrozen; // flag, if true indicating that the weights of this "weight index" should not be changed by learning, indexed by "weight index"
public double [] initialWeights; // weight for starting in each state, indexed by state index
public double [] finalWeights; // weight for ending in each state, indexed by state index
/** Construct a new, empty Factors: a fresh empty weight alphabet, zero-length
 * initialWeights and finalWeights, and all remaining arrays left null.
 * The null members are populated later by addState() and addWeight(). */
public Factors () {
	this.initialWeights = new double[0];
	this.finalWeights = new double[0];
	this.weightAlphabet = new Alphabet();
	// weights, defaultWeights and weightsFrozen stay null until structure is added;
	// zero-length arrays would also work, but null makes "not yet built" explicit.
}
/** Construct new Factors by mimicking the structure of the other one, but with zero values.
* Always simply point to the other's Alphabet; do not clone it. */
public Factors (Factors other) {
weightAlphabet = other.weightAlphabet;
weights = new SparseVector[other.weights.length];
for (int i = 0; i < weights.length; i++)
weights[i] = (SparseVector) other.weights[i].cloneMatrixZeroed();
defaultWeights = new double[other.defaultWeights.length];
weightsFrozen = other.weightsFrozen; // We don't copy here because we want "expectation" and "constraint" factors to get changes to a CRF.parameters factor. Alternatively we declare freezing to be a change of structure, and force reallocation of "expectations", etc.
initialWeights = new double[other.initialWeights.length];
finalWeights = new double[other.finalWeights.length];
}
/** Construct a new Factors as a copy of {@code other}, values included.
 * The weight alphabet is cloned only when {@code cloneAlphabet} is true;
 * otherwise the copy shares it with {@code other}. */
public Factors (Factors other, boolean cloneAlphabet) {
	if (cloneAlphabet)
		this.weightAlphabet = (Alphabet) other.weightAlphabet.clone();
	else
		this.weightAlphabet = other.weightAlphabet;
	this.weights = new SparseVector[other.weights.length];
	for (int k = 0; k < this.weights.length; k++)
		this.weights[k] = (SparseVector) other.weights[k].cloneMatrix();
	this.defaultWeights = other.defaultWeights.clone();
	this.initialWeights = other.initialWeights.clone();
	this.finalWeights = other.finalWeights.clone();
	this.weightsFrozen = other.weightsFrozen; // intentionally shared, not copied
}
/** Construct a new Factors with the same structure as the parameters of 'crf', but with values initialized to zero.
* This method is typically used to allocate storage for sufficient statistics, expectations, constraints, etc. */
public Factors (CRF crf) {
// TODO Change this implementation to this(crf.parameters)
weightAlphabet = crf.parameters.weightAlphabet; // TODO consider cloning this instead
weights = new SparseVector[crf.parameters.weights.length];
for (int i = 0; i < weights.length; i++)
weights[i] = (SparseVector) crf.parame
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
malletTest.rar_Mallet_文本分类java_文本聚类 (748个子文件)
.classpath 1KB
jwnl_properties.dtd 836B
HACKING 202B
package.html 541B
package.html 382B
package.html 335B
package.html 307B
package.html 278B
package.html 276B
package.html 275B
package.html 269B
package.html 265B
package.html 252B
package.html 246B
package.html 233B
package.html 229B
package.html 226B
package.html 224B
package.html 222B
package.html 221B
package.html 205B
package.html 204B
package.html 204B
package.html 201B
package.html 197B
mallet-deps.jar 2.52MB
grmm-deps.jar 898KB
mtj-0.9.9.jar 798KB
trove-2.0.2.jar 721KB
openjgraph.jar 224KB
bsh.jar 222KB
junit-4.5.jar 194KB
jwnl-1.3.jar 167KB
jdom-1.0.jar 150KB
jgrapht-0.6.0.jar 88KB
CRF.java 75KB
ACRF.java 66KB
TestInference.java 58KB
PolylingualTopicModel.java 51KB
ParallelTopicModel.java 49KB
LDAHyper.java 46KB
TestSpacePipe.java 46KB
Dirichlet.java 43KB
TestCRF.java 43KB
HMM.java 39KB
SumLatticeBeam.java 37KB
InstanceList.java 33KB
AbstractTableFactor.java 32KB
FactorGraph.java 31KB
MCMaxEntTrainer.java 29KB
PwplACRFTrainer.java 29KB
SparseVector.java 28KB
Vectors2Classify.java 28KB
SimpleTagger.java 28KB
SimpleTaggerWithConstraints.java 28KB
HierarchicalPAM.java 27KB
PseudolikelihoodACRFTrainer.java 27KB
CommandOption.java 26KB
Calo2Classify.java 26KB
TestMEMM.java 26KB
LDAStream.java 24KB
PAM4L.java 24KB
MultinomialHMM.java 24KB
AugmentableFeatureVector.java 23KB
RankMaxEntTrainer.java 22KB
CRFTrainerByLabelLikelihood.java 22KB
TopicalNGrams.java 22KB
FeatureConstraintUtil.java 22KB
Vectors2Topics.java 21KB
DefaultAcrfTrainer.java 21KB
TRP.java 21KB
PiecewiseACRFTrainer.java 20KB
SumLatticeConstrained.java 20KB
PagedInstanceList.java 19KB
MVNormal.java 19KB
TopicModelDiagnostics.java 19KB
WorkerRunnable.java 19KB
Assignment.java 19KB
MaxLatticeDefault.java 18KB
HierarchicalLDA.java 18KB
Multinomial.java 18KB
MarginalProbEstimator.java 18KB
MaxEntTrainer.java 17KB
LatticeViewer.java 17KB
LDA.java 17KB
TopicInferencer.java 17KB
MatrixOps.java 16KB
SimpleLDA.java 16KB
NaiveBayesTrainer.java 16KB
AbstractBeliefPropagation.java 16KB
SumLatticeDefault.java 16KB
JunctionTreeInferencer.java 15KB
TestTableFactor.java 15KB
ExpGain.java 15KB
TestLogTableFactor.java 15KB
Vectors2Vectors.java 15KB
FeatureVector.java 15KB
MultiSegmentationEvaluator.java 15KB
Transducer.java 15KB
TopicTrainer.java 14KB
共 748 条
- 1
- 2
- 3
- 4
- 5
- 6
- 8
资源评论
小波思基
- 粉丝: 70
- 资源: 1万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功