GENIATagger3.0.1_Vindaugar资源-CSDN文库

需积分: 9 68 浏览量 2013-01-25 22:10:34 上传评论 1 收藏 24.11MB GZ 举报

共44个文件

cpp：8个

exc：5个

dic：3个

资源推荐

资源详情

资源评论

收起资源包目录

geniatagger-3.0.1.tar.gz （44个子文件）

geniatagger-3.0.1

morph.cpp 6KB

models_named_entity

word_info 348KB

model001 18.32MB

morphdic

cousin.exc 28KB

verb.exc 81KB

adj.exc 20KB

verb.dic 158KB

WORDNETLICENSE 2KB

adj.dic 189KB

noun.dic 1.06MB

noun.exc 109KB

adv.exc 73B

common.h 529B

chunking.cpp 10KB

main.cpp 3KB

models_medline

model.bidir.4 2.36MB

model.bidir.11 2.37MB

model.bidir.9 2.26MB

model.bidir.3 2.39MB

model.bidir.6 2.55MB

model.bidir.15 2.78MB

model.bidir.5 2.34MB

model.bidir.13 2.32MB

model.bidir.14 2.6MB

model.bidir.12 2.38MB

model.bidir.1 2.28MB

model.bidir.10 2.28MB

model.bidir.7 2.67MB

model.bidir.2 2.4MB

model.bidir.0 2.43MB

model.bidir.8 2.34MB

LICENSE 2KB

namedentity.cpp 10KB

Makefile 298B

maxent.cpp 21KB

tokenize.cpp 3KB

maxent.h 10KB

models_chunking

model.bidir.4 2.6MB

model.bidir.6 1003KB

model.bidir.2 2.65MB

model.bidir.0 4.37MB

bidir.cpp 20KB

README 2KB

postag.cpp 7KB

/*
 * $Id: maxent.cpp,v 1.28 2006/08/21 17:30:38 tsuruoka Exp $
 */

#include "maxent.h"

// Standard headers for names used directly in this file; listed explicitly
// instead of relying on whatever maxent.h happens to pull in.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <map>
#include <vector>

using namespace std;

// Disabled BLMVM solver callbacks, kept for reference.
/*
int ME_Model::BLMVMFunctionGradient(double *x, double *f, double *g, int n)
{
  const int nf = _fb.Size();

  if (_inequality_width > 0) {
    assert(nf == n/2);
    for (int i = 0; i < nf; i++) {
      _va[i] = x[i];
      _vb[i] = x[i + nf];
      _vl[i] = _va[i] - _vb[i];
    }
  } else {
    assert(nf == n);
    for (int i = 0; i < n; i++) {
      _vl[i] = x[i];
    }
  }

  double score = update_model_expectation();

  if (_inequality_width > 0) {
    for (int i = 0; i < nf; i++) {
      g[i]      = -(_vee[i] - _vme[i] - _inequality_width);
      g[i + nf] = -(_vme[i] - _vee[i] - _inequality_width);
    }
  } else {
    if (_sigma == 0) {
      for (int i = 0; i < n; i++) {
        g[i] = -(_vee[i] - _vme[i]);
      }
    } else {
      const double c = 1 / (_sigma * _sigma);
      for (int i = 0; i < n; i++) {
        g[i] = -(_vee[i] - _vme[i] - c * _vl[i]);
      }
    }
  }

  *f = -score;

  return 0;
}

int ME_Model::BLMVMLowerAndUpperBounds(double *xl,double *xu,int n)
{
  if (_inequality_width > 0) {
    for (int i = 0; i < n; i++){
      xl[i] = 0;
      xu[i] = 10000.0;
    }
    return 0;
  }

  for (int i = 0; i < n; i++){
    xl[i] = -10000.0;
    xu[i] = 10000.0;
  }
  return 0;
}
*/

// Iteratively rescales the weights in _vl for up to 200 iterations.
//
// Each iteration calls update_model_expectation() and then applies
//   _vl[i] += log(_vee[i] / _vme[i]) / C
// to every entry of the feature bag. The divisor C is adapted on the fly:
//   - if the value returned by update_model_expectation() dropped below the
//     previous one, the weights are rolled back to the saved copy, C is
//     incremented and the same iteration number is retried;
//   - otherwise, when C > 1, it is decremented on every 10th iteration.
//
// @param C  only logged; it is overwritten with 1 before the loop starts.
// @return   always 0 (the original fell off the end of a non-void function,
//           which is undefined behaviour).
int ME_Model::perform_GIS(int C)
{
  cerr << "C = " << C << endl;
  C = 1;  // the caller-supplied value is deliberately discarded here
  cerr << "performing AGIS" << endl;

  vector<double> pre_v;       // weights saved before the last accepted step
  double pre_logl = -999999;  // objective value of the last accepted step

  for (int iter = 0; iter < 200; iter++) {
    double logl = update_model_expectation();
    fprintf(stderr, "iter = %2d C = %d f = %10.7f train_err = %7.5f", iter, C, logl, _train_error);
    if (_heldout.size() > 0) {
      double hlogl = heldout_likelihood();
      fprintf(stderr, " heldout_logl(err) = %f (%6.4f)", hlogl, _heldout_error);
    }
    cerr << endl;

    if (logl < pre_logl) {
      // The last step made things worse: damp the step size, undo it, and
      // redo this iteration (iter-- cancels the loop's iter++).
      C += 1;
      _vl = pre_v;
      iter--;
      continue;
    }
    if (C > 1 && iter % 10 == 0) C--;

    pre_logl = logl;
    pre_v = _vl;
    for (int i = 0; i < _fb.Size(); i++) {
      double coef = _vee[i] / _vme[i];
      _vl[i] += log(coef) / C;
    }
  }
  cerr << endl;

  return 0;
}

// Stub for the LMVM/BLMVM optimiser.
//
// The call to BLMVMSolve() is commented out, so this function currently only
// copies the parameters into a scratch buffer and back again:
//   - _inequality_width > 0: the buffer holds _va followed by _vb, and _vl is
//     recomputed as _va - _vb;
//   - otherwise: the buffer holds _vl.
//
// The scratch buffer is a std::vector rather than an unchecked malloc()/free()
// pair, so allocation failure is reported and nothing can leak.
//
// @return always 0.
int ME_Model::perform_LMVM()
{
  cerr << "performing LMVM" << endl;

  if (_inequality_width > 0) {
    const int nf = _fb.Size();
    const int nvars = nf * 2;
    vector<double> x(nvars);

    // INITIAL POINT
    for (int i = 0; i < nf; i++) {
      x[i]      = _va[i];
      x[i + nf] = _vb[i];
    }

    //    int info = BLMVMSolve(&x[0], nvars);

    for (int i = 0; i < nf; i++) {
      _va[i] = x[i];
      _vb[i] = x[i + nf];
      _vl[i] = _va[i] - _vb[i];
    }

    return 0;
  }

  const int nvars = _fb.Size();
  vector<double> x(nvars);

  // INITIAL POINT
  for (int i = 0; i < nvars; i++) {
    x[i] = _vl[i];
  }

  //  int info = BLMVMSolve(&x[0], nvars);

  for (int i = 0; i < nvars; i++) {
    _vl[i] = x[i];
  }

  return 0;
}

// Fills membp with the normalised class distribution for sample s.
//
// For every class label the weights _vl of the model features attached to the
// sample's features are summed (real-valued features are multiplied by their
// value), exponentiated and normalised so that the entries add up to 1.
//
// @param s      sample whose positive_features / rvfeatures (and ref_pd when
//               _ref_modelp is set) are read.
// @param membp  output; entries [0, _num_classes) are overwritten, so it must
//               hold at least _num_classes elements.
// @return       index of the largest entry in membp (lowest index on ties).
int ME_Model::conditional_probability(const Sample & s,
                                      std::vector<double> & membp) const
{
  double sum = 0;
  //  int max_label = -1;
  int max_label = 0;

  // Per-class sum of weights (the exponent of the unnormalised score).
  vector<double> powv(_num_classes, 0.0);
  for (vector<int>::const_iterator j = s.positive_features.begin(); j != s.positive_features.end(); j++){
    for (vector<int>::const_iterator k = _feature2mef[*j].begin(); k != _feature2mef[*j].end(); k++) {
      powv[_fb.Feature(*k).label()] += _vl[*k];
    }
  }
  for (vector<pair<int, double> >::const_iterator j = s.rvfeatures.begin(); j != s.rvfeatures.end(); j++) {
    for (vector<int>::const_iterator k = _feature2mef[j->first].begin(); k != _feature2mef[j->first].end(); k++) {
      powv[_fb.Feature(*k).label()] += _vl[*k] * j->second;
    }
  }

  // Shift all exponents down when the largest exceeds 700 so exp() stays
  // finite; the shift cancels out in the normalisation below.
  std::vector<double>::const_iterator pmax = max_element(powv.begin(), powv.end());
  double offset = max(0.0, *pmax - 700); // to avoid overflow

  for (int label = 0; label < _num_classes; label++) {
    double pow = powv[label] - offset;
    double prod = exp(pow);
    //      cout << pow << " " << prod << ", ";
    //    if (_ref_modelp != NULL) prod *= _train_refpd[n][label];
    if (_ref_modelp != NULL) prod *= s.ref_pd[label];
    assert(prod != 0);
    membp[label] = prod;
    sum += prod;
  }
  for (int label = 0; label < _num_classes; label++) {
    membp[label] /= sum;
    if (membp[label] > membp[max_label]) max_label = label;
  }
  assert(max_label >= 0);
  return max_label;
}

// Registers the (label, feature) pairs of all samples in _vs with the feature
// bag _fb and then calls init_feature2mef().
//
// @param cutoff  when > 0, occurrences are counted per feature.body() and a
//                pair is only registered if its count is strictly greater
//                than cutoff (the earlier "< cutoff" test is kept below as a
//                comment). When <= 0, every pair is registered.
// @return        the largest positive_features.size() seen over all samples
//                (rvfeatures are not included in this maximum).
int ME_Model::make_feature_bag(const int cutoff)
{
  int max_num_features = 0;

  // count the occurrences of features
#ifdef USE_HASH_MAP
  typedef __gnu_cxx::hash_map<unsigned int, int> map_type;
#else
  typedef std::map<unsigned int, int> map_type;
#endif
  map_type count;
  if (cutoff > 0) {
    for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++) {
      for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
        count[ME_Feature(i->label, *j).body()]++;
      }
      for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) {
        count[ME_Feature(i->label, j->first).body()]++;
      }
    }
  }

  for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++) {
    max_num_features = max(max_num_features, (int)(i->positive_features.size()));
    for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
      const ME_Feature feature(i->label, *j);
      //      if (cutoff > 0 && count[feature.body()] < cutoff) continue;
      if (cutoff > 0 && count[feature.body()] <= cutoff) continue;
      _fb.Put(feature);
      // Debug traces from the original; they need the value returned by
      // Put() (id) and a running sample index (n):
      //      cout << i->label << "\t" << *j << "\t" << id << endl;
      //      feature2sample[id].push_back(n);
    }
    for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) {
      const ME_Feature feature(i->label, j->first);
      //      if (cutoff > 0 && count[feature.body()] < cutoff) continue;
      if (cutoff > 0 && count[feature.body()] <= cutoff) continue;
      _fb.Put(feature);
    }
  }
  count.clear();

  //  cerr << "num_classes = " << _num_classes << endl;
  //  cerr << "max_num_features = " << max_num_features << endl;

  init_feature2mef();

  return max_num_features;
}

// Evaluates the model on the samples in _heldout.
//
// Side effect: stores the fraction of samples whose classify() result differs
// from their label in _heldout_error.
//
// @return the mean of log(membp[label]) over the held-out samples. When
//         _heldout is empty both the return value and _heldout_error are 0
//         (the unguarded divisions used to produce NaN in that case).
double ME_Model::heldout_likelihood()
{
  if (_heldout.empty()) {
    _heldout_error = 0;
    return 0;
  }

  double logl = 0;
  int ncorrect = 0;
  for (std::vector<Sample>::const_iterator i = _heldout.begin(); i != _heldout.end(); i++) {
    vector<double> membp(_num_classes);
    int l = classify(*i, membp);
    logl += log(membp[i->label]);
    if (l == i->label) ncorrect++;
  }
  _heldout_error = 1 - (double)ncorrect / _heldout.size();

  return logl / _heldout.size();
}

// NOTE(review): this definition is cut off in the text available here; it is
// reproduced unchanged up to the point where it stops.
double ME_Model::update_model_expectation()
{
  double logl = 0;
  int ncorrect = 0;

  _vme.resize(_fb.Size());
  for (int i = 0; i < _fb.Size(); i++) _vme[i] = 0;

  int n = 0;
  for (vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++, n++) {
    vector<double> membp(_num_classes);
    int max_label = conditional_probability(*i, membp);

    logl += log(membp[i->label]);
    //    cout << membp[*i] << " " << logl << " ";
    if (max_label == i->label) ncorrect++;

    // model_expectation
    for (vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++){
      for (vector<int>::const_iterator k = _feature2mef[*j].begin(); k != _feature2mef[*j].end(); k++) {
        _vme[*k] += membp[_fb.Feature(*k).label()];
      }
    }
    for (vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) {
      for (vector<int>::const_iterator k = _feature2mef[j->first].begin(); k != _feature2mef[j->first].end(); k++) {

评论收藏

内容反馈