基于SVM的文本分类源代码，C++_基于svm的文本分类代码资源-CSDN文库

共13个文件

cpp：4个

dsw：1个

plg：1个

SVM，支持向量机，C++，文本分类，源代码

5星 · 超过95%的资源需积分: 14 26 浏览量 2012-04-19 14:28:33 上传评论 3 收藏 90KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

svm code.zip （13个子文件）

svm7

testdata 2KB

Release

data.cpp 33KB

training.cpp 2KB

heart_scale.model 15KB

svm7.plg 2KB

testmain.cpp 40KB

svm.h 2KB

heart_scale 27KB

svm7.dsw 533B

svm7.ncb 153KB

svm7.opt 79KB

Debug

svm7.dsp 4KB

svm.cpp 42KB

#include <math.h> #include <stdio.h> #include <stdlib.h> #include <ctype.h> #include <float.h> #include <string.h> #include <stdarg.h> #include "svm.h" typedef float Qfloat; typedef signed char schar; #ifndef min template <class T> inline T min(T x,T y) { return (x<y)?x:y; } #endif #ifndef max template <class T> inline T max(T x,T y) { return (x>y)?x:y; } #endif template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; } template <class S, class T> inline void clone(T*& dst, S* src, int n) { dst = new T[n]; memcpy((void *)dst,(void *)src,sizeof(T)*n); } #define INF HUGE_VAL #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #if 1 void info(char *fmt,...) { va_list ap; va_start(ap,fmt); vprintf(fmt,ap); va_end(ap); } void info_flush() { fflush(stdout); } #else void info(char *fmt,...) {} void info_flush() {} #endif // // Kernel Cache // // l is the number of total data items // size is the cache size limit in bytes // class Cache { public: Cache(int l,int size); ~Cache(); // request data [0,len) // return some position p where [p,len) need to be filled // (p >= len if nothing needs to be filled) int get_data(const int index, Qfloat **data, int len); void swap_index(int i, int j); // future_option private: int l; int size; struct head_t { head_t *prev, *next; // a cicular list Qfloat *data; int len; // data[0,len) is cached in this entry }; head_t* head; head_t lru_head; void lru_delete(head_t *h); void lru_insert(head_t *h); }; Cache::Cache(int l_,int size_):l(l_),size(size_) { head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 size /= sizeof(Qfloat); size -= l * sizeof(head_t) / sizeof(Qfloat); lru_head.next = lru_head.prev = &lru_head; } Cache::~Cache() { for(head_t *h = lru_head.next; h != &lru_head; h=h->next) free(h->data); free(head); } void Cache::lru_delete(head_t *h) { // delete from current location h->prev->next = h->next; h->next->prev = h->prev; } void Cache::lru_insert(head_t *h) { // insert to last position h->next = &lru_head; h->prev = lru_head.prev; h->prev->next = h; h->next->prev = h; } int Cache::get_data(const int index, Qfloat **data, int len) { head_t *h = &head[index]; if(h->len) lru_delete(h); int more = len - h->len; if(more > 0) { // free old space while(size < more) { head_t *old = lru_head.next; lru_delete(old); free(old->data); size += old->len; old->data = 0; old->len = 0; } // allocate new space h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len); size -= more; swap(h->len,len); } lru_insert(h); *data = h->data; return len; } void Cache::swap_index(int i, int j) { if(i==j) return; if(head[i].len) lru_delete(&head[i]); if(head[j].len) lru_delete(&head[j]); swap(head[i].data,head[j].data); swap(head[i].len,head[j].len); if(head[i].len) lru_insert(&head[i]); if(head[j].len) lru_insert(&head[j]); if(i>j) swap(i,j); for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) { if(h->len > i) { if(h->len > j) swap(h->data[i],h->data[j]); else { // give up lru_delete(h); free(h->data); size += h->len; h->data = 0; h->len = 0; } } } } // // Kernel evaluation //核函数赋值 // // the static method k_function is for doing single kernel evaluation // the constructor of Kernel prepares to calculate the l*l kernel matrix // the member function get_Q is for getting one column from the Q Matrix // class Kernel { public: Kernel(int l, svm_node * const * x, const svm_parameter& param); virtual ~Kernel(); static double k_function(const svm_node *x, const svm_node *y, const svm_parameter& param); virtual Qfloat *get_Q(int column, int len) const = 0; virtual void swap_index(int i, int j) const // no so const... { swap(x[i],x[j]); if(x_square) swap(x_square[i],x_square[j]); } protected: double (Kernel::*kernel_function)(int i, int j) const; private: const svm_node **x; double *x_square; // svm_parameter const int kernel_type; const double degree; const double gamma; const double coef0; static double dot(const svm_node *px, const svm_node *py); double kernel_linear(int i, int j) const { return dot(x[i],x[j]); } double kernel_poly(int i, int j) const { return pow(gamma*dot(x[i],x[j])+coef0,degree); } double kernel_rbf(int i, int j) const { return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j]))); } double kernel_sigmoid(int i, int j) const { return tanh(gamma*dot(x[i],x[j])+coef0); } }; Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param) :kernel_type(param.kernel_type), degree(param.degree), gamma(param.gamma), coef0(param.coef0) { switch(kernel_type) { case LINEAR: kernel_function = &Kernel::kernel_linear; break; case POLY: kernel_function = &Kernel::kernel_poly; break; case RBF: kernel_function = &Kernel::kernel_rbf; break; case SIGMOID: kernel_function = &Kernel::kernel_sigmoid; break; } clone(x,x_,l); if(kernel_type == RBF) { x_square = new double[l]; for(int i=0;i<l;i++) x_square[i] = dot(x[i],x[i]); } else x_square = 0; } Kernel::~Kernel() { delete[] x; delete[] x_square; } //dot作用？？ double Kernel::dot(const svm_node *px, const svm_node *py) { double sum = 0; while(px->index != -1 && py->index != -1) { if(px->index == py->index) { sum += px->value * py->value; ++px; ++py; } else { if(px->index > py->index) ++py; else ++px; } } return sum; } // 核函数 double Kernel::k_function(const svm_node *x, const svm_node *y, const svm_parameter& param) { switch(param.kernel_type) { case LINEAR: return dot(x,y); case POLY: return pow(param.gamma*dot(x,y)+param.coef0,param.degree); case RBF: { double sum = 0; while(x->index != -1 && y->index !=-1) { if(x->index == y->index) { double d = x->value - y->value; sum += d*d; ++x; ++y; } else { if(x->index > y->index) { sum += y->value * y->value; ++y; } else { sum += x->value * x->value; ++x; } } } while(x->index != -1) { sum += x->value * x->value; ++x; } while(y->index != -1) { sum += y->value * y->value; ++y; } return exp(-param.gamma*sum); } case SIGMOID: return tanh(param.gamma*dot(x,y)+param.coef0); default: return 0; /* Unreachable */ } } // Generalized SMO+SVMlight algorithm // Solves: // // min 0.5(\alpha^T Q \alpha) + b^T \alpha // // y^T \alpha = \delta // y_i = +1 or -1 // 0 <= alpha_i <= Cp for y_i = 1 // 0 <= alpha_i <= Cn for y_i = -1 // // Given: // // Q, b, y, Cp, Cn, and an initial feasible point \alpha // l is the size of vectors and matrices // eps is the stopping criterion // // solution will be put in \alpha, objective value will be put in obj // class Solver { public: Solver() {}; virtual ~Solver() {}; struct SolutionInfo { double obj; double rho; double upper_bound_p; double upper_bound_n; double r; // for Solver_NU }; void Solve(int l, const Kernel& Q, const double *b_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo* si, int shrinking); protected: int active_size; schar *y; double *G; // gradient of objective function enum { LOWER_BOUND, UPPER_BOUND, FREE }; char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE double *alpha; const Kernel *Q; double eps; double Cp,Cn; double *b; int *active_set; double *G_bar; // gradient, if we treat free variables as 0 int l; bool unshrinked; // XXX double get_C(int i) { return (y[i] > 0)? Cp : Cn; } void update_alpha_status(int i) { if(alpha[i] >= get_C(i)) alpha_status[i] = UPPER_BOUND; else if(alpha[i] <= 0) alpha_status[i] = LOWER_BOUND; else alpha_status[i] = FREE; } bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; } bool is_lower_bound(int i) { return alpha_status[i] == LOWER

评论收藏

内容反馈