/* Ozan Irsoy, Bogazici University, 2012.
*
* SoftTree.cpp implements Soft Regression Trees.
* Internal nodes have linear sigmoid splits,
* leaf nodes have constant values.
 *
 * For details, see:
 * "Soft Decision Trees", O. Irsoy, O. T. Yildiz, E. Alpaydin,
 * 21st International Conference on Pattern Recognition (ICPR),
 * Tsukuba, Japan, 2012.
 *
*/
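/* The tree computes its response recursively (see Node::evaluate below):
 * at an internal node m with split parameters (w_m, w0_m),
 *   y_m(x) = g_m(x) * y_left(x) + (1 - g_m(x)) * y_right(x),
 *   g_m(x) = sigmoid(w_m . x + w0_m),
 * and at a leaf, y_m(x) = w0_m.
 */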
#include <iostream>
#include <fstream>
#include <algorithm>
#include <cmath>
#include <string>
#include <vector>
#include <cassert>
#include "common.cpp"
#define HARDINIT true // if true, optimization starts from hard tree parameters, else, randomly
#define MINALPHA 1 // lower end of the learning rate range
#define MAXALPHA 10 // upper end of the learning rate range
#define MAXEPOCH 25 // number of epochs in training
#define MAXRETRY 10 // number of restarts of the optimization from a starting point
#define PRETH 1e-3 // prepruning threshold
typedef unsigned int uint;
using namespace std;
class SoftTree;
class Node;
class Node
{
public:
Node();
private:
void learnParams(const vector<vector<double> > &, const vector<double> &,
const vector<vector<double> > &, const vector<double> &,
double, SoftTree*);
void splitNode(const vector<vector<double> > &, const vector<double> &,
const vector<vector<double> > &, const vector<double> &,
SoftTree*);
double evaluate(const vector<double> &);
uint size();
void print(uint);
void hardInit(const vector< vector <double> > &, const vector<double> &);
// pointers & flags
Node* parent;
Node* left;
Node* right;
bool isLeaf;
bool isLeft;
// params
vector<double> w;
double w0; // if leaf, response value
// if internal, bias of split
double y; // last computed response
double v; // last computed gating
friend class SoftTree;
};
class SoftTree
{
public:
SoftTree(const vector< vector<double> > &X, const vector<double> &Y,
const vector< vector<double> > &V, const vector<double> &R,
char type = 'r');
double evaluate(const vector<double> &);
double meanSqErr(const vector<vector<double> > &, const vector<double> &);
double errRate(const vector<vector<double> > &, const vector<double> &);
uint size();
void print();
private:
Node* root;
char type; // 'r'egression or 'c'lassification
friend class Node;
};
Node::Node()
{
isLeaf = true;
parent = NULL;
left = NULL;
right = NULL;
}
SoftTree::SoftTree(const vector< vector<double> > &X, const vector<double> &Y,
const vector< vector<double> > &V, const vector<double> &R,
char type) {
assert(type == 'r' || type == 'c');
this->type = type;
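  // start from a single leaf that predicts the mean response,
  // then grow the tree recursively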
root = new Node;
root->w = vector<double>(X[0].size());
root->w0 = 0;
for (uint i=0; i<Y.size(); i++)
root->w0 += Y[i];
root->w0 /= Y.size();
root->splitNode(X, Y, V, R, this);
}
void Node::hardInit(const vector< vector< double > >& X, const vector< double >& Y)
{
vector<double> sv(X.size()); // soft membership
double total=0;
assert(w.size() != 0);
// (1) compute soft memberships
for (uint j=0; j<X.size(); j++) {
double t = 1;
Node* m = this;
Node* p;
while(m->parent != NULL) {
p = m->parent;
if (m->isLeft)
t *= sigmoid(dot(p->w, X[j]) + p->w0);
else
t *= (1-sigmoid(dot(p->w, X[j]) + p->w0));
m = m->parent;
}
sv[j] = t;
total += t;
}
if (total <= 1) { // effectively at most one sample reaches this node: init randomly
w = vector<double>(X[0].size());
for (uint i=0; i<w.size(); i++)
w[i] = urand(-0.005, 0.005);
w0 = urand(-0.005, 0.005);
left->w0 = urand(-0.005, 0.005);
right->w0 = urand(-0.005, 0.005);
return;
}
uint dim, bestDim = 0;
double errBest = -1; // -1 marks "no split found yet"
double bestSplit = 0;
double bestw10 = 0, bestw20 = 0;
// (2) look for the best hard split
for (dim=0; dim<X[0].size(); dim++)
{
vector<pair<double,uint> > f;
for (uint i=0; i<X.size(); i++)
f.push_back(make_pair(X[i][dim],i));
sort(f.begin(), f.end());
double sp;
for (uint i=0; i<f.size()-1; i++) {
if (f[i].first == f[i+1].first) continue;
sp = 0.5*(f[i].first + f[i+1].first);
double w10, w20, lsum, rsum; // side response means and soft sample counts
w10 = w20 = lsum = rsum = 0;
for (uint j=0; j<=i; j++) {
w10 += Y[f[j].second]*sv[f[j].second];
lsum += sv[f[j].second];
}
w10 /= lsum;
for (uint j=i+1; j<f.size(); j++) {
w20 += Y[f[j].second]*sv[f[j].second];
rsum += sv[f[j].second];
}
w20 /= rsum;
// weighted MSE for regression and
// weighted Gini Impurity for classification
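// (for 0/1 targets, the weighted squared deviation around the side
// mean p is p*(1-p) per unit weight, proportional to the binary Gini
// impurity, so the same formula covers both cases)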
double errl = 0, errr = 0;
for (uint j=0; j<=i; j++)
errl += (w10 - Y[f[j].second])*(w10 - Y[f[j].second])*sv[f[j].second];
errl /= lsum;
for (uint j=i+1; j<f.size(); j++)
errr += (w20 - Y[f[j].second])*(w20 - Y[f[j].second])*sv[f[j].second];
errr /= rsum;
double a = lsum/(lsum+rsum+0.0);
double b = rsum/(lsum+rsum+0.0);
if (a*errl + b*errr < errBest || errBest == -1) {
bestSplit = sp;
bestDim = dim;
errBest = a*errl + b*errr;
bestw10 = w10;
bestw20 = w20;
//cout << errBest << endl;
}
}
}
// (3) init params according to the best hard split
if (errBest == -1) { // all feature values tied: no valid split exists, init randomly
  w = vector<double>(X[0].size());
  for (uint i=0; i<w.size(); i++)
    w[i] = urand(-0.005, 0.005);
  w0 = urand(-0.005, 0.005);
  left->w0 = urand(-0.005, 0.005);
  right->w0 = urand(-0.005, 0.005);
  return;
}
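// With w[bestDim] = -0.5 and w0 = 0.5*bestSplit the gate becomes
// v = sigmoid(0.5*(bestSplit - x[bestDim])): exactly 0.5 on the split
// boundary and > 0.5 (routing left) for x[bestDim] < bestSplit, which
// matches the left/right response means computed above.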
w = vector<double>(X[0].size());
for (uint i=0; i<w.size(); i++)
w[i] = urand(-0.005, 0.005);
w[bestDim] = -0.5;
w0 = bestSplit*0.5;
left->w0 = bestw10;
right->w0 = bestw20;
//cout << "bestsplit: " << bestsplit << endl;
assert(w.size() != 0);
}
double SoftTree::evaluate(const vector<double> &x) {
  if (type == 'c')
    return sigmoid(root->evaluate(x)); // squash to [0,1] for classification
  return root->evaluate(x); // 'r': raw regression response
}
uint SoftTree::size() {
return root->size();
}
uint Node::size() {
if (isLeaf)
return 1;
else
return 1 + left->size() + right->size();
}
void Node::print(uint depth) {
  for (uint i=0; i<depth; i++) cout << "__";
  if (!isLeaf)
    for (uint i=0; i<w.size(); i++) cout << w[i] << ", ";
cout << w0 << endl;
if (!isLeaf) {
left->print(depth+1);
right->print(depth+1);
}
}
void SoftTree::print() {
root->print(1);
}
double Node::evaluate(const vector<double> &x) {
if (isLeaf)
y = w0;
else {
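    // v and y are cached on the node so that learnParams can reuse the
    // gating values along the path instead of recomputing the sigmoids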
v = sigmoid(dot(w,x)+w0);
y = v*(left->evaluate(x)) + (1-v)*(right->evaluate(x));
}
return y;
}
// Learns the split parameters of the current node and the response values
// of its two children by stochastic gradient descent with momentum.
void Node::learnParams(const vector< vector<double> > &X, const vector<double> &Y,
const vector< vector<double> > &V, const vector<double> &R,
double alpha, SoftTree* tree) {
double u = 0.1; // momentum weight
uint e,i,j;
vector<uint> ix;
vector<double> dw(X[0].size()); // gradient step for w
vector<double> dwp(X[0].size()); // previous step for w (momentum term)
double dw10, dw20, dw0; // steps for left->w0, right->w0 and w0
double dw10p = 0, dw20p = 0, dw0p = 0; // previous steps, zero-initialized
for (i=0; i<Y.size(); i++) ix.push_back(i);
for (e=0; e<MAXEPOCH; e++) {
shuffle(ix);
for (i=0; i<X.size(); i++) {
j = ix[i];
vector<double> x = X[j];
double r = Y[j];
double y = tree->evaluate(x);
double d = y - r;
double t = alpha*d;
Node* m = this;
Node* p;
      // backpropagate the error: scale it by the gating values on the
      // path from the root down to this node (cached by evaluate above)
      while (m->parent != NULL) {
        p = m->parent;
        if (m->isLeft)
          t *= p->v;
        else
          t *= (1 - p->v);
        m = m->parent;
      }
      // negative gradient steps with momentum, from E = 0.5*(y-r)^2 and
      // y_m = v*y_left + (1-v)*y_right, v = sigmoid(w.x + w0); for 'c',
      // the same d = y - r form arises from cross-entropy at the sigmoid
      for (uint k=0; k<w.size(); k++)
        dw[k] = -t*(left->y - right->y)*v*(1-v)*x[k] + u*dwp[k];
      dw0  = -t*(left->y - right->y)*v*(1-v) + u*dw0p;
      dw10 = -t*v + u*dw10p; // left child response
      dw20 = -t*(1-v) + u*dw20p; // right child response
      // apply the updates
      for (uint k=0; k<w.size(); k++)
        w[k] += dw[k];
      w0 += dw0;
      left->w0 += dw10;
      right->w0 += dw20;
      // remember the steps for the momentum term
      dwp = dw;
      dw0p = dw0;
      dw10p = dw10;
      dw20p = dw20;
    }
  }
}
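/* Everything below this point was missing from this copy of the file
 * (its tail had been replaced by unrelated page residue). The following
 * definitions are reconstructions sketched from the declarations above,
 * the tunables at the top of the file, and the training procedure in the
 * ICPR'12 paper; they are assumptions, not the author's original code. */

// Tries to turn this leaf into an internal node: children are attached,
// parameters are fit with learnParams over a sweep of learning rates with
// MAXRETRY restarts each, and the split is kept only if it improves the
// validation error by more than the prepruning threshold PRETH (the
// relative-improvement reading of PRETH is an assumption). Recurses on
// accepted children.
void Node::splitNode(const vector<vector<double> > &X, const vector<double> &Y,
                     const vector<vector<double> > &V, const vector<double> &R,
                     SoftTree* tree) {
  double errLeaf = (tree->type == 'r') ? tree->meanSqErr(V, R)
                                       : tree->errRate(V, R);
  double leafW0 = w0; // leaf response, restored if the split is rejected

  left = new Node;  left->parent = this;  left->isLeft = true;
  right = new Node; right->parent = this; right->isLeft = false;
  isLeaf = false;
  if (w.size() == 0) w = vector<double>(X[0].size()); // hardInit asserts this

  vector<double> bw; // best parameters found over all trials
  double bw0 = 0, bw10 = 0, bw20 = 0, errBest = -1;

  for (uint a = MINALPHA; a <= MAXALPHA; a++) {
    double alpha = a * 0.01; // learning rate scale 0.01 is an assumption
    for (uint retry = 0; retry < MAXRETRY; retry++) {
      if (HARDINIT)
        hardInit(X, Y);
      else {
        for (uint k = 0; k < w.size(); k++) w[k] = urand(-0.005, 0.005);
        w0 = urand(-0.005, 0.005);
        left->w0 = urand(-0.005, 0.005);
        right->w0 = urand(-0.005, 0.005);
      }
      learnParams(X, Y, V, R, alpha, tree);
      double err = (tree->type == 'r') ? tree->meanSqErr(V, R)
                                       : tree->errRate(V, R);
      if (errBest == -1 || err < errBest) {
        errBest = err;
        bw = w; bw0 = w0; bw10 = left->w0; bw20 = right->w0;
      }
    }
  }
  // keep the best parameters seen on the validation data
  w = bw; w0 = bw0; left->w0 = bw10; right->w0 = bw20;

  if (errLeaf - errBest > PRETH * errLeaf) { // accept: recurse on children
    left->splitNode(X, Y, V, R, tree);
    right->splitNode(X, Y, V, R, tree);
  } else { // reject: revert to a leaf
    delete left; delete right;
    left = right = NULL;
    isLeaf = true;
    w0 = leafW0;
  }
}

// Mean squared error of the tree on a labeled sample.
double SoftTree::meanSqErr(const vector<vector<double> > &X, const vector<double> &Y) {
  double err = 0;
  for (uint i = 0; i < X.size(); i++) {
    double d = evaluate(X[i]) - Y[i];
    err += d * d;
  }
  return err / X.size();
}

// Misclassification rate for binary classification with 0/1 labels,
// thresholding the sigmoid output at 0.5 (the convention is an assumption).
double SoftTree::errRate(const vector<vector<double> > &X, const vector<double> &Y) {
  uint wrong = 0;
  for (uint i = 0; i < X.size(); i++)
    if ((evaluate(X[i]) >= 0.5) != (Y[i] >= 0.5))
      wrong++;
  return wrong / (double)X.size();
}

// Minimal usage sketch (not from the original file): fit a soft regression
// tree to noisy samples of a step function and report size and error.
int main() {
  vector<vector<double> > X, V;
  vector<double> Y, R;
  for (uint i = 0; i < 200; i++) {
    double x = urand(-1, 1);
    double y = (x < 0 ? 0.0 : 1.0) + urand(-0.05, 0.05);
    if (i % 2 == 0) { X.push_back(vector<double>(1, x)); Y.push_back(y); }
    else            { V.push_back(vector<double>(1, x)); R.push_back(y); }
  }
  SoftTree tree(X, Y, V, R, 'r');
  cout << "size: " << tree.size() << endl;
  cout << "validation MSE: " << tree.meanSqErr(V, R) << endl;
  return 0;
}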