% Feature sign search
% code by Wang Jinjun @ NEC Research Lab America
% reference
% Efficient sparse coding algorithms
% Honglak Lee Alexis Battle Rajat Raina Andrew Y. Ng
% Computer Science Department
% Stanford University
% Stanford, CA 94305
%=================HOW TO=================
% B, your feature matrix n×k
% y, observation vector n×1
% lambda, a Lagrange multiplier. You can set to 0.00001, try to tune it to optimize the results.
% init_x is the guessed value of x. You can set it to all zero. Also, try
% to optimize it. k×1
function [x]=feature_sign(B,y,lambda,init_x,errorGoal)
%%%%%%min_x 0.5\|y-Bx\|_2^2+lambda\|x\|_1
nbases=size(B,2);%字典的列数k
OptTol =errorGoal;% 1e-5;%1e-5;%errorGoal;%1e-5;
if nargin < 4,
x=zeros(nbases, 1);
else
x = init_x;
end;
theta=sign(x); %sign flag
a=(x~=0); %active set
optc=0;
By=B'*y;%相当于B×Si
B_h=B(:,a);%把a数组中为1的作为B的列并选取出来的集合
x_h=x(a);%x中不为0的集合
Bx_h=B_h*x_h;%相当于B*Si
all_d=2*(B'*Bx_h-By);%相当于B'(B*Si-Xi) 为什么要乘以2?
[ma mi]=max(abs(all_d).*(~a));%找到0元素集中的B'(B*Si-Xi)的值和所对应的位置
while optc==0,
optc=1;
if all_d(mi)>lambda+1e-10,
theta(mi)=-1;
a(mi)=1;
b=B(:,mi);
x(mi)=(lambda-all_d(mi))/(b'*b*2);
elseif all_d(mi)<-lambda-1e-10,
theta(mi)=1;
a(mi)=1;
b=B(:,mi);
x(mi)=(-lambda-all_d(mi))/(b'*b*2);
else
if sum(a)==0,
lambda=ma-2*1e-10;
optc=0;
b=B(:,mi);
x(mi)=By(mi)/(b'*b);
break;
end
end
opts=0;
B_h=B(:,a);%Let A? be a submatrix of A that contains only the columns corresponding to the active set
x_h=x(a);
theta_h=theta(a);%Let x? and θ? be subvectors of x and θ corresponding to the active set.
while opts==0,
opts=1;
if size(B_h,2)<=length(y),
BB=B_h'*B_h;
x_new=BB\(B_h'*y-lambda*theta_h/2);
o_new=L1_cost(y,B_h,x_new,lambda);
%cost based on changing sign
s=find(sign(x_new)~=theta_h);
x_min=x_new;
o_min=o_new;
for j=1:length(s),%Check the objective value at x?new and all points where any coefficient changes sign.
zd=s(j);
x_s=x_h-x_h(zd)*(x_new-x_h)/(x_new(zd)-x_h(zd));
x_s(zd)=0; %make sure it's zero
o_s=L1_cost(y,B_h,x_s,lambda);
if o_s<o_min,
x_min=x_s;
o_min=o_s;
end
end
else
d=x_h-B_h'*((B_h*B_h')\(B_h*x_h));
q=x_h./(d+eps);
x_min=x_h;
o_min=L1_cost(y,B_h,x_h,lambda);
for j=1:length(q),
zd=q(j);
x_s=x_h-zd*d;
x_s(j)=0; %make sure it's zero
o_s=L1_cost(y,B_h,x_s,lambda);
if o_s<o_min,
x_min=x_s;
o_min=o_s;
end
end
end
x(a)=x_min;%Update x? (and the corresponding entries in x) to the point with the lowest objective value.
a=(x~=0);
theta=sign(x);%update θ := sign(x).
B_h=B(:,a);
x_h=x(a);%Update x? (and the corresponding entries in x)
theta_h=theta(a);%update θ := sign(x).
Bx_h=B_h*x_h;
active_d=2*(B_h'*(Bx_h-y))+lambda*theta_h;
if ~isempty(find(abs(active_d)>OptTol)),
opts=0;
end
end
all_d=2*(B'*Bx_h-By);
[ma mi]=max(abs(all_d).*(~a));
if ma>lambda+OptTol,
optc=0;
end
end
return;
function cost=L1_cost(y,B,x,lambda)
tmp = y-B*x;
cost = tmp'*tmp+lambda*norm(x,1);
return
评论1