clear all;
clc;
% Tabular Q-learning with a softmax (Boltzmann) action-selection policy.
%
% State : [c1 c2 c3 c4 type], where each c* is a channel level in 0..4 and
%         type is 1 or 2.
% Action: 0 means "reject"; actions 1..16 encode (channel, amount) as
%         action = 4*(channel-1) + amount, with amount in 1..4.
% The script tracks the Q-values of one randomly chosen state
% (stateofsave) every time that state is visited and plots their evolution.
%
% Requires randsrc (Communications Toolbox).

%% Enumerate all possible states
% Row order: type is the slowest-varying index, the 4th channel the fastest.
% The four nested channel loops assume exactly 4 channels.
levels = 0:4;
s = zeros(2*numel(levels)^4, 5);   % preallocate instead of growing in the loop
m = 1;
for d = 1:2
    for i = levels
        for j = levels
            for k = levels
                for n = levels
                    s(m,:) = [i,j,k,n,d];
                    m = m+1;
                end
            end
        end
    end
end

%% Initialisation
iteration = 5000;
ls = size(s,1);
setofaction = 0:16;              % action set
la = length(setofaction);
Q = zeros(ls,la);                % Q-table, one row per state
p = ones(ls,la)/la;              % action-selection probabilities, uniform at start
indicate = 1;                    % index of the last stored row of Qofstate
% Alternative (non-uniform) channel transition matrix:
% ps = [0.3 0.3 0.2 0.1 0.1;
%       0.3 0.2 0.3 0.1 0.1;
%       0.1 0.3 0.2 0.3 0.1;
%       0.1 0.1 0.3 0.2 0.3;
%       0.1 0.1 0.2 0.3 0.3];
ps = ones(5,5)*0.2;              % channel transition probabilities; row = current level + 1
s1 = [4 4 4 4];                  % initial channel levels
%demend = randsrc(1,1,[1 2 3;0.3 0.5 0.2]);
Bth = 2;                         % amount threshold used by the payoff rules
type = randsrc(1,1,[1,2;0.4 0.6]);
state = [s1,type];               % actual current state
Qofstate = zeros(1,la);          % Q-value history of the monitored state (for plotting)
% Draw the monitored state's type into its own variable. The original code
% reused `type` here, so the type used for the first payoff could differ
% from the type stored in `state`.
typeofsave = randsrc(1,1,[1,2;0.4 0.6]);
sofsave = randsrc(1,4,[0 1 2 3 4; .2 .2 .2 .2 .2]);
stateofsave = [sofsave,typeofsave];   % state whose Q-values are recorded
beta = 2;                        % decay rate of the below-threshold payoff
lamada = 0.1;                    % learning rate
gama1 = 0.0001;                  % discount factor

%% Learning loop
for index = 1:iteration
    % Row of the current state in the state table.
    [~, num] = ismember(state, s, 'rows');
    % Always take the type from the current state so the two cannot diverge.
    type = state(end);
    % Sample an action from the current policy of this state.
    action = randsrc(1,1,[setofaction;p(num,:)]);

    % ---- Immediate payoff ----
    if action == 0
        % Rejecting is rewarded only when every channel is at level 0.
        if all(state(1:end-1) == 0)
            payoff = 1;
        else
            payoff = 0;
        end
    else
        channel = ceil(action/4);            % 1..4
        amount = action - 4*(channel-1);     % 1..4
        if state(channel) < amount
            % The chosen channel level is below the requested amount.
            payoff = 0;
        elseif amount >= Bth
            if type == 1
                payoff = amount;
            else
                % NOTE(review): the original used 2*Bth - action (no 0.8
                % factor) for channel 1 only, while channels 2-4 used the
                % form below. Channel 1 was aligned with the other three;
                % confirm against the intended model.
                payoff = 2*Bth - amount*0.8;
            end
        else
            % Below-threshold amount. NOTE(review): the original scaled
            % this by 0.8 for type 2 on channel 1 only; made consistent
            % with channels 2-4 here.
            payoff = Bth*exp(-beta*(Bth/amount-1));
        end
    end

    % Is the current state the one whose Q-values are being recorded?
    mark = isequal(state, stateofsave);

    % ---- Next state ----
    for i = 1:4
        % Row (level + 1) of ps holds the transition distribution of that level.
        s1(i) = randsrc(1,1,[0 1 2 3 4; ps(s1(i)+1,:)]);
    end
    type = randsrc(1,1,[1 2 ;0.4 0.6]);
    nextstate = [s1,type];
    [~, numofstate] = ismember(nextstate, s, 'rows');

    % ---- Q-learning update ----
    maxQ = max(Q(numofstate,:));
    Q(num,1+action) = Q(num,1+action) + lamada*(payoff + gama1*maxQ - Q(num,1+action));

    % ---- Record the Q-values of the monitored state ----
    if mark
        indicate = indicate + 1;
        Qofstate(indicate,:) = Qofstate(indicate-1,:);
        Qofstate(indicate,action+1) = Q(num,1+action);
    end

    % ---- Update the selection probabilities (softmax) ----
    gama = 1/(2 + 0.00001*index);            % temperature, slowly decreasing
    % Subtracting the row maximum leaves the result unchanged mathematically
    % but keeps exp() from overflowing.
    weights = exp((Q(num,:) - max(Q(num,:)))/gama);
    p(num,:) = weights/sum(weights);
    % p(num,end) = 1 - sum(p(num,1:end-1));

    state = nextstate;
end

%% Plot the recorded Q-values, one curve per action
x = 1:size(Qofstate,1);
plot(Qofstate(x,1),'p-');
hold on
% Colour encodes the channel (r,g,b,m); marker encodes the amount (o,*,s,d).
linestyles = {'r-o','r-*','r-s','r-d', ...
              'g-o','g-*','g-s','g-d', ...
              'b-o','b-*','b-s','b-d', ...
              'm-o','m-*','m-s','m-d'};
for a = 1:numel(linestyles)
    plot(x,Qofstate(x,a+1),linestyles{a},'LineWidth',1);
end
grid on;
xlabel('The iteration index');
ylabel('Q-value');
legend('reject','1-1','1-2','1-3','1-4','2-1','2-2','2-3','2-4','3-1','3-2','3-3','3-4','4-1','4-2','4-3','4-4');
% Comment 3