matlab_基于Q学习实现自主选频与动态频谱接入，认知无线电

共1个文件

m：1个

版权申诉

matlab

5星 · 超过95%的资源 70 浏览量 2022-06-20 21:51:44 上传评论 1 收藏 2KB ZIP 举报

资源详情

资源评论

资源推荐

收起资源包目录

基于Q学习实现自主选频与动态频谱接入.zip （1个子文件）

基于Q学习实现自主选频与动态频谱接入

Qlearning.m 7KB

% Qlearning.m
% Tabular Q-learning for channel/bandwidth selection (dynamic spectrum access).
%
% State  : [c1 c2 c3 c4 type] - a value in 0..4 for each of the four
%          channels plus a user type in {1,2}.
% Action : 0 = reject; 1..16 = request `amount` (1..4) units on `channel`
%          (1..4), encoded as action = 4*(channel-1) + amount.
% Policy : Boltzmann (softmax) over the Q-values of the visited state, with a
%          temperature gama = 1/(2 + 1e-5*index) that shrinks over time.
% Output : a figure showing how the Q-values of one randomly chosen state
%          (stateofsave) evolve each time that state is visited.
%
% Requires randsrc (Communications Toolbox).

clear all; clc;

% ---- Enumerate all possible states (type is the slowest-varying field) ----
s = zeros(2*5^4, 5);                 % preallocate: 2 types x 5^4 channel combos
m = 1;
for d = 1:2
    for i = 0:4
        for j = 0:4
            for k = 0:4
                for n = 0:4
                    s(m,:) = [i, j, k, n, d];
                    m = m + 1;
                end
            end
        end
    end
end

% ---- Initialisation ----
iteration   = 5000;
ls          = size(s, 1);
setofaction = 0:16;                  % action set
la          = length(setofaction);
Q           = zeros(ls, la);         % Q-table
p           = ones(ls, la) / la;     % action-selection probabilities (uniform start)
indicate    = 1;                     % last filled row of Qofstate

% Alternative (non-uniform) state transition probabilities:
% ps = [0.3 0.3 0.2 0.1 0.1;
%       0.3 0.2 0.3 0.1 0.1;
%       0.1 0.3 0.2 0.3 0.1;
%       0.1 0.1 0.3 0.2 0.3;
%       0.1 0.1 0.2 0.3 0.3];
ps = ones(5,5) * 0.2;                % row r = transition distribution from value r-1

s1 = [4 4 4 4];                      % initial channel values
% demend = randsrc(1,1,[1 2 3; 0.3 0.5 0.2]);
Bth   = 2;                           % bandwidth threshold used by the payoff rules
type  = randsrc(1, 1, [1, 2; 0.4 0.6]);
state = [s1, type];                  % actual current state

Qofstate(indicate,:) = zeros(1, la); % Q-value history used for plotting

% State whose Q-values are recorded. Its type is kept in a separate variable
% so that it does not clobber the type of the actual current state.
typeofsave  = randsrc(1, 1, [1, 2; 0.4 0.6]);
sofsave     = randsrc(1, 4, [0 1 2 3 4; .2 .2 .2 .2 .2]);
stateofsave = [sofsave, typeofsave];

beta   = 2;                          % decay rate of the below-threshold payoff
payoff = 0;
lamada = 0.1;                        % learning rate
gama1  = 0.0001;                     % discount factor

% ---- Learning loop ----
for index = 1:iteration
    type = state(end);               % always use the type of the state acted on
    num  = find(ismember(s, state, 'rows'));      % row of current state in s/Q/p
    action = randsrc(1, 1, [setofaction; p(num,:)]);

    % -- Payoff of (state, action) --
    if action == 0
        % Rejecting pays 1 only when every channel value is 0.
        if all(state(1:end-1) == 0)
            payoff = 1;
        else
            payoff = 0;
        end
    else
        channel = ceil(action / 4);               % 1..4
        amount  = action - 4*(channel - 1);       % 1..4
        if state(channel) < amount
            payoff = 0;                           % request exceeds the channel value
        elseif type == 1
            if amount >= Bth
                payoff = amount;
            else
                payoff = Bth*exp(-beta*(Bth/amount - 1));
            end
        else
            % NOTE(review): for type 2 the original code treats channel 1
            % differently from channels 2-4 (the 0.8 factor sits on the other
            % branch). This looks like an inconsistency, but the intended
            % formula cannot be determined from this file, so the original
            % arithmetic is preserved exactly. Confirm with the author.
            if channel == 1
                if amount >= Bth
                    payoff = 2*Bth - amount;
                else
                    payoff = Bth*exp(-beta*(Bth/amount - 1))*0.8;
                end
            else
                if amount >= Bth
                    payoff = 2*Bth - amount*0.8;
                else
                    payoff = Bth*exp(-beta*(Bth/amount - 1));
                end
            end
        end
    end

    % -- Is the current state the one whose Q-values are recorded? --
    if all(state == stateofsave)
        mark = 1;
        indicate = indicate + 1;
    else
        mark = 0;
    end

    % -- Next state: each channel moves according to its row of ps --
    for i = 1:4
        s1(i) = randsrc(1, 1, [0 1 2 3 4; ps(s1(i) + 1, :)]);
    end
    type  = randsrc(1, 1, [1 2; 0.4 0.6]);
    state = [s1, type];
    numofstate = find(ismember(s, state, 'rows'));

    % -- Q-learning update --
    maxQ = max(Q(numofstate,:));
    Q(num, 1+action) = Q(num, 1+action) + ...
        lamada*(payoff + gama1*maxQ - Q(num, 1+action));

    % -- Record Q-value history for the tracked state --
    if mark == 1
        Qofstate(indicate,:) = Qofstate(indicate-1,:);
        Qofstate(indicate, action+1) = Q(num, 1+action);
    end

    % -- Update selection probabilities (softmax with temperature gama) --
    gama    = 1/(2 + 0.00001*index);
    weights = exp(Q(num,:)/gama);
    p(num,:) = weights / sum(weights);
    % p(num,end) = 1 - sum(p(num,1:end-1));
end

% ---- Plot Q-value history of the tracked state ----
x = 1:size(Qofstate, 1);
plot(Qofstate(x,1), 'p-');           % action 0 (reject), default line width
hold on
% Colour encodes the channel (r,g,b,m); marker encodes the amount (o,*,s,d).
styles = {'r-o','r-*','r-s','r-d', ...
          'g-o','g-*','g-s','g-d', ...
          'b-o','b-*','b-s','b-d', ...
          'm-o','m-*','m-s','m-d'};
for a = 1:numel(styles)
    plot(x, Qofstate(x, a+1), styles{a}, 'LineWidth', 1);
end
grid on;
xlabel('The iteration index');
ylabel('Q-value');
legend('reject','1-1','1-2','1-3','1-4','2-1','2-2','2-3','2-4','3-1','3-2','3-3','3-4','4-1','4-2','4-3','4-4');