% Q-learning over an 8-node weighted graph: learn, for every start node, the
% cheapest route to final_state. Link costs are stored as negative rewards,
% and -inf marks "no direct link". Half-way through training the cost of the
% 4<->8 link is raised so the table has to adapt to the changed topology.
clear
clc
states=8;        % number of states (graph nodes)
actions=8;       % number of actions (an action is "move to node k")
final_state=8;   % target state
episode=100;     % number of training episodes
gamma=1;         % discount factor
alpha=0.8;       % learning rate
p_max=zeros(states,actions); % per-(state,action) probability table handed to choose_next_action
T=5;             % annealing temperature used by pmax
% reward(i,j) = -(cost of link i->j); -inf where nodes i and j are not linked.
reward=-1*[inf inf 47 inf inf 24 inf inf;
    inf inf 55 31 31 inf 74 79;
    47 55 inf 88 23 25 66 inf;
    inf 31 88 inf inf 120 inf 29;
    inf 31 23 inf inf inf 42 inf;
    24 inf 25 120 inf inf inf inf;
    inf 74 66 inf 42 inf inf 66;
    inf 79 inf 29 inf inf 66 inf];
tic
Q_table=zeros(states,actions); % rows are states, columns are actions
for i=1:episode
    current_state=randperm(states,1); % pick a random start node
    while current_state~=final_state  % walk until the target node is reached
        % Actions available here = nodes with a finite-cost link from current_state.
        optinal_action=find(isfinite(reward(current_state,:)));
        if isempty(optinal_action)
            % Isolated node: report it and abandon this episode.
            disp("not connected")
            break
        else
            % Select the move for this step.
            next_action=choose_next_action(Q_table,current_state,optinal_action,p_max);
            % Best Q value obtainable from the node we are moving to.
            next_state_optinal=find(isfinite(reward(next_action,:)));
            maxQ=max(Q_table(next_action,next_state_optinal));
            Q_current=Q_table(current_state,next_action); % value before the update
            % Standard Q-learning update with learning rate alpha.
            Q_table(current_state,next_action)=(1-alpha)*Q_current+alpha*(reward(current_state,next_action)+gamma*maxQ);
            Q_next=Q_table(current_state,next_action);    % value after the update
            % Refresh the probability entry from the size of this update.
            p_max(current_state,next_action)=pmax(Q_current,Q_next,T);
            current_state=next_action; % taking action k moves us to node k
        end
    end
    % NOTE: this comparison only matches when episode is even.
    if i==episode/2
        % Topology change: the cost of link 4<->8 rises from 29 to 59;
        % every other link is unchanged.
        reward(4,8)=-59;
        reward(8,4)=-59;
        Q_table % show the table learned on the original topology
    end
end
Q_table % show the final table
toc
function [next_action]=choose_next_action(Q_table,current_state,optinal_action,p_max)
% CHOOSE_NEXT_ACTION  Select the next action for current_state.
%
%   Inputs:
%     Q_table        - Q-value matrix indexed as (state, action).
%     current_state  - index of the state whose row of Q_table is consulted.
%     optinal_action - non-empty vector of candidate action indices.
%     p_max          - matrix indexed as (state, action); the entry for the
%                      greedy action is the probability of taking it.
%
%   Output:
%     next_action    - one element of optinal_action.
%
%   With probability p_max(current_state, greedy) the action with the largest
%   Q value among the candidates is returned; otherwise a candidate is drawn
%   uniformly at random. The two branches were previously swapped, so the
%   policy became fully random exactly when p_max approached 1.
[~,best_idx]=max(Q_table(current_state,optinal_action));
greedy_action=optinal_action(best_idx);
if rand<=p_max(current_state,greedy_action)
    % Exploit: follow the current best estimate.
    next_action=greedy_action;
else
    % Explore: uniform draw over the candidate actions.
    next_action=optinal_action(randperm(length(optinal_action),1));
end
end
function [p_max]=pmax(Q_current,Q_next,T)
% PMAX  Map the size of a Q-value update to a value in [0,1].
%
%   Inputs:
%     Q_current - Q value before the update.
%     Q_next    - Q value after the update (used as the divisor).
%     T         - temperature; also a divisor.
%
%   Output:
%     p_max     - exp(-|(Q_current-Q_next)/T/Q_next|); equals 1 when the two
%                 Q values are identical and shrinks as they differ more.
delta=Q_current-Q_next;
scaled_change=delta/T/Q_next; % division order kept as-is
p_max=exp(-abs(scaled_change));
end
Matlab系列--卫星网络q学习matlab仿真.zip
需积分: 5 34 浏览量
2024-02-24
21:31:48
上传
评论
收藏 2KB ZIP 举报
Kwan的解忧杂货铺
- 粉丝: 2w+
- 资源: 3699
最新资源
- windows 系统库msvcp120dll
- 课程设计基于STM32单片机的计步器的设计源代码+项目说明.zip
- 帮您查看密码navicat-password-decrypt-main.zip
- grpc php教程grpc-php-examples-master.zip
- thrift php教程thrift-tutorial-php-master.zip
- 一键将win10桌面的“此处打开powershell” 替换为 “在此处打开cmd窗口”
- 嵌入式系统通用驱动程序接口及其实现-Spi设备驱动程序
- 高分项目基于STM32单片机的温度控制系统源代码+项目资料齐全+教程文档.zip
- 哈工大软件学院编译原理实验(Python实现)
- 基于matlab app designer设计的一个简易的数独生成与求解器源码+项目说明.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈