% Q-learning shortest-path search on an 8-node weighted graph.
% Each episode starts from a random node and walks until the goal node is
% reached, updating Q_table along the way. Action selection is delegated to
% choose_next_action, driven by the per-(state,action) table p_max, which is
% refreshed after every Q update through pmax. Half-way through training the
% cost of edge 4<->8 is changed to see how the learned table adapts.
clear
clc
states=8;       % number of states (graph nodes)
actions=8;      % number of actions (action j means "move to node j")
final_state=8;  % goal state
episode=100;    % number of training episodes
gamma=1;        % discount factor
alpha=0.8;      % learning rate
p_max=zeros(states,actions); % probability of acting on the Q-table maximum
T=5;            % annealing temperature
% reward(i,j) is the negated cost of edge i->j; -inf marks a missing edge.
reward=-1*[inf inf 47 inf inf 24 inf inf;
    inf inf 55 31 31 inf 74 79;
    47 55 inf 88 23 25 66 inf;
    inf 31 88 inf inf 120 inf 29;
    inf 31 23 inf inf inf 42 inf;
    24 inf 25 120 inf inf inf inf;
    inf 74 66 inf 42 inf inf 66;
    inf 79 inf 29 inf inf 66 inf];
tic
Q_table=zeros(states,states);
for i=1:episode
    current_state=randi(states); % pick a random start node
    while current_state~=final_state % walk until the goal node is reached
        % Actions available in the current state are the existing edges.
        optinal_action=find(isfinite(reward(current_state,:)));
        if isempty(optinal_action)
            % Isolated node: nothing to learn from here, abandon the episode.
            disp("not connected")
            break
        end
        % Select the move for this step.
        next_action=choose_next_action(Q_table,current_state,optinal_action,p_max);
        % Best Q-value obtainable from the successor node.
        next_state_optinal=find(isfinite(reward(next_action,:)));
        maxQ=max(Q_table(next_action,next_state_optinal));
        if isempty(maxQ)
            % Successor has no outgoing edges; treat its future value as 0
            % so the scalar assignment below stays well-formed.
            maxQ=0;
        end
        Q_current=Q_table(current_state,next_action);
        % Standard Q-learning update.
        Q_table(current_state,next_action)=(1-alpha)*Q_current+alpha*(reward(current_state,next_action)+gamma*maxQ);
        Q_next=Q_table(current_state,next_action);
        % Refresh the selection probability from the size of this update.
        p_max(current_state,next_action)=pmax(Q_current,Q_next,T);
        current_state=next_action;
    end
    if i==episode/2
        % Mid-training perturbation: edge 4<->8 now costs 59 instead of 29.
        reward(4,8)=-59;
        reward(8,4)=-59;
        Q_table % display the table learned so far
    end
end
Q_table % display the final table
toc
function [next_action]=choose_next_action(Q_table,current_state,optinal_action,p_max)
% CHOOSE_NEXT_ACTION Pick the next action for the current state.
%   Q_table        - matrix of Q-values indexed as (state, action)
%   current_state  - row index of the state being left
%   optinal_action - non-empty vector of admissible action indices
%   p_max          - matrix indexed as (state, action) holding the
%                    probability of acting on the Q-table maximum
%   next_action    - the selected element of optinal_action
%
%   With probability p_max(current_state, greedy) the admissible action with
%   the largest Q-value is returned; otherwise one admissible action is drawn
%   uniformly at random. The original code had these two branches swapped,
%   which contradicted the documented meaning of p_max and made selection
%   almost purely random once the Q-values stopped changing.
[~,max_action]=max(Q_table(current_state,optinal_action));
greedy_action=optinal_action(max_action);
if rand<=p_max(current_state,greedy_action)
    % Exploit: follow the current Q-table maximum.
    next_action=greedy_action;
else
    % Explore: uniform draw over the admissible actions.
    next_action=optinal_action(randi(numel(optinal_action)));
end
end
function [p_max]=pmax(Q_current,Q_next,T)
% PMAX Selection probability derived from the relative size of a Q update.
%   Q_current - Q-value before the update
%   Q_next    - Q-value after the update
%   T         - annealing temperature (divides the relative change)
%   p_max     - exp(-|(Q_current-Q_next)/T/Q_next|), a value in [0,1];
%               1 when the update left the value unchanged.
if Q_current==Q_next
    % No change at all. Handled explicitly because Q_current==Q_next==0
    % would otherwise evaluate 0/0 and return NaN instead of 1.
    p_max=1;
else
    p_max=exp(-abs((Q_current-Q_next)/T/Q_next));
end
end
JJJ69
- 粉丝: 6370
- 资源: 5917
最新资源
- springboot048校园资料分享平台_0303152757.zip
- XILINX FPGA全套图纸 (ZYNQ板Altium原理图和PCB) ,10层ZedBoard原版AD工程,十层PCB设计,6个信号层,4个电源层,是学习AD高速信号多层板的好素材,已打板验证
- springboot249在线互动学习网站设计.zip
- springboot050星之语明星周边产品销售网站的设计与实现.zip
- springboot049在线教育系统设计与实现.zip
- springboot250智慧校园之家长子系统.zip
- springboot051医院管理系统.zip
- springboot251基于springboot-vue的毕业论文管理系统.zip
- springboot251基于springboot-vue的毕业论文管理系统_0303174040.zip
- Openharmony开发记录-5.0.0(API12)引入Mavlink库
- springboot052旅游管理系统的设计与实现.zip
- springboot252基于Springboot和vue的餐饮管理系统的设计与实现_0303174040.zip
- springboot252基于Springboot和vue的餐饮管理系统的设计与实现.zip
- C++、MFC简单绘图系统分享,基于单文档类,实现了基本图形的绘制、画笔自由绘制、绘制文字、插入图片、保存为图片等功能
- springboot053宠物咖啡馆平台的设计与实现.zip
- springboot054飘香水果购物网站的设计与实现.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈