%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Q-learning of a single agent moving between N rooms
%MATLAB code companion of "Q-learning by Example"
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function q = ReinforcementLearning
%REINFORCEMENTLEARNING Tabular Q-learning for a single agent moving between rooms.
%   q = ReinforcementLearning runs Q-learning on the hard-coded reward matrix
%   R below with discount factor gamma and returns the learned Q matrix,
%   rescaled so that its largest entry equals 100 (when that entry is > 0).
%
%   Rows of R and q index the current state (room); columns index the action,
%   i.e. the room moved to. An entry of -inf in R means there is no door
%   between the two rooms, so that move is never selected.
%
%   Side effects: clears the command window and switches the display format
%   to short/compact (kept from the original implementation).

clc
format short;
format compact;

% Immediate reward matrix: row = current state, column = next state.
R = [-inf, -inf, -inf, -inf,    0, -inf;
     -inf, -inf, -inf,    0, -inf,  100;
     -inf, -inf, -inf,    0, -inf, -inf;
     -inf,    0,    0, -inf,    0, -inf;
        0, -inf, -inf,    0, -inf,  100;
     -inf,    0, -inf, -inf,    0,  100];
gamma = 0.8;                 % discount factor applied to the next state's best Q

numStates     = size(R, 1);
maxEpisodes   = 50001;       % same number of iterations as the original 0:50000
tolerance     = 0.0001;      % total absolute change in q regarded as "no change"
stableNeeded  = 1000;        % stable episodes required before stopping early

q           = zeros(size(R));
qSnapshot   = ones(size(R)); % reference copy that q is compared against
stableCount = 0;

for episode = 1:maxEpisodes
    % Random initial state.
    state = randi(numStates);

    % Actions available from this state (entries of R that are not -inf).
    actions = find(R(state, :) >= 0);
    if isempty(actions)
        % find on a row returns a 1-by-0 array when nothing matches, so
        % size(actions,1) would still be 1; isempty is the reliable test.
        % With no valid move there is nothing to update for this episode.
        continue
    end

    % Choose one of the available actions uniformly at random.
    action = actions(randi(numel(actions)));

    % Q update: immediate reward plus discounted best value of the state
    % reached. Only the row of the next state is needed for the maximum.
    q(state, action) = R(state, action) + gamma * max(q(action, :));

    % Stop once q has stayed within tolerance of the snapshot for more than
    % stableNeeded consecutive episodes and holds at least one positive entry.
    if sum(abs(qSnapshot(:) - q(:))) < tolerance && any(q(:) > 0)
        if stableCount > stableNeeded
            break
        else
            stableCount = stableCount + 1;
        end
    else
        qSnapshot   = q;
        stableCount = 0;
    end
end

% Normalize q so that its largest entry is 100.
g = max(q(:));
if g > 0
    q = 100 * q / g;
end
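% Q-learning example -- path search, complete MATLAB code
% 1 star; points required: 34; 121 views
% 2017-09-03
% 11:01:41
% uploaded
% Comments: 3
% Favorite; 1KB RAR; Report
% wiiliam_
% - Followers: 22
% - Resources: 3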