%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Q learning of single agent move in N rooms
% Matlab Code companion of
% Q Learning by Example
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function q=ReinforcementLearning
clc;
format short
format compact
% Two input: R and gamma
% immediate reward matrix;
% row and column = states; -Inf = no door between room
R=[-inf,-inf,-inf,-inf, 0, -inf;
-inf,-inf,-inf, 0,-inf, 100;
-inf,-inf,-inf, 0,-inf, -inf;
-inf, 0, 0,-inf, 0, -inf;
0,-inf,-inf, 0,-inf, 100;
-inf, 0,-inf,-inf, 0, 100];
gamma=0.80; % learning parameter
q=zeros(size(R)); % initialize Q as zero,q的行数和列数等于矩阵R的。
q1=ones(size(R))*inf; % initialize previous Q as big number
count=0; % counter
for episode=0:50000
% random initial state
y=randperm(size(R,1));%产生1到6的随机数%a=size(R,1)把矩阵R的行数返回给a,b=size(R,2)把矩阵R的列数返回给b
state=y(1); %取1到6的随机数的第一个数
% select any action from this state
x=find(R(state,:)>=0); % find possible action of this state.返回矩阵R第state行所有列中不小于零的数据的下标
if size(x,1)>0,
x1=RandomPermutation(x); % randomize the possible action
x1=x1(1); % select an action
end
qMax=max(q,[],2);
q(state,x1)= R(state,x1)+gamma*qMax(x1); % get max of all actions
state=x1;
% break if convergence: small deviation on q for 1000 consecutive
if sum(sum(abs(q1-q)))<0.0001 & sum(sum(q >0))
if count>1000,
episode % report last episode
break % for
else
count=count+1; % set counter if deviation of q is small
end
else
q1=q;
count=0; % reset counter when deviation of q from previous q is large
end
end
%normalize q
g=max(max(q));
if g>0,
q=100*q/g;
end
% The code above is using basic library RandomPermutation below
function y=RandomPermutation(A)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% return random permutation of matrix A
% unlike randperm(n) that give permutation of integer 1:n only,
% RandomPermutation rearrange member of matrix A randomly
% This function is useful for MonteCarlo Simulation,
% Bootstrap sampling, game, etc.
%
% Copyright Kardi Teknomo(c) 2005
% (http://people.revoledu.com/kardi/)
%
% example: A = [ 2, 1, 5, 3]
% RandomPermutation(A) may produce [ 1, 5, 3, 2] or [ 5, 3, 2, 3]
%
% example:
% A=magic(3)
% RandomPermutation(A)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[r,c]=size(A);
b=reshape(A,r*c,1); % convert to column vector
x=randperm(r*c); % make integer permutation of similar array as key
w=[b,x']; % combine matrix and key
d=sortrows(w,2); % sort according to key
y=reshape(d(:,1),r,c); % return back the matrix
强化学习matlab源代码
需积分: 46 48 浏览量
2020-12-04
01:30:15
上传
评论 7
收藏 2KB ZIP 举报
rogerjunli
- 粉丝: 5
- 资源: 126
最新资源
- 基于CSS的响应式鲜花网站全屏效果设计源码
- 基于JavaScript的访客预约系统设计源码
- 基于Vue和ECharts的工作租房数据可视化系统设计源码
- 1040g0cg310ravpiu6ibg5pg00tsipsln3ju2d0g 2
- 基于Python的SAR图像去噪CNN-NLM设计源码
- redhat6升级到redhat7,过程redhat6.x-> redhat6.10->rehat7.9 主版本最高版本
- 基于Django的流程引擎设计源码
- 基于Node.js的Express框架与MySQL的后台管理系统设计源码
- 基于Java的Flink流批一体数据处理快速集成开发框架设计源码
- FirstFilterOrderCompare
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈