matlab源代码强化学习算法二维地图.rar资源-CSDN文库

共3个文件

m：3个

版权申诉

matlab

强化学习

5星 · 超过95%的资源 39 浏览量 2021-12-13 09:36:41 上传评论收藏 2KB RAR 举报

资源推荐

资源详情

资源评论

收起资源包目录

二维地图.rar （3个子文件）

二维迷宫

get_env_feedback.m 634B

choose_action.m 3KB

Maze_main.m 2KB

function A = choose_action( S, choices )
%CHOOSE_ACTION Epsilon-greedy action selection on a 2-D grid maze.
%
%   A = choose_action(S, choices)
%
%   Inputs
%     S        Linear state index. Cells are numbered row by row:
%              row = ceil(S / Maze_col), col = mod(S, Maze_col) with a
%              remainder of 0 mapped to Maze_col.
%     choices  Vector holding the value of each of the four actions in
%              state S. Only entries 1..4 are read.
%
%   Output
%     A        Selected action index in 1..4, always one that is legal for
%              the cell (see border rules below).
%
%   Globals read: Maze_row, Maze_col (grid size) and epsilon.
%
%   Policy
%     A uniform random number a is drawn. If a > epsilon, or if every legal
%     action has value 0 (e.g. an untrained table), a legal action is chosen
%     uniformly at random; otherwise the legal action with the largest value
%     is chosen (first one wins on ties). epsilon is therefore the
%     probability of exploiting, not of exploring.
%
%   Border rules
%     action 1 is illegal on the last row, action 2 on the last column,
%     action 3 on the first row, action 4 on the first column.
%     NOTE(review): this implies 1 = row+1, 2 = col+1, 3 = row-1, 4 = col-1,
%     which does not match the original "up, down, left, right" comment;
%     confirm the direction order against get_env_feedback.
%
%   Errors
%     choose_action:stateOutOfRange  S maps to a row outside 1..Maze_row.
%     choose_action:noLegalAction    the maze is 1-by-1, so nothing is legal.

global Maze_row Maze_col epsilon;

% Convert the linear state index into (row, col) grid coordinates.
col = mod(S, Maze_col);
if col == 0
    col = Maze_col;
end
row = ceil(S / Maze_col);

% Fail loudly on a state outside the grid instead of leaving A unassigned
% or silently treating the state as a border cell.
if row < 1 || row > Maze_row
    error('choose_action:stateOutOfRange', ...
        'State %g lies outside the %g-by-%g maze.', S, Maze_row, Maze_col);
end

% One legality flag per action, in action order 1..4. Using a mask instead
% of per-border branches also handles single-row / single-column mazes.
allowed = [row < Maze_row, col < Maze_col, row > 1, col > 1];
candidates = find(allowed);
if isempty(candidates)
    error('choose_action:noLegalAction', ...
        'No legal action exists in a %g-by-%g maze.', Maze_row, Maze_col);
end
candidateValues = choices(candidates);

% Draw the exploration number first, then (only when exploring) a second
% number for the random pick, preserving the original order of rand calls.
a = rand;
if a > epsilon || ~any(candidateValues)
    % Explore, or break the all-zero tie randomly rather than always
    % returning the first action.
    A = candidates(ceil(numel(candidates) * rand));
else
    % Exploit: best-valued legal action, mapped back to its action index.
    [~, best] = max(candidateValues);
    A = candidates(best);
end
end

评论收藏

内容反馈

版权申诉