% REINFORCEMENT LEARNING CONTROL OF TWO TANK LIQUID LEVEL SYSTEM
% Dr. Mathew Mithra Noel
% School of Electrical Engineering
% Vellore Institute of Technology
clear all;
clc;
% Define the desired goal state (target liquid level)
global state_desired;
state_desired = 7;
% Flow to Tank 2 is not controlled and hence set to zero
global Q2;
Q2=0;
% Discretize the state space (liquid levels of Tank 1 and Tank 2)
global h1;
global h2;
h1 = linspace(0,10,15);   % 15 candidate levels between 0 and 10
h2 = h1;
global delta;
delta = (h1(2)-h1(1))/2;  % half the grid spacing of the level discretization
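% The helper closest.m (provided as a separate file in this project) maps a
% continuous state to the indices of the nearest discretized levels. A minimal
% sketch of such a function, assuming plain nearest-neighbor snapping onto the
% h1 and h2 grids (the actual closest.m may differ in detail):
%
%   function [r,s] = closest(state)
%       global h1 h2;
%       [~,r] = min(abs(h1 - state(1)));   % index of the nearest h1 grid point
%       [~,s] = min(abs(h2 - state(2)));   % index of the nearest h2 grid point
%   end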
% Discretize the action space (candidate inflow rates Q1 to Tank 1)
global action;
Q1 = linspace(0,20,10);   % 10 candidate inflow rates between 0 and 20
N1 = length(h1);
N2 = length(h2);
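% The dynamics function tank.m (provided as a separate file in this project)
% returns the time derivatives of the two liquid levels; it reads the inflow to
% Tank 1 from the global variable 'action' and the inflow to Tank 2 from Q2.
% A minimal sketch, assuming cascaded tanks with Torricelli-type outflows and
% purely illustrative constants A1, A2, k1, k2 (the actual tank.m may use
% different equations and parameter values):
%
%   function dhdt = tank(t,h)
%       global action Q2;
%       A1 = 1; A2 = 1;    % hypothetical tank cross-sectional areas
%       k1 = 1; k2 = 1;    % hypothetical outflow coefficients
%       Q1 = action;       % controlled inflow to Tank 1
%       dhdt = [ (Q1 - k1*sqrt(max(h(1),0)))/A1;
%                (Q2 + k1*sqrt(max(h(1),0)) - k2*sqrt(max(h(2),0)))/A2 ];
%   end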
% Initialize the policy and value function.
pibest = zeros(N1,N2);    % greedy policy: chosen inflow Q1 for each discretized state
gamma = 0.99;             % discount factor
% Set the initial guess for V(s) to zero for each state s.
V = zeros(N1,N2);
policy = zeros(N1,N2);
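% The reward function Reward.m (provided as a separate file in this project)
% scores a state by its closeness to the goal level state_desired. A minimal
% sketch, assuming a reward of 1 when the level of interest is within delta of
% the goal and 0 otherwise; whether the goal applies to h1, h2, or both depends
% on the original Reward.m:
%
%   function R = Reward(state)
%       global state_desired delta;
%       R = double(abs(state(2) - state_desired) <= delta);
%   end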
% Compute the optimal value function using the Value Iteration algorithm:
% V(s) <- R(s) + gamma * max over actions a of V(s'), where s' is the state
% reached from s by applying a for one time step.
nextV = zeros(size(Q1));  % preallocate the value estimate for each candidate action
for runs = 1:1000
    for m = 1:N1
        for n = 1:N2
            % Evaluate all possible actions from state (h1(m), h2(n)).
            for p = 1:length(Q1)
                action = Q1(p);
                % One Euler step of the tank dynamics with step size 0.1 s.
                snext = [h1(m); h2(n)] + 0.1*tank(0,[h1(m); h2(n)]);
                % Compute the closest discretized state.
                [r,s] = closest(snext);
                nextV(p) = V(r,s);
            end
            Vbest = max(nextV);
            % Improve the value function estimate using Bellman's equation.
            V(m,n) = Reward([h1(m); h2(n)]) + gamma*Vbest;
        end
    end
end
% Compute the optimal policy from the optimal value function.
for m = 1:N1
    for n = 1:N2
        % Evaluate all possible actions from state (h1(m), h2(n)).
        for p = 1:length(Q1)
            action = Q1(p);
            snext = [h1(m); h2(n)] + 0.1*tank(0,[h1(m); h2(n)]);
            % Compute the closest discretized state.
            [r,s] = closest(snext);
            nextV(p) = V(r,s);
        end
        % The greedy action with respect to V is the optimal action in this state.
        [~,bestind] = max(nextV);
        pibest(m,n) = Q1(bestind);
    end
end
N = 100;                  % number of simulation steps
state = [1 0];            % initial state (liquid levels h1 and h2)
states = zeros(N,2);
states(1,:) = state;
Ts = 0.1;                 % time between control actions (s)
% Simulate the system with the optimal control policy.
for n = 2:N
    [r,s] = closest(state);
    % Use linear regression (fitlm, Statistics and Machine Learning Toolbox) to
    % interpolate the control action between the discretized states.
    % A feedforward neural network can also be used, as in the following paper:
    % "Control of a nonlinear liquid level system using a new artificial neural
    % network based reinforcement learning approach", Applied Soft Computing,
    % Volume 23, 2014, Pages 444-451, ISSN 1568-4946,
    % https://doi.org/10.1016/j.asoc.2014.06.037
    % (http://www.sciencedirect.com/science/article/pii/S1568494614003111)
    if r > 1 && s > 1 && r < N1 && s < N2
        X = [h1(r) h2(s); h1(r-1) h2(s); h1(r+1) h2(s); h1(r) h2(s-1); h1(r) h2(s+1)];
        Y = [pibest(r,s) pibest(r-1,s) pibest(r+1,s) pibest(r,s-1) pibest(r,s+1)]';
        lin_model = fitlm(X,Y);
        action = predict(lin_model,state);
    else
        action = pibest(r,s);
    end
    % Simulate the system for one time step.
    [t,y] = ode45(@tank,[0 Ts],state);
    state = real(y(end,:));
    states(n,:) = state;
end
% Plot the time history of the states under the optimal policy.
time = (0:N-1)*Ts;        % states(1,:) is the initial state at t = 0
plot(time,states);
xlabel('time (s)');
ylabel('liquid levels h_1 and h_2');
legend('h_1','h_2');
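% Optionally, the learned policy itself can be inspected as a surface over the
% state grid (uncomment to view); pibest(m,n) is the inflow Q1 chosen at levels
% (h1(m), h2(n)):
%
%   figure;
%   surf(h1, h2, pibest');   % transpose so rows map to h2 and columns to h1
%   xlabel('h_1'); ylabel('h_2'); zlabel('optimal inflow Q_1');
%   title('Optimal policy');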