close all;
clear;
clc;
open_system('rlwatertank');
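% The rlwatertank model places an RL Agent block in the water level control
% loop: the agent observes the integrated error, the error, and the measured
% tank height, and outputs a flow command.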
numObs = 3; % number of observed state variables
obsInfo = rlNumericSpec([numObs 1],...
    'LowerLimit',[-inf -inf 0]',...
    'UpperLimit',[ inf inf inf]');
obsInfo.Name = 'observation';
obsInfo.Description = 'integrated error, error, and measured height';
numObservations = obsInfo.Dimension(1);
numAct = 1; % number of action variables
actInfo = rlNumericSpec([numAct 1],...
    'LowerLimit',-500.0,...
    'UpperLimit',500.0);
actInfo.Name = 'flow';
numActions = actInfo.Dimension(1);
env = rlSimulinkEnv('rlwatertank','rlwatertank/RL Agent',...
    obsInfo,actInfo);
env.ResetFcn = @(in)localResetFcn(in);
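% localResetFcn (defined at the bottom of this script) randomizes the
% desired water level and the initial tank height at the start of every
% episode, so the agent does not overfit to a single setpoint.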
Ts = 0.1; % agent sample time (s)
Tf = 20;  % episode duration (s)
rng(0)    % fix the random seed for reproducibility
% The critic network has two input paths, a state path and an action path,
% merged by an addition layer.
% Define the layers of this network using Deep Learning Toolbox functions.
statePath = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','State')
    fullyConnectedLayer(50,'Name','CriticStateFC1')
    reluLayer('Name','CriticRelu1')
    fullyConnectedLayer(25,'Name','CriticStateFC2')];
actionPath = [
    imageInputLayer([numActions 1 1],'Normalization','none','Name','Action')
    fullyConnectedLayer(25,'Name','CriticActionFC1')];
commonPath = [
    additionLayer(2,'Name','add')
    reluLayer('Name','CriticCommonRelu')
    fullyConnectedLayer(1,'Name','CriticOutput')];
criticNetwork = layerGraph();
criticNetwork = addLayers(criticNetwork,statePath);
criticNetwork = addLayers(criticNetwork,actionPath);
criticNetwork = addLayers(criticNetwork,commonPath);
criticNetwork = connectLayers(criticNetwork,'CriticStateFC2','add/in1');
criticNetwork = connectLayers(criticNetwork,'CriticActionFC1','add/in2');
figure
plot(criticNetwork)
criticOpts = rlRepresentationOptions('LearnRate',1e-03,'GradientThreshold',1);
critic = rlQValueRepresentation(criticNetwork,obsInfo,actInfo,...
    'Observation',{'State'},'Action',{'Action'},criticOpts);
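% Optional sanity check (my addition, with arbitrary random inputs): a
% correctly wired critic returns a scalar Q-value for one
% observation/action pair.
q0 = getValue(critic,{rand(numObservations,1)},{rand(numActions,1)});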
actorNetwork = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','State')
    fullyConnectedLayer(3,'Name','actorFC')
    tanhLayer('Name','actorTanh')
    fullyConnectedLayer(numActions,'Name','Action')];
actorOptions = rlRepresentationOptions('LearnRate',1e-04,'GradientThreshold',1);
actor = rlDeterministicActorRepresentation(actorNetwork,obsInfo,actInfo,...
    'Observation',{'State'},'Action',{'Action'},actorOptions);
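% Note that the final fullyConnectedLayer leaves the action unbounded, and
% the agent relies on the limits in actInfo. An alternative (my suggestion,
% not part of the original script) is to bound the output explicitly with a
% tanh plus a scalingLayer from Reinforcement Learning Toolbox:
%   fullyConnectedLayer(numActions,'Name','actorFC2')
%   tanhLayer('Name','actorTanh2')
%   scalingLayer('Name','Action','Scale',500)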
agentOpts = rlDDPGAgentOptions(...
    'SampleTime',Ts,...
    'TargetSmoothFactor',1e-3,...
    'DiscountFactor',1.0, ...
    'MiniBatchSize',64, ...
    'ExperienceBufferLength',1e6);
% DDPG explores with Ornstein-Uhlenbeck action noise; the noise variance
% decays by VarianceDecayRate at each sample time step.
agentOpts.NoiseOptions.Variance = 0.3;
agentOpts.NoiseOptions.VarianceDecayRate = 1e-5;
agent = rlDDPGAgent(actor,critic,agentOpts);
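% Quick validation (my addition, random observation): the untrained agent
% should return a single scalar flow command.
a0 = getAction(agent,{rand(numObservations,1)});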
maxepisodes = 500;
maxsteps = ceil(Tf/Ts); % 20 s / 0.1 s = 200 steps per episode
trainOpts = rlTrainingOptions(...
    'MaxEpisodes',maxepisodes, ...
    'MaxStepsPerEpisode',maxsteps, ...
    'ScoreAveragingWindowLength',20, ...
    'Verbose',false, ...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',800);
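% Training stops once the average reward over the 20-episode window reaches
% 800. If Parallel Computing Toolbox is available, training can be
% accelerated by also passing 'UseParallel',true to rlTrainingOptions (my
% suggestion, not part of the original script).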
doTraining = true; % set to false to load a previously saved agent instead
if doTraining
    % Train the agent.
    trainingStats = train(agent,env,trainOpts);
else
    % Load the pretrained agent for the example.
    load('WaterTankDDPG.mat','agent')
end
simOpts = rlSimulationOptions('MaxSteps',maxsteps,'StopOnError','on');
experiences = sim(env,agent,simOpts);
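% sim returns an experience structure whose Observation.observation field is
% a timeseries of the three observed signals over the episode.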
plot(experiences.Observation.observation) % plot the observed states
axis([0,10,-20.0,20.0]) % zoom in on the first 10 s
function in = localResetFcn(in)
% Randomize the reference signal (desired water level); rejection-sample
% until h lies strictly inside (0, 20).
blk = sprintf('rlwatertank/Desired \nWater Level');
h = 3*randn + 10;
while h <= 0 || h >= 20
    h = 3*randn + 10;
end
in = setBlockParameter(in,blk,'Value',num2str(h));

% Randomize the initial tank height with the same bounds.
h = 3*randn + 10;
while h <= 0 || h >= 20
    h = 3*randn + 10;
end
blk = 'rlwatertank/Water-Tank System/H';
in = setBlockParameter(in,blk,'InitialCondition',num2str(h));
end