open_system('rlwatertank')
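% Observation specification: three continuous signals (integrated error,
% error, and the measured value), with the third signal bounded below at zero.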
obsInfo = rlNumericSpec([3 1],...
    'LowerLimit',[-inf -inf 0  ]',...
    'UpperLimit',[ inf  inf inf]');
obsInfo.Name = 'observations';
obsInfo.Description = 'integrated error, error, and measured temperature';
numObservations = obsInfo.Dimension(1);
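% Action specification: a single continuous signal named 'flow'.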
actInfo = rlNumericSpec([1 1]);
actInfo.Name = 'flow';
numActions = actInfo.Dimension(1);
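% Build the environment interface from the Simulink model and the RL Agent
% block, and attach a reset function that randomizes the set point.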
env = rlSimulinkEnv('rlwatertank','rlwatertank/RL Agent',...
    obsInfo,actInfo);
env.ResetFcn = @(in)localResetFcn(in);
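% Agent sample time Ts and nominal simulation time Tf, in seconds.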
Ts = 1.0;
Tf = 200;
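% Fix the random number generator seed for reproducibility.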
rng(0)
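% Critic network: a state path and an action path are merged by an addition
% layer and reduced to a single Q-value output.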
statePath = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','State')
    fullyConnectedLayer(50,'Name','CriticStateFC1')
    reluLayer('Name','CriticRelu1')
    fullyConnectedLayer(25,'Name','CriticStateFC2')];
actionPath = [
    imageInputLayer([numActions 1 1],'Normalization','none','Name','Action')
    fullyConnectedLayer(25,'Name','CriticActionFC1')];
commonPath = [
    additionLayer(2,'Name','add')
    reluLayer('Name','CriticCommonRelu')
    fullyConnectedLayer(1,'Name','CriticOutput')];
criticNetwork = layerGraph();
criticNetwork = addLayers(criticNetwork,statePath);
criticNetwork = addLayers(criticNetwork,actionPath);
criticNetwork = addLayers(criticNetwork,commonPath);
criticNetwork = connectLayers(criticNetwork,'CriticStateFC2','add/in1');
criticNetwork = connectLayers(criticNetwork,'CriticActionFC1','add/in2');
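% Critic options and representation: the 'State' and 'Action' input layers are
% mapped to the observation and action channels of the environment.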
criticOpts = rlRepresentationOptions('LearnRate',1e-03,'GradientThreshold',1);
critic = rlRepresentation(criticNetwork,obsInfo,actInfo,'Observation',{'State'},'Action',{'Action'},criticOpts);
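% Actor network: maps the three observations through a small tanh hidden layer
% to the single flow action.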
actorNetwork = [
    imageInputLayer([numObservations 1 1],'Normalization','none','Name','State')
    fullyConnectedLayer(3,'Name','actorFC')
    tanhLayer('Name','actorTanh')
    fullyConnectedLayer(numActions,'Name','Action')];
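% Actor options and representation.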
actorOptions = rlRepresentationOptions('LearnRate',1e-04,'GradientThreshold',1);
actor = rlRepresentation(actorNetwork,obsInfo,actInfo,'Observation',{'State'},'Action',{'Action'},actorOptions);
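% TD3 agent options; the exploration noise variance and its decay rate are set
% on the agent's exploration model.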
agentOpts = rlTD3AgentOptions(...
    'SampleTime',Ts,...
    'TargetSmoothFactor',1e-3,...
    'DiscountFactor',0.99, ...
    'MiniBatchSize',64, ...
    'ExperienceBufferLength',1e6);
agentOpts.ExplorationModel.Variance = 0.9;
agentOpts.ExplorationModel.VarianceDecayRate = 1e-5;
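% TD3 is normally run with two critics. A hedged sketch (not part of the
% original code, and assuming rlTD3Agent accepts a row vector of critic
% representations): build a second critic from the same layer graph so it
% receives its own random initialization, then pass both critics to the agent.
%
%   critic2 = rlRepresentation(criticNetwork,obsInfo,actInfo, ...
%       'Observation',{'State'},'Action',{'Action'},criticOpts);
%   agent = rlTD3Agent(actor,[critic critic2],agentOpts);
%
% Create the TD3 agent from the single actor and critic defined above.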
agent = rlTD3Agent(actor,critic,agentOpts);
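% Training options: run up to maxepisodes episodes of at most maxsteps steps,
% average the score over 20 episodes, and stop once the average reward reaches
% the StopTrainingValue threshold.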
maxepisodes = 4000;
maxsteps = 400;
trainOpts = rlTrainingOptions(...
    'MaxEpisodes',maxepisodes, ...
    'MaxStepsPerEpisode',maxsteps, ...
    'ScoreAveragingWindowLength',20, ...
    'Verbose',false, ...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',80000);
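% Set doTraining to false to skip training and load a saved agent instead.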
doTraining = true;
if doTraining
    % Train the agent.
    trainingStats = train(agent,env,trainOpts);
else
    % Load a pretrained agent for the example.
    % load('WaterTankDDPG.mat','agent')
end
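% Validate the trained agent by simulating it against the environment.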
simOpts = rlSimulationOptions('MaxSteps',maxsteps,'StopOnError','off');
experiences = sim(env,agent,simOpts);
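% A hedged follow-up (not in the original code): assuming the sim output stores
% the reward signal as a timeseries, the episode return can be inspected with:
%
%   totalReward = sum(experiences.Reward.Data);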
function in = localResetFcn(in)
% Randomize the reference (set point) signal at the start of each episode.
blk = 'rlwatertank/Set Point';
t = 10*randn + 75;
% Resample until the set point falls strictly between 59 and 122.
while t <= 59 || t >= 122
    t = 10*randn + 75;
end
in = setBlockParameter(in,blk,'Value',num2str(t));
end