import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions.categorical import Categorical
from torch.utils.tensorboard import SummaryWriter
from einops import rearrange
from einops.layers.torch import Rearrange
import gym
from env import MultiSatelliteEnv
import argparse
import utils
import time
def get_args():
    # Parse command-line arguments and load hyper-parameters from the YAML config file
parser = argparse.ArgumentParser()
parser.add_argument('--config-file', dest='config_file', default='config_env.yml')
parser.add_argument('--dataset-name', dest='dataset_name', default='env')
args = utils.read_config_file(parser)
return args
def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    # Orthogonal weight initialization with constant bias (a common PPO initialization)
    torch.nn.init.orthogonal_(layer.weight, std)
    torch.nn.init.constant_(layer.bias, bias_const)
    return layer
class Agent(nn.Module):
def __init__(self, env, action_space, num_nn, critic_std, actor_std):
super().__init__()
        # Convolutional feature extractor
        self.network = nn.Sequential(
            Rearrange('b d -> b 1 d'),  # reshape (batch, dim) -> (batch, 1, dim) so Conv1d can be applied
            nn.Conv1d(in_channels=1, out_channels=2, kernel_size=7, stride=7, padding=3),  # three 1-D conv layers, each downsampling the sequence by a factor of 7
            nn.ReLU(),
            nn.Conv1d(in_channels=2, out_channels=4, kernel_size=7, stride=7, padding=3),
            nn.ReLU(),
            nn.Conv1d(in_channels=4, out_channels=8, kernel_size=7, stride=7, padding=3),
            nn.Flatten(),  # flatten the conv features into a vector
        )
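        # Note (assumption, not in the original): the hard-coded feature size 288
        # used by the heads below (8 conv channels x 36 downsampled steps) is tied
        # to this environment's observation length. It could instead be inferred
        # with a dummy forward pass, e.g.:
        #   feat_dim = self.network(torch.zeros(1, env.observation_space.shape[0])).shape[1]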
self.action_space = action_space
        self.critic = nn.Sequential(  # critic head: two linear layers mapping the 288-dim features to a scalar state value
            layer_init(nn.Linear(288, num_nn)),  # np.array(env.observation_space.shape).prod()
            nn.ReLU(),
            layer_init(nn.Linear(num_nn, 1), std=critic_std),
        )
        self.actor = nn.Sequential(
            layer_init(nn.Linear(288, num_nn)),  # np.array(env.observation_space.shape).prod()
            nn.ReLU(),
            layer_init(nn.Linear(num_nn, sum(action_space)), std=actor_std),  # one logit per discrete action, concatenated over satellites
        )
)
    def get_value(self, x):
        ''' Estimate the value of a state.
        Args:
            x: state
        Returns:
            state value estimated by the critic
        '''
        return self.critic(self.network(x))
def get_action_and_value(self, x, action=None):
        ''' Sample an action and/or evaluate a state.
        Args:
            x: state
            action: pass the stored actions here when updating the network parameters
        Returns:
            action: the sampled action (during the rollout phase)
            log-probability of the action
            entropy of the action distribution
            state value
        '''
        logits = self.actor(self.network(x))  # shape (batch, nsat * per-satellite action dim)
        split_logits = torch.split(logits, self.action_space, dim=1)  # one chunk of logits per satellite
        multi_categoricals = [Categorical(logits=logits) for logits in split_logits]  # one categorical distribution per satellite
        if action is None:
            action = torch.stack([categorical.sample() for categorical in multi_categoricals]).T  # sample an action for every satellite
        logprob = torch.stack([categorical.log_prob(a) for a, categorical in zip(action.T, multi_categoricals)])  # log-probability of each per-satellite action, shape (nsat, batch)
        entropy = torch.stack([categorical.entropy() for categorical in multi_categoricals])  # per-satellite entropies, shape (nsat, batch), summed over satellites below
        return action, logprob.sum(0), entropy.sum(0), self.critic(self.network(x))
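
# Usage sketch (illustrative; the action_space values below are assumptions):
# during a rollout, call get_action_and_value with no action to sample one;
# during the PPO update, pass the stored actions back in to re-evaluate their
# log-probabilities and entropy under the current policy.
#
#   agent = Agent(env, action_space=[5, 5, 5], num_nn=64, critic_std=1.0, actor_std=0.01)
#   obs = torch.Tensor(env.reset()).reshape(1, -1)
#   action, logprob, entropy, value = agent.get_action_and_value(obs)        # sampling
#   _, logprob, entropy, value = agent.get_action_and_value(obs, action)     # re-evaluation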
def train(env, name, action_space, target_kl, minibatch_size, gamma, ent_coef, vf_coef, num_nn, critic_std, actor_std,
learning_rate, num_epoch_steps, seed):
    # Hyper-parameters fixed inside the algorithm
    total_timesteps = 100000  # Total number of environment steps for the whole run
    num_env_steps = 128  # How many steps you interact with the env before an update
    num_update_steps = 4  # How many epochs of network updates after each interaction phase
    gae_lambda = 0.95  # Parameter in advantage estimation
    max_grad_norm = 0.5  # Max norm of the gradient vector
    clip_coef = 0.2  # Parameter to clip the (p_new/p_old) ratio
    writer = SummaryWriter('runs/' + name)  # TensorBoard writer for logging training metrics
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Initialize the agent and the optimizer
    agent = Agent(env, action_space, num_nn, critic_std, actor_std).to(device)
    optimizer = optim.Adam(agent.parameters(), lr=learning_rate, eps=1e-5)  # Adam over all policy and value parameters
# seeding
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True  # fixed seeds make the results reproducible
    # Initialize rollout storage for one round
    obs = torch.zeros((num_env_steps, env.observation_space.shape[0])).to(device)  # buffer for observations
actions = torch.zeros(num_env_steps, len(action_space)).to(device)
logprobs = torch.zeros(num_env_steps).to(device)
rewards = torch.zeros(num_env_steps).to(device)
dones = torch.zeros(num_env_steps).to(device)
values = torch.zeros(num_env_steps).to(device)
next_obs = torch.Tensor(env.reset()).to(device)
next_done = torch.zeros(1).to(device)
    global_step = 0  # global environment step counter
    cumu_rewards = 0  # cumulative reward of the current episode
    num_rounds = total_timesteps // num_env_steps  # number of rollout/update rounds
for round in range(num_rounds):
# action logic
for step in range(num_env_steps):
global_step += 1
obs[step] = next_obs
dones[step] = next_done
            with torch.no_grad():  # no gradients are needed while collecting rollouts
                action, logprob, _, value = agent.get_action_and_value(next_obs.reshape((1, -1)))  # sample an action from the current policy
action = action.flatten()
values[step] = value.flatten()
actions[step] = action
logprobs[step] = logprob
            # Execute the action in the environment and log the outcome
            next_obs, reward, done, info = env.step(action.cpu())  # step the environment
            cumu_rewards += reward  # accumulate the episode return
            if done:  # episode terminated
                writer.add_scalar("cumulative rewards", cumu_rewards, global_step)  # log the episode return to TensorBoard
                print("global step:", global_step, "cumulative rewards:", cumu_rewards)
                cumu_rewards = 0  # reset the episode return
            rewards[step] = torch.tensor(reward).to(device).view(-1)
            next_obs, next_done = torch.Tensor(next_obs).to(device), torch.Tensor([done]).to(device)
# bootstrap value if not done
with torch.no_grad():
next_value = agent.get_value(next_obs.reshape(1, -1))
advantages = torch.zeros_like(rewards).to(device)
lastgaelam = 0
        for t in reversed(range(num_env_steps)):  # iterate backwards through the rollout (GAE)
            if t == num_env_steps - 1:
                nextnonterminal = 1.0 - next_done  # next_value is the bootstrapped value computed above
            else:
                nextnonterminal = 1.0 - dones[t + 1]
                next_value = values[t + 1]  # value estimate of the following stored step
            delta = rewards[t] + gamma * next_value * nextnonterminal - values[t]
            advantages[t] = lastgaelam = delta + gamma * gae_lambda * nextnonterminal * lastgaelam
returns = advantages + values
# flatten the batch
b_obs = obs.reshape((-1,) + env.observation_space.shape)
b_logprobs = logprobs.reshape(-1)
b_actions = actions.reshape((-1,) + env.action_space.shape)
b_advantages = advantages.reshape(-1)
b_returns = returns.reshape(-1)
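        b_values = values.reshape(-1)  # kept for the clipped value-loss variant; unused in the simple loss below
        # Update the policy and value networks on minibatches of the rollout.
        # A minimal sketch of the standard clipped-surrogate PPO update, assuming
        # the usual recipe; minibatch_size, clip_coef, ent_coef, vf_coef,
        # max_grad_norm and target_kl parameterize this loop.
        b_inds = np.arange(num_env_steps)
        for epoch in range(num_update_steps):
            np.random.shuffle(b_inds)
            for start in range(0, num_env_steps, minibatch_size):
                mb_inds = b_inds[start:start + minibatch_size]
                _, newlogprob, entropy, newvalue = agent.get_action_and_value(
                    b_obs[mb_inds], b_actions[mb_inds].long())
                logratio = newlogprob - b_logprobs[mb_inds]
                ratio = logratio.exp()
                with torch.no_grad():
                    approx_kl = ((ratio - 1) - logratio).mean()  # KL estimate used for early stopping
                mb_advantages = b_advantages[mb_inds]
                mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)
                # clipped surrogate policy loss
                pg_loss1 = -mb_advantages * ratio
                pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - clip_coef, 1 + clip_coef)
                pg_loss = torch.max(pg_loss1, pg_loss2).mean()
                # value loss and entropy bonus
                v_loss = 0.5 * ((newvalue.view(-1) - b_returns[mb_inds]) ** 2).mean()
                entropy_loss = entropy.mean()
                loss = pg_loss - ent_coef * entropy_loss + vf_coef * v_loss
                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(agent.parameters(), max_grad_norm)
                optimizer.step()
            if target_kl is not None and approx_kl > target_kl:
                break  # stop updating early if the policy moved too far from the old one
        writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
        writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)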