import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
TARGET_REPLACE_ITER = 100  # copy eval-net weights into the target net every 100 learn() calls
class Net(nn.Module):
    def __init__(self, state_size, action_size, su_num):
        super(Net, self).__init__()
        self.state_size = state_size
        self.action_size = action_size
        self.su_num = su_num
        self.Input_Layer = nn.Linear(in_features=self.state_size, out_features=256)
        self.Input_Layer.weight.data.normal_(0, 0.1)
        # Defined but never used in forward(); its 1024 width also does not
        # match the 256-unit hidden layer.
        self.Advantage_Layer = nn.Linear(in_features=1024, out_features=1024)
        self.Advantage_Layer.weight.data.normal_(0, 0.1)
        #self.Value_Layer = nn.Linear(in_features=256, out_features=256)
        #self.Value_Layer.weight.data.normal_(0, 0.1)
        self.Output_Layer = nn.Linear(in_features=256, out_features=self.action_size)
        self.Output_Layer.weight.data.normal_(0, 0.1)

    def forward(self, input_states):
        x = self.Input_Layer(input_states)
        x = F.relu(x)
        actions = self.Output_Layer(x)
        return actions
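
# Hedged sketch, not part of the original file: a quick shape check on Net's
# flat output. It assumes action_size == 2 * ch_num and state_size ==
# 2 * ch_num + 2 (as implied by the memory layout below), so channel i's pair
# of Q-values (stay off / access) occupies output columns i*2 and i*2 + 1.
def _demo_net_output_layout(ch_num=3):
    net = Net(state_size=2 * ch_num + 2, action_size=2 * ch_num, su_num=1)
    q = net(torch.randn(1, 2 * ch_num + 2))
    assert q.shape == (1, 2 * ch_num)
    # picking channel 0's action = argmax over its two columns
    return int(torch.max(q[0, 0:2], 0)[1])
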
class DQN(object):
    def __init__(self, action_size, state_size, su_num, learning_rate, memory_size,
                 ch_num, batch_size, gamma):
        # initialization (see the usage sketch at the end of this file)
        self.action_size = action_size
        self.state_size = state_size
        self.su_num = su_num
        self.learning_rate = learning_rate
        self.memory_size = memory_size
        self.ch_num = ch_num
        self.batch_size = batch_size
        self.gamma = gamma
        self.learn_step_counter = 0  # counts learn() calls for target-net updates
        # replay memory: one row per secondary user per transition
        self.memory_counter = 0
        self.memory = np.zeros([self.memory_size, self.su_num, self.state_size * 2 + self.ch_num + 1])
        # input state = user index (1) + traffic type (1) + last action (ch_num) + observation (ch_num)
        self.eval_net = Net(state_size=self.state_size, action_size=self.action_size, su_num=self.su_num)
        self.target_net = Net(state_size=self.state_size, action_size=self.action_size, su_num=self.su_num)
        self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=learning_rate)
        self.loss_func = nn.MSELoss()
    def choose_action(self, x, epsilon, rate, former_actions):
        actions = np.zeros([self.su_num, self.ch_num], dtype=int)
        inputs = torch.FloatTensor(x)
        obs = np.zeros([self.ch_num], dtype=int)
        for j in range(self.ch_num):
            obs[j] = x[0, self.ch_num + 1 + j]
        for each_user_id in range(self.su_num):
            feed = torch.unsqueeze(inputs[each_user_id, :], 0)
            # Note: epsilon is used here as the probability of exploiting the
            # network, not the probability of exploring.
            if np.random.uniform() < epsilon:
                q_eval_out = self.eval_net(feed)
                for i in range(self.ch_num):
                    # channel i's pair of Q-values sits at columns i*2, i*2+1
                    fre = q_eval_out[0, i * 2:i * 2 + 2]
                    actions[each_user_id, i] = torch.max(fre, 0)[1].data.numpy()
            elif rate == 1:
                for i in range(self.ch_num):
                    actions[each_user_id, i] = former_actions[each_user_id, i]
            else:
                for i in range(self.ch_num):
                    #if x[each_user_id, -1] > 0:
                    #    actions[each_user_id, i] = former_actions[each_user_id, i]
                    if obs[i] != 0:
                        actions[each_user_id, i] = 0
                    else:
                        actions[each_user_id, i] = np.random.randint(0, 2)
        return actions
    def store_memory(self, s, a, r, s_):
        location = self.memory_counter % self.memory_size  # ring buffer: overwrite oldest
        for i in range(self.su_num):
            transition = np.hstack((s[i, :], a[i, :], r[i], s_[i, :]))
            self.memory[location, i, :] = transition
        self.memory_counter += 1
    def get_parameters(self, batch_samples):
        # split each stored row back into (s, a, r, s_), flattening the
        # (batch, su_num) dimensions into batch * su_num rows
        s = []
        a = []
        r = []
        s_ = []
        for sample in batch_samples:
            for each in sample:
                s.append(each[0:self.state_size])
                a.append(each[self.state_size:self.state_size + self.ch_num])
                r.append(each[self.state_size + self.ch_num])
                s_.append(each[self.state_size + self.ch_num + 1:])
        return np.array(s), np.array(a), np.array(r), np.array(s_)
    def learn(self, learning_rate):
        # periodically copy eval-net weights into the target net
        if self.learn_step_counter % TARGET_REPLACE_ITER == 0:
            self.target_net.load_state_dict(self.eval_net.state_dict())
        self.learn_step_counter += 1
        # sample only from the part of the memory that has been filled
        filled = min(self.memory_counter, self.memory_size)
        sample_index = np.random.choice(filled, self.batch_size)
        sample_memory = self.memory[sample_index, :, :]
        b_s, b_a, b_r, b_s_ = self.get_parameters(sample_memory)
        bs = torch.FloatTensor(b_s)
        # offset each channel's binary action into the flattened Q output
        # (channel i occupies output columns i*2 and i*2 + 1)
        for i in range(self.ch_num):
            b_a[:, i] = b_a[:, i] + i * 2
        ba = torch.LongTensor(b_a.astype(int))
        br = torch.FloatTensor(b_r)
        bs_ = torch.FloatTensor(b_s_)
        q_eval = self.eval_net(bs).gather(dim=1, index=ba)  # shape (batch*su_num, ch_num)
        q_next = self.target_net(bs_).detach()  # detach from graph: no backprop through the target net
        q_next_i = torch.zeros_like(ba, dtype=torch.float32)
        q_target_i = torch.zeros_like(ba, dtype=torch.float32)
        for i in range(self.ch_num):
            fre = q_next[:, i * 2:i * 2 + 2]
            q_next_i[:, i] = torch.max(fre, dim=1)[0]
            q_target_i[:, i] = br + self.gamma * q_next_i[:, i]  # shape (batch*su_num,)
        # TODO: not yet written dynamically; edit by hand for a different number
        # of channels. The commented line below is the two-action case.
        #q_target = torch.stack((q_target_i[:, 0], q_target_i[:, 1], q_target_i[:, 2]), dim=-1)
        if learning_rate < 1:
            learning_rate *= 0.1
            # note: rebuilding the optimizer here resets Adam's moment estimates
            self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=learning_rate)
        loss = self.loss_func(q_eval, q_target_i)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
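
# Hedged end-to-end sketch, not part of the original file (the original
# training loop lives in Train.py). All sizes below (ch_num, su_num,
# learning_rate, ...) are illustrative assumptions. State rows follow the
# layout noted in DQN.__init__: [user id, traffic type, last action (ch_num),
# observation (ch_num)], so state_size = 2 * ch_num + 2, and each channel
# gets a binary action, giving action_size = 2 * ch_num.
if __name__ == "__main__":
    ch_num, su_num = 3, 2
    state_size = 2 * ch_num + 2
    agent = DQN(action_size=2 * ch_num, state_size=state_size, su_num=su_num,
                learning_rate=0.01, memory_size=200, ch_num=ch_num,
                batch_size=32, gamma=0.9)
    former = np.zeros([su_num, ch_num], dtype=int)
    # fill the replay memory with random transitions, then take one learn step
    for _ in range(agent.memory_size):
        s = np.random.rand(su_num, state_size).astype(np.float32)
        a = agent.choose_action(s, epsilon=1.0, rate=0, former_actions=former)
        r = np.random.rand(su_num)
        s_ = np.random.rand(su_num, state_size).astype(np.float32)
        agent.store_memory(s, a, r, s_)
        former = a
    agent.learn(learning_rate=0.01)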