#!/usr/bin/env python
from __future__ import print_function # 新版本特性
#任何eval,run返回的都是numpy
import tensorflow as tf
import cv2
import sys
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque # 双端队列
# Hyperparameters
GAME = 'bird' # name of the game
ACTIONS = 2 # number of distinct actions: up or down
GAMMA = 0.99 # Q-learning decay (discount) rate
OBSERVE = 40. # number of samples in the experience pool (per the original note; its usage is outside this view)
EXPLORE = 200000. # frames over which to anneal epsilon
FINAL_EPSILON = 0.001 # epsilon (probability used when choosing an action) at the end of exploration
INITIAL_EPSILON = 0.01 # epsilon (probability used when choosing an action) at the start of exploration
REPLAY_MEMORY = 50000 # maximum size of the experience pool (passed as the Memory capacity)
BATCH = 32 # number of randomly drawn samples per batch
FRAME_PER_ACTION = 1 # NOTE(review): presumably frames between action choices; usage is outside this view
UPDATE_TIME = 100 # update the target network (NOTE(review): presumably an interval in steps; confirm)
class SumTree(object):
    """Array-backed binary sum tree used for priority-proportional sampling.

    ``self.tree`` has ``2 * capacity - 1`` entries: the first ``capacity - 1``
    are internal nodes and the last ``capacity`` are leaves. A leaf stores the
    priority of one item and every internal node stores the sum of its two
    children, so ``tree[0]`` is the sum of all priorities.

    ``self.data`` is a ring buffer with one slot per leaf; the leaf at tree
    index ``i`` belongs to ``data[i - (capacity - 1)]``. Once ``capacity``
    items have been added, new items overwrite the oldest slots.
    """

    def __init__(self, capacity):
        """Allocate an empty tree with room for ``capacity`` items."""
        self.capacity = capacity
        # Internal nodes first, leaves last: (capacity - 1) + capacity entries.
        self.tree = np.zeros(2 * capacity - 1)
        # One payload slot per leaf; object dtype so any Python value fits.
        self.data = np.zeros(capacity, dtype=object)
        self.size = 0  # number of slots written so far, capped at capacity
        self.data_pointer = 0  # index of the next slot to be written

    def add(self, p, data):
        """Store ``data`` with priority ``p`` in the next ring-buffer slot."""
        # capacity - 1 is the tree index of the first leaf.
        tree_idx = self.data_pointer + (self.capacity - 1)
        self.data[self.data_pointer] = data
        self.update(tree_idx, p)
        # Advance the write position, wrapping around when the buffer is full.
        self.data_pointer += 1
        if self.data_pointer >= self.capacity:
            self.data_pointer = 0
        if self.size < self.capacity:
            self.size += 1

    def update(self, tree_idx, p):
        """Set the priority of leaf ``tree_idx`` to ``p`` and fix the sums above it."""
        change = p - self.tree[tree_idx]
        self.tree[tree_idx] = p
        # Walk up to the root, adding the difference to every ancestor.
        while tree_idx != 0:
            tree_idx = (tree_idx - 1) // 2
            self.tree[tree_idx] += change

    def get_min_prob(self):
        """Return the smallest written leaf priority divided by the total priority.

        Raises:
            ValueError: if nothing has been added yet.
        """
        if self.size == 0:
            raise ValueError("get_min_prob() called on an empty SumTree")
        first_leaf = self.capacity - 1
        # Only the leaves that have actually been written take part.
        written = self.tree[first_leaf:first_leaf + self.size]
        return np.min(written) / self.total_p()

    def get_leaf(self, v):
        r"""Find the leaf whose cumulative-priority interval contains ``v``.

        Tree structure and array storage (capacity 4)::

            Tree index:
                 0         -> storing priority sum
                / \
              1     2
             / \   / \
            3   4 5   6    -> storing priority for transitions

            Array type for storing: [0, 1, 2, 3, 4, 5, 6]

        At every node the search goes left when ``v`` is not larger than the
        left child's sum; otherwise it subtracts that sum from ``v`` and goes
        right.

        Returns:
            A tuple ``(tree_index, priority, data)`` for the selected leaf.
        """
        parent_idx = 0
        while True:
            cl_idx = 2 * parent_idx + 1  # left child
            cr_idx = cl_idx + 1  # right child
            if cl_idx >= len(self.tree):
                # No children: parent_idx is a leaf, the search is over.
                leaf_idx = parent_idx
                break
            if v <= self.tree[cl_idx]:
                parent_idx = cl_idx
            else:
                v -= self.tree[cl_idx]
                parent_idx = cr_idx
        data_idx = leaf_idx - self.capacity + 1
        # Float rounding, or a v above the total, can end the search on a leaf
        # that was never written; its data slot still holds the 0 placeholder.
        # Return the last written leaf instead of that placeholder.
        if 0 < self.size <= data_idx:
            data_idx = self.size - 1
            leaf_idx = data_idx + self.capacity - 1
        return leaf_idx, self.tree[leaf_idx], self.data[data_idx]

    def total_p(self):
        """Return the sum of all priorities (the value stored at the root)."""
        return self.tree[0]
class Memory(object):  # stored as ( s, a, r, s_ ) in SumTree
    """Prioritized replay memory backed by a SumTree.

    This SumTree code is modified version and the original code is from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    """
    epsilon = 0.01  # small amount to avoid zero priority
    alpha = 0.6  # [0~1] convert the importance of TD error to priority
    beta = 0.4  # importance-sampling, from initial value increasing to 1
    beta_increment_per_sampling = 0.001
    abs_err_upper = 1.  # clipped abs error: batch_update clips errors to at most this value

    def __init__(self, capacity):
        """Create an empty memory that holds at most ``capacity`` transitions."""
        self.sum_tree = SumTree(capacity)

    def store(self, transition):
        """Add ``transition`` with the largest priority currently in the tree.

        When every leaf priority is still zero (nothing stored yet) the
        transition gets ``abs_err_upper`` instead.
        """
        # The last `capacity` entries of the tree array are the leaf priorities.
        max_p = np.max(self.sum_tree.tree[-self.sum_tree.capacity:])
        if max_p == 0:
            max_p = self.abs_err_upper
        self.sum_tree.add(max_p, transition)

    def sample(self, n):
        """Draw ``n`` transitions, one from each of ``n`` equal priority segments.

        Each call first raises ``self.beta`` by ``beta_increment_per_sampling``,
        capped at 1.

        Returns:
            A tuple ``(b_idx, b_memory, ISWeights)``: the tree indices of the
            drawn leaves (int32, shape ``(n,)``), the stored transitions
            (object array, shape ``(n,)``) and the importance-sampling weights
            ``(prob / min_prob) ** -beta`` (shape ``(n, 1)``).
        """
        b_idx = np.empty((n,), dtype=np.int32)
        b_memory = np.empty((n,), dtype=object)
        ISWeights = np.empty((n, 1))
        total_p = self.sum_tree.total_p()
        pri_seg = total_p / n  # width of one priority segment
        self.beta = min(1., self.beta + self.beta_increment_per_sampling)  # max = 1
        # Neither value changes while sampling, so compute them once rather
        # than once per drawn transition.
        min_prob = self.sum_tree.get_min_prob()
        for i in range(n):
            low, high = pri_seg * i, pri_seg * (i + 1)
            v = np.random.uniform(low, high)
            idx, p, data = self.sum_tree.get_leaf(v)
            prob = p / total_p
            ISWeights[i, 0] = np.power(prob / min_prob, -self.beta)
            b_idx[i], b_memory[i] = idx, data
        return b_idx, b_memory, ISWeights

    def batch_update(self, tree_idx, abs_errors):
        """Set new priorities for the leaves ``tree_idx`` from ``abs_errors``.

        The priority is ``min(abs_error + epsilon, abs_err_upper) ** alpha``.
        ``abs_errors`` is not modified.
        """
        # Work on a new array so the caller's errors are left untouched; this
        # also accepts lists and integer arrays.
        shifted = np.asarray(abs_errors) + self.epsilon  # avoid zero priority
        clipped_errors = np.minimum(shifted, self.abs_err_upper)
        ps = np.power(clipped_errors, self.alpha)
        for ti, p in zip(tree_idx, ps):
            self.sum_tree.update(ti, p)
class DQN_DUELING:
def __init__(self):
    """Build the replay memory, both Q-networks, the training ops and the session.

    Two networks are created through ``self.createNetwork()``: the current
    (online) Q-network and the target Q-network, whose attributes carry a
    trailing ``T``. After the session is created and variables are
    initialized, the latest checkpoint under ``saved_networks`` is restored
    if one exists.
    """
    # Experience pool.
    self.memory = Memory(capacity=REPLAY_MEMORY)
    # Step counter (per the original note: drives model saving and epsilon changes).
    self.timeStep = 0
    self.epsilon = INITIAL_EPSILON
    # Current Q-network.
    (self.stateInput, self.QValue,
     self.W_conv1, self.b_conv1,
     self.W_conv2, self.b_conv2,
     self.W_conv3, self.b_conv3,
     self.W_fc1, self.b_fc1,
     self.W_fc2A, self.b_fc2A,
     self.W_fc2V, self.b_fc2V) = self.createNetwork()
    # Target Q-network.
    (self.stateInputT, self.QValueT,
     self.W_conv1T, self.b_conv1T,
     self.W_conv2T, self.b_conv2T,
     self.W_conv3T, self.b_conv3T,
     self.W_fc1T, self.b_fc1T,
     self.W_fc2AT, self.b_fc2AT,
     self.W_fc2VT, self.b_fc2VT) = self.createNetwork()
    # Ops that assign every current-network parameter to its target-network
    # counterpart (target.assign(current)), in the same order as before.
    parameter_pairs = [
        (self.W_conv1T, self.W_conv1), (self.b_conv1T, self.b_conv1),
        (self.W_conv2T, self.W_conv2), (self.b_conv2T, self.b_conv2),
        (self.W_conv3T, self.W_conv3), (self.b_conv3T, self.b_conv3),
        (self.W_fc1T, self.W_fc1), (self.b_fc1T, self.b_fc1),
        (self.W_fc2AT, self.W_fc2A), (self.b_fc2AT, self.b_fc2A),
        (self.W_fc2VT, self.W_fc2V), (self.b_fc2VT, self.b_fc2V),
    ]
    self.copyTargetQNetworkOperation = [
        target.assign(current) for target, current in parameter_pairs
    ]
    # Set up the loss / training ops (defined outside this view).
    self.createTrainingMethod()
    # Saver for storing and loading the model; created before the session.
    self.saver = tf.train.Saver()
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.initialize_all_variables())
    # Load an existing model when a checkpoint is available.
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        # Restore into this object's session; the bare name `sess` was never
        # defined here and raised NameError as soon as a checkpoint existed.
        self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")
# 初始化当前状态
def setInitState(self,observation):
    """Initialize ``self.currentState`` from a single observation.

    The observation is repeated four times and the copies are stacked along
    a new axis 2, so a 2-D observation of shape (H, W) yields (H, W, 4).
    """
    repeated_frames = [observation] * 4
    self.currentState = np.stack(repeated_frames, axis=2)
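# Build the CNN (convolutional neural network).
# Weights: tf.truncated_normal(shape, mean, stddev) generates values from a truncated normal distribution;
# shape is the shape of the generated tensor, mean is the mean, and stddev is the standard deviation.
# In TensorFlow, defining a variable and initializing it are separate steps: tf.Variable(initializer, name),
# where initializer is the initial value and name is an optional, user-chosen variable name.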
def weight_variable(self, shape)
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![caj](https://img-home.csdnimg.cn/images/20210720083646.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
收起资源包目录
![package](https://csdnimg.cn/release/downloadcmsfe/public/img/package.f3fc750b.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
共 5 条
- 1
资源评论
![avatar-default](https://csdnimg.cn/release/downloadcmsfe/public/img/lazyLogo2.1882d7f4.png)
![avatar](https://profile-avatar.csdnimg.cn/d5b8e16cddf148da83293d5bb68d9224_admin_maxin.jpg!1)
博士僧小星
- 粉丝: 1945
- 资源: 5903
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助
![voice](https://csdnimg.cn/release/downloadcmsfe/public/img/voice.245cc511.png)
![center-task](https://csdnimg.cn/release/downloadcmsfe/public/img/center-task.c2eda91a.png)
安全验证
文档复制为VIP权益,开通VIP直接复制
![dialog-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/green-success.6a4acb44.png)