基于深度强化学习的FlappyBird，集成了目前主流深度强化学习的算法和优化算法.zip

共5个文件

py：5个

版权申诉

深度学习

python

人工智能

101 浏览量 2024-02-20 13:13:56 上传评论收藏 27KB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于深度强化学习的FlappyBird，集成了目前主流深度强化学习的算法和优化算法.zip （5个子文件）

DeepLearningFlappyBird-master

DQN-NATURE.py 11KB

PrioritizedDQN.py 16KB

DuelingDQN.py 17KB

DQN-NIPS.py 10KB

DDQN.py 11KB

#!/usr/bin/env python
from __future__ import print_function  # newer-version print behaviour

# Whatever eval/run returns is a numpy value.
import tensorflow as tf
import cv2
import sys
sys.path.append("game/")
import wrapped_flappy_bird as game
import random
import numpy as np
from collections import deque  # double-ended queue

# Parameters
GAME = 'bird'  # name of the game
ACTIONS = 2  # number of action types: up or down
GAMMA = 0.99  # Q-learning decay rate
OBSERVE = 40.  # number of samples for the experience pool
EXPLORE = 200000.  # frames over which to anneal epsilon
FINAL_EPSILON = 0.001  # epsilon for action selection when exploration ends
INITIAL_EPSILON = 0.01  # epsilon for action selection when exploration starts
REPLAY_MEMORY = 50000  # maximum size of the experience pool
BATCH = 32  # number of randomly sampled transitions
FRAME_PER_ACTION = 1
UPDATE_TIME = 100  # update the target network


class SumTree(object):
    """Sum tree kept in a flat array, with a parallel array of transitions.

    ``tree`` holds ``capacity - 1`` parent nodes followed by ``capacity``
    leaves; each leaf stores one priority and each parent stores the sum of
    its two children. ``data`` holds the transition that belongs to each leaf.
    """

    def __init__(self, capacity):
        """Allocate an empty tree able to hold ``capacity`` transitions."""
        self.capacity = capacity  # for all priority values
        self.tree = np.zeros(2 * capacity - 1)  # store priority
        # [----------- parent nodes -----------][--- leaves recording priority ---]
        #          size: capacity - 1                     size: capacity
        self.data = np.zeros(capacity, dtype=tuple)  # for all transitions
        # [----------- data frame -----------]
        #            size: capacity
        self.size = 0
        self.data_pointer = 0

    def add(self, p, data):
        """Store ``data`` with priority ``p`` at the current write position.

        When the write position reaches ``capacity`` it wraps to 0, so the
        oldest entries are overwritten.
        """
        # Position of this entry inside the tree array; index capacity - 1 is
        # the position of the first leaf.
        tree_idx = self.data_pointer + (self.capacity - 1)
        self.data[self.data_pointer] = data  # update data frame
        self.update(tree_idx, p)  # update tree frame

        # The pointer always refers to the next slot to be written.
        self.data_pointer += 1
        if self.data_pointer >= self.capacity:  # replace when exceeding capacity
            self.data_pointer = 0

        if self.size < self.capacity:
            self.size += 1

    def update(self, tree_idx, p):
        """Set the priority at ``tree_idx`` to ``p`` and fix all ancestor sums."""
        change = p - self.tree[tree_idx]
        self.tree[tree_idx] = p
        # Propagate the change up to the root (iteratively rather than
        # recursively).
        while tree_idx != 0:
            tree_idx = (tree_idx - 1) // 2
            self.tree[tree_idx] += change

    def get_min_prob(self):
        """Return the smallest stored leaf priority divided by the total.

        Only the first ``size`` leaves are considered. Raises ``ValueError``
        when nothing has been stored yet.
        """
        first_leaf = self.capacity - 1
        return np.min(self.tree[first_leaf:first_leaf + self.size]) / self.total_p()

    def get_leaf(self, v):
        r"""Find the leaf whose priority interval contains the value ``v``.

        Tree structure and array storage:

        Tree index:
             0         -> storing priority sum
            / \
          1     2
         / \   / \
        3   4 5   6    -> storing priority for transitions

        Array type for storing:
        [0,1,2,3,4,5,6]

        Example weight tree:
                    42
              29          13
           13    16     3    10
          3 10  12 4   1 2   8 2

        Returns a tuple ``(leaf_idx, priority, data)`` where ``leaf_idx`` is
        the index into ``tree`` and ``data`` is the matching entry of
        ``self.data``.
        """
        parent_idx = 0
        while True:
            cl_idx = 2 * parent_idx + 1  # this node's left and right kids
            cr_idx = cl_idx + 1
            if cl_idx >= len(self.tree):  # reached the bottom, end search
                leaf_idx = parent_idx
                break
            # Downward search: go left when v fits in the left subtree's sum,
            # otherwise subtract that sum and go right.
            if v <= self.tree[cl_idx]:
                parent_idx = cl_idx
            else:
                v -= self.tree[cl_idx]
                parent_idx = cr_idx

        data_idx = leaf_idx - self.capacity + 1
        return leaf_idx, self.tree[leaf_idx], self.data[data_idx]

    def total_p(self):
        """Return the sum of all priorities (the value stored at the root)."""
        return self.tree[0]  # the root


class Memory(object):  # stored as ( s, a, r, s_ ) in SumTree
    """Prioritized replay memory backed by a ``SumTree``.

    This SumTree code is modified version and the original code is from:
    https://github.com/jaara/AI-blog/blob/master/Seaquest-DDQN-PER.py
    """

    epsilon = 0.01  # small amount to avoid zero priority
    alpha = 0.6  # [0~1] convert the importance of TD error to priority
    beta = 0.4  # importance-sampling, from initial value increasing to 1
    beta_increment_per_sampling = 0.001
    # Clipped abs error: the error used for p is kept within
    # [epsilon, abs_err_upper].
    abs_err_upper = 1.

    def __init__(self, capacity):
        """Create a memory that can hold ``capacity`` transitions."""
        self.sum_tree = SumTree(capacity)

    def store(self, transition):
        """Add ``transition`` with the largest priority currently in the tree."""
        # Priorities of the leaf nodes.
        max_p = np.max(self.sum_tree.tree[-self.sum_tree.capacity:])
        if max_p == 0:
            # The very first stored transition is given the maximum priority;
            # later transitions get the same priority as the current maximum
            # in the tree.
            max_p = self.abs_err_upper
        self.sum_tree.add(max_p, transition)  # set the max p for new p

    # TODO (original author's note): the sampling formula still needs to be
    # understood.
    def sample(self, n):
        """Draw ``n`` transitions, one from each of ``n`` equal priority segments.

        Also advances ``self.beta`` by ``beta_increment_per_sampling`` (capped
        at 1).

        Returns:
            b_idx: ``(n,)`` int32 array of tree indices of the sampled leaves.
            b_memory: ``(n,)`` object array of the sampled transitions.
            ISWeights: ``(n, 1)`` array holding
                ``(prob / min_prob) ** -beta`` for each sample.
        """
        b_idx = np.empty((n,), dtype=np.int32)
        b_memory = np.empty((n,), dtype=tuple)
        ISWeights = np.empty((n, 1))

        # The tree is not modified while sampling, so the total priority and
        # the minimum probability are computed once instead of per sample.
        total_p = self.sum_tree.total_p()
        pri_seg = total_p / n  # priority segment
        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])  # max = 1
        min_prob = self.sum_tree.get_min_prob()

        for i in range(n):
            a, b = pri_seg * i, pri_seg * (i + 1)
            v = np.random.uniform(a, b)
            idx, p, data = self.sum_tree.get_leaf(v)
            prob = p / total_p
            ISWeights[i, 0] = np.power(prob / min_prob, -self.beta)
            b_idx[i], b_memory[i] = idx, data
        return b_idx, b_memory, ISWeights

    def batch_update(self, tree_idx, abs_errors):
        """Write new priorities for the leaves in ``tree_idx``.

        Each priority is ``min(abs_error + epsilon, abs_err_upper) ** alpha``.
        ``abs_errors`` is not modified.
        """
        # Build a new array rather than using ``+=`` so the caller's array is
        # left untouched.
        abs_errors = np.asarray(abs_errors) + self.epsilon  # avoid 0
        clipped_errors = np.minimum(abs_errors, self.abs_err_upper)
        ps = np.power(clipped_errors, self.alpha)
        for ti, p in zip(tree_idx, ps):
            self.sum_tree.update(ti, p)


class DQN_DUELING:
    def __init__(self):
        """Build the replay memory, both Q networks, and the TF session.

        Restores weights from the "saved_networks" checkpoint directory when a
        checkpoint is found there.
        """
        # Initialise the experience pool.
        self.memory = Memory(capacity=REPLAY_MEMORY)
        # Step counter, used for model saving and for changing epsilon.
        self.timeStep = 0
        self.epsilon = INITIAL_EPSILON

        # Initialise the current Q network.
        (self.stateInput, self.QValue,
         self.W_conv1, self.b_conv1,
         self.W_conv2, self.b_conv2,
         self.W_conv3, self.b_conv3,
         self.W_fc1, self.b_fc1,
         self.W_fc2A, self.b_fc2A,
         self.W_fc2V, self.b_fc2V) = self.createNetwork()

        # Initialise the target Q network.
        (self.stateInputT, self.QValueT,
         self.W_conv1T, self.b_conv1T,
         self.W_conv2T, self.b_conv2T,
         self.W_conv3T, self.b_conv3T,
         self.W_fc1T, self.b_fc1T,
         self.W_fc2AT, self.b_fc2AT,
         self.W_fc2VT, self.b_fc2VT) = self.createNetwork()

        # Ops that copy the current Q network into the target Q network
        # (assign is the assignment operation).
        self.copyTargetQNetworkOperation = [
            self.W_conv1T.assign(self.W_conv1),
            self.b_conv1T.assign(self.b_conv1),
            self.W_conv2T.assign(self.W_conv2),
            self.b_conv2T.assign(self.b_conv2),
            self.W_conv3T.assign(self.W_conv3),
            self.b_conv3T.assign(self.b_conv3),
            self.W_fc1T.assign(self.W_fc1),
            self.b_fc1T.assign(self.b_fc1),
            self.W_fc2AT.assign(self.W_fc2A),
            self.b_fc2AT.assign(self.b_fc2A),
            self.W_fc2VT.assign(self.W_fc2V),
            self.b_fc2VT.assign(self.b_fc2V),
        ]

        # Initialise the loss function.
        self.createTrainingMethod()

        # Saving and loading the network model.
        # TensorFlow saves through a Saver; the Saver instance is normally
        # obtained with tf.train.Saver() before the Session is created.
        self.saver = tf.train.Saver()
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.initialize_all_variables())

        # Load the existing model if a checkpoint is present.
        checkpoint = tf.train.get_checkpoint_state("saved_networks")
        if checkpoint and checkpoint.model_checkpoint_path:
            # Restore into this object's session; the bare name `sess` is not
            # defined in this scope.
            self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
            print("Successfully loaded:", checkpoint.model_checkpoint_path)
        else:
            print("Could not find old network weights")

    # Initialise the current state.
    def setInitState(self, observation):
        """Set ``currentState`` to ``observation`` stacked four times on axis 2."""
        self.currentState = np.stack(
            (observation, observation, observation, observation), axis=2)

    # Building the CNN (convolutional neural network).
    # Weights: tf.truncated_normal(shape, mean, stddev) -- shape is the
    # dimensions of the generated tensor, mean is the mean and stddev the
    # standard deviation; it is a function producing a truncated normal
    # distribution.
    # In TensorFlow, defining a variable and initialising it are separate
    # steps: tf.Variable(initializer, name), where initializer is the initial
    # value and name is a customisable variable name.
    #
    # NOTE(review): the source text is truncated at this point. The original
    # continues with `def weight_variable(self, shape)`, whose body is not
    # visible here, along with the remaining methods of this class
    # (createNetwork and createTrainingMethod are called above but not shown).

评论收藏

内容反馈

版权申诉