#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""
@Project :back_to_the_realm
@File :definition.py
@Author :kaiwu
@Date :2022/12/15 22:50
"""
from kaiwu_agent.utils.common_func import create_cls, attached
import numpy as np
from kaiwu_agent.back_to_the_realm.target_dqn.feature_process import (
one_hot_encoding,
read_relative_position,
bump,
)
# The create_cls function is used to dynamically create a class. Its first parameter
# is the type name; the remaining parameters are the class's attributes, whose
# default values should be None. (See the example instantiation below.)
# create_cls函数用于动态创建一个类,函数第一个参数为类型名称,剩余参数为类的属性,属性默认值应设为None
ObsData = create_cls(
    "ObsData",
    feature=None,
    legal_act=None,
)

ActData = create_cls(
    "ActData",
    move_dir=None,
    use_talent=None,
)

SampleData = create_cls(
    "SampleData",
    obs=None,
    _obs=None,
    obs_legal=None,
    _obs_legal=None,
    act=None,
    rew=None,
    ret=None,
    done=None,
)
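
# For example, an instance of a dynamically created class is built with keyword
# arguments (the values below are placeholders, not from the original file):
# obs_data = ObsData(feature=[0.0] * 4, legal_act=[1, 1])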


def reward_shaping(
    frame_no, score, terminated, truncated, obs, _obs, env_info, _env_info
):
# Get the current position coordinates of the agent
# 获取当前智能体的位置坐标
pos = _env_info.frame_state.heroes[0].pos
curr_pos_x, curr_pos_z = pos.x, pos.z
# Get the grid-based distance of the current agent's position relative to the end point, buff, and treasure chest
# 获取当前智能体的位置相对于终点, buff, 宝箱的栅格化距离
end_dist = _obs.feature.end_pos.grid_distance
buff_dist = _obs.feature.buff_pos.grid_distance
treasure_dists = [pos.grid_distance for pos in _obs.feature.treasure_pos]
# Get the agent's position from the previous frame
# 获取智能体上一帧的位置
prev_pos = env_info.frame_state.heroes[0].pos
prev_pos_x, prev_pos_z = prev_pos.x, prev_pos.z
# Get the grid-based distance of the agent's position from the previous
# frame relative to the end point, buff, and treasure chest
# 获取智能体上一帧相对于终点,buff, 宝箱的栅格化距离
prev_end_dist = obs.feature.end_pos.grid_distance
prev_buff_dist = obs.feature.buff_pos.grid_distance
prev_treasure_dists = [pos.grid_distance for pos in obs.feature.treasure_pos]
    # Get the status of the buff: an organ with sub_type == 2 is the buff.
    # buff_availability is used by the buff-related rewards below.
    # 获取buff的状态: sub_type == 2 的organ即为buff
    buff_availability = 0
    for organ in env_info.frame_state.organs:
        if organ.sub_type == 2:
            buff_availability = 1
# Get the acceleration status of the agent
# 获取智能体的加速状态
prev_speed_up = env_info.frame_state.heroes[0].speed_up
speed_up = _env_info.frame_state.heroes[0].speed_up
"""
Reward 1. Reward related to the end point
奖励1. 与终点相关的奖励
"""
reward_end_dist = 0
# Reward 1.1 Reward for getting closer to the end point
# 奖励1.1 向终点靠近的奖励
# Boundary handling: At the first frame, prev_end_dist is initialized to 1,
# and no reward is calculated at this time
# 边界处理: 第一帧时prev_end_dist初始化为1,此时不计算奖励
if prev_end_dist != 1:
reward_end_dist += 1 if end_dist < prev_end_dist else 0
# Reward 1.2 Reward for winning
# 奖励1.2 获胜的奖励
reward_win = 0
if terminated:
reward_win += 100
"""
Reward 2. Rewards related to the treasure chest
奖励2. 与宝箱相关的奖励
"""
reward_treasure_dist = 0
# Reward 2.1 Reward for getting closer to the treasure chest (only consider the nearest one)
# 奖励2.1 向宝箱靠近的奖励(只考虑最近的那个宝箱)
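    # A minimal sketch for 2.1 (an addition, not the original code): reward
    # approaching the nearest uncollected treasure. A grid_distance of 1.0 is
    # assumed to mean "collected / not present", matching the counting in 2.2.
    curr_valid_dists = [d for d in treasure_dists if d != 1.0]
    prev_valid_dists = [d for d in prev_treasure_dists if d != 1.0]
    if curr_valid_dists and prev_valid_dists:
        if min(curr_valid_dists) < min(prev_valid_dists):
            reward_treasure_dist += 1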
# Reward 2.2 Reward for getting the treasure chest
# 奖励2.2 获得宝箱的奖励
reward_treasure = 0
    # The number of distances equal to 1.0 grows by one when a treasure is
    # collected, so compare the counts between the previous and current frame.
    if prev_treasure_dists.count(1.0) < treasure_dists.count(1.0):
        reward_treasure += 10
"""
Reward 3. Rewards related to the buff
奖励3. 与buff相关的奖励
"""
# Reward 3.1 Reward for getting closer to the buff
# 奖励3.1 靠近buff的奖励
reward_buff_dist = 0
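    # A minimal sketch for 3.1 (an addition, not the original code): while the
    # buff is on the map, reward steps that shrink the grid distance to it;
    # as in 1.1, a distance of 1 is treated as "uninitialized / unavailable".
    if buff_availability and prev_buff_dist != 1 and buff_dist < prev_buff_dist:
        reward_buff_dist += 1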
# Reward 3.2 Reward for getting the buff
# 奖励3.2 获得buff的奖励
reward_buff = 0
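    # A minimal sketch for 3.2 (an addition, not the original code): the
    # speed-up flag flipping from off to on is taken to mean the buff was
    # picked up on this frame.
    if speed_up and not prev_speed_up:
        reward_buff += 10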
"""
Reward 4. Rewards related to the flicker
奖励4. 与闪现相关的奖励
"""
reward_flicker = 0
# Reward 4.1 Penalty for flickering into the wall (TODO)
# 奖励4.1 撞墙闪现的惩罚 (TODO)
# Reward 4.2 Reward for normal flickering (TODO)
# 奖励4.2 正常闪现的奖励 (TODO)
# Reward 4.3 Reward for super flickering (TODO)
# 奖励4.3 超级闪现的奖励 (TODO)
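    # A minimal sketch for the TODOs above (an addition, not the original
    # code): a large one-frame displacement is taken as evidence that the
    # flicker talent was used; the 4000-unit threshold is a hypothetical value.
    flicker_dist = (
        (curr_pos_x - prev_pos_x) ** 2 + (curr_pos_z - prev_pos_z) ** 2
    ) ** 0.5
    if flicker_dist > 4000:
        # Reward a flicker that makes net progress toward the end point,
        # penalize one that does not (e.g. one wasted against a wall).
        reward_flicker += 2 if end_dist < prev_end_dist else -2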
"""
Reward 5. Rewards for quick clearance
奖励5. 关于快速通关的奖励
"""
reward_step = 0
# Reward 5.1 Penalty for not getting close to the end point after collecting all the treasure chests
# (TODO: Give penalty after collecting all the treasure chests, encourage full collection)
# 奖励5.1 收集完所有宝箱却未靠近终点的惩罚
# (TODO: 收集完宝箱后再给予惩罚, 鼓励宝箱全收集)
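    # A minimal sketch for 5.1 (an addition, not the original code): once every
    # treasure reports a grid_distance of 1.0 (all collected), each frame that
    # fails to close in on the end point accrues reward_step, which the -1
    # weight in REWARD_CONFIG below turns into a penalty.
    if treasure_dists and treasure_dists.count(1.0) == len(treasure_dists):
        if end_dist >= prev_end_dist:
            reward_step += 1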
# Reward 5.2 Penalty for repeated exploration
# 奖励5.2 重复探索的惩罚
reward_memory = 0
memory_map = obs.feature.memory_map
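    # A minimal sketch for 5.2 (an addition, not the original code): memory_map
    # is assumed to be a flattened, agent-centered grid whose center cell
    # records whether the current position was visited before; a revisit
    # accrues reward_memory, turned into a penalty by its -1 weight below.
    center_idx = len(memory_map) // 2
    if memory_map[center_idx] > 0:
        reward_memory += 1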
# Reward 5.3 Penalty for bumping into the wall
# 奖励5.3 撞墙的惩罚
reward_bump = 0
    # Determine whether the agent bumped into the wall
    # 判断是否撞墙
    is_bump = bump(curr_pos_x, curr_pos_z, prev_pos_x, prev_pos_z)
    # Note: this grants +10 on every frame without a collision, which acts as
    # an implicit penalty for the frames that do collide.
    if is_bump == 0:
        reward_bump += 10
"""
Concatenation of rewards: Here are 10 rewards provided,
students can concatenate as needed, and can also add new rewards themselves
奖励的拼接: 这里提供了10个奖励, 同学们按需自行拼接, 也可以自行添加新的奖励
"""
REWARD_CONFIG = {
"reward_end_dist": "1",
"reward_win": "1",
"reward_buff_dist": "1",
"reward_buff": "1",
"reward_treasure_dists": "1",
"reward_treasure": "1",
"reward_flicker": "1",
"reward_step": "-1",
"reward_bump": "1",
"reward_memory": "-1",
}
reward = [
reward_end_dist * float(REWARD_CONFIG["reward_end_dist"]),
reward_win * float(REWARD_CONFIG["reward_win"]),
reward_buff_dist * float(REWARD_CONFIG["reward_buff_dist"]),
reward_buff * float(REWARD_CONFIG["reward_buff"]),
        reward_treasure_dist * float(REWARD_CONFIG["reward_treasure_dist"]),
reward_treasure * float(REWARD_CONFIG["reward_treasure"]),
reward_flicker * float(REWARD_CONFIG["reward_flicker"]),
reward_step * float(REWARD_CONFIG["reward_step"]),
reward_bump * float(REWARD_CONFIG["reward_bump"]),
reward_memory * float(REWARD_CONFIG["reward_memory"]),
]
return sum(reward), is_bump


@attached
def observation_process(raw_obs, env_info=None):
"""
This function is an important feature processing function, mainly responsible for:
- Parsing information in the raw data
- Parsing preprocessed feature data
- Processing the features and returning the processed feature vector
- Concatenation of features
- Annotation of legal actions
Function inputs:
- raw_obs: Preprocessed feature data
- env_info: Environment information returned by the game
Function outputs:
- observation: Feature vector
- legal_action: Annotation of legal actions
该函数是特征处理的重要函数, 主要负责:
- 解析原始数据里的信息
- 解析预处理后的特征数据
- 对特征进行处理, 并返回处理后的特征向量
- 特征的拼接
- 合法动作的标注
函数的输入:
- raw_obs: 预处理后的特征数据
- env_info: 游戏返回的环境信息
函数的输出:
- observation: 特征向量
    - legal_action: 合法动作的标注
    """
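    # The function body was truncated in this copy of the file. Below is a
    # minimal sketch that follows the docstring contract and the helpers
    # imported at the top (one_hot_encoding, read_relative_position); the
    # exact layout of raw_obs is an assumption, not the original code.
    feature = []

    # Positional features: one-hot encoding of the agent's grid position, plus
    # relative positions of the end point, the buff, and each treasure chest
    # (field names assumed from their use in reward_shaping above).
    feature.extend(one_hot_encoding(raw_obs.feature.grid_pos))
    feature.extend(read_relative_position(raw_obs.feature.end_pos))
    feature.extend(read_relative_position(raw_obs.feature.buff_pos))
    for treasure_pos in raw_obs.feature.treasure_pos:
        feature.extend(read_relative_position(treasure_pos))

    # Map-like features such as the memory map are appended as flat vectors.
    feature.extend(raw_obs.feature.memory_map)

    # Legal-action annotation (assumed field name): flags for which of the
    # move / talent actions are currently usable.
    legal_act = list(raw_obs.legal_act)

    return ObsData(
        feature=np.array(feature, dtype=np.float32), legal_act=legal_act
    )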