#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""
@Project :back_to_the_realm
@File :definition.py
@Author :kaiwu
@Date :2022/12/15 22:50
"""
from kaiwu_agent.utils.common_func import create_cls, attached
import numpy as np
from kaiwu_agent.back_to_the_realm.target_dqn.feature_process import (
one_hot_encoding,
read_relative_position,
bump,
)
# The create_cls function is used to dynamically create a class. Its first parameter
# is the type name; the remaining parameters are the class's attributes, whose
# default values should be None. (See the example instantiation below.)
# create_cls函数用于动态创建一个类,函数第一个参数为类型名称,剩余参数为类的属性,属性默认值应设为None
ObsData = create_cls(
    "ObsData",
    feature=None,
    legal_act=None,
)

ActData = create_cls(
    "ActData",
    move_dir=None,
    use_talent=None,
)

SampleData = create_cls(
    "SampleData",
    obs=None,
    _obs=None,
    obs_legal=None,
    _obs_legal=None,
    act=None,
    rew=None,
    ret=None,
    done=None,
)
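
# For example, an instance of a dynamically created class is built with keyword
# arguments (the values below are placeholders, not from the original file):
# obs_data = ObsData(feature=[0.0] * 4, legal_act=[1, 1])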


def reward_shaping(
    frame_no, score, terminated, truncated, obs, _obs, env_info, _env_info
):
# Get the current position coordinates of the agent
# 获取当前智能体的位置坐标
pos = _env_info.frame_state.heroes[0].pos
curr_pos_x, curr_pos_z = pos.x, pos.z
# Get the grid-based distance of the current agent's position relative to the end point, buff, and treasure chest
# 获取当前智能体的位置相对于终点, buff, 宝箱的栅格化距离
end_dist = _obs.feature.end_pos.grid_distance
buff_dist = _obs.feature.buff_pos.grid_distance
treasure_dists = [pos.grid_distance for pos in _obs.feature.treasure_pos]
# Get the agent's position from the previous frame
# 获取智能体上一帧的位置
prev_pos = env_info.frame_state.heroes[0].pos
prev_pos_x, prev_pos_z = prev_pos.x, prev_pos.z
# Get the grid-based distance of the agent's position from the previous
# frame relative to the end point, buff, and treasure chest
# 获取智能体上一帧相对于终点,buff, 宝箱的栅格化距离
prev_end_dist = obs.feature.end_pos.grid_distance
prev_buff_dist = obs.feature.buff_pos.grid_distance
prev_treasure_dists = [pos.grid_distance for pos in obs.feature.treasure_pos]
    # Get the status of the buff: an organ with sub_type == 2 is the buff.
    # buff_availability is used by the buff-related rewards below.
    # 获取buff的状态: sub_type == 2 的organ即为buff
    buff_availability = 0
    for organ in env_info.frame_state.organs:
        if organ.sub_type == 2:
            buff_availability = 1
# Get the acceleration status of the agent
# 获取智能体的加速状态
prev_speed_up = env_info.frame_state.heroes[0].speed_up
speed_up = _env_info.frame_state.heroes[0].speed_up
"""
Reward 1. Reward related to the end point
奖励1. 与终点相关的奖励
"""
reward_end_dist = 0
# Reward 1.1 Reward for getting closer to the end point
# 奖励1.1 向终点靠近的奖励
# Boundary handling: At the first frame, prev_end_dist is initialized to 1,
# and no reward is calculated at this time
# 边界处理: 第一帧时prev_end_dist初始化为1,此时不计算奖励
if prev_end_dist != 1:
reward_end_dist += 1 if end_dist < prev_end_dist else 0
# Reward 1.2 Reward for winning
# 奖励1.2 获胜的奖励
reward_win = 0
if terminated:
reward_win += 100
"""
Reward 2. Rewards related to the treasure chest
奖励2. 与宝箱相关的奖励
"""
reward_treasure_dist = 0
# Reward 2.1 Reward for getting closer to the treasure chest (only consider the nearest one)
# 奖励2.1 向宝箱靠近的奖励(只考虑最近的那个宝箱)
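    # A minimal sketch for 2.1 (an addition, not the original code): reward
    # approaching the nearest uncollected treasure. A grid_distance of 1.0 is
    # assumed to mean "collected / not present", matching the counting in 2.2.
    curr_valid_dists = [d for d in treasure_dists if d != 1.0]
    prev_valid_dists = [d for d in prev_treasure_dists if d != 1.0]
    if curr_valid_dists and prev_valid_dists:
        if min(curr_valid_dists) < min(prev_valid_dists):
            reward_treasure_dist += 1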
# Reward 2.2 Reward for getting the treasure chest
# 奖励2.2 获得宝箱的奖励
reward_treasure = 0
    # The number of distances equal to 1.0 grows by one when a treasure is
    # collected, so compare the counts between the previous and current frame.
    if prev_treasure_dists.count(1.0) < treasure_dists.count(1.0):
        reward_treasure += 10
"""
Reward 3. Rewards related to the buff
奖励3. 与buff相关的奖励
"""
# Reward 3.1 Reward for getting closer to the buff
# 奖励3.1 靠近buff的奖励
reward_buff_dist = 0
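    # A minimal sketch for 3.1 (an addition, not the original code): while the
    # buff is on the map, reward steps that shrink the grid distance to it;
    # as in 1.1, a distance of 1 is treated as "uninitialized / unavailable".
    if buff_availability and prev_buff_dist != 1 and buff_dist < prev_buff_dist:
        reward_buff_dist += 1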
# Reward 3.2 Reward for getting the buff
# 奖励3.2 获得buff的奖励
reward_buff = 0
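    # A minimal sketch for 3.2 (an addition, not the original code): the
    # speed-up flag flipping from off to on is taken to mean the buff was
    # picked up on this frame.
    if speed_up and not prev_speed_up:
        reward_buff += 10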
"""
Reward 4. Rewards related to the flicker
奖励4. 与闪现相关的奖励
"""
reward_flicker = 0
# Reward 4.1 Penalty for flickering into the wall (TODO)
# 奖励4.1 撞墙闪现的惩罚 (TODO)
# Reward 4.2 Reward for normal flickering (TODO)
# 奖励4.2 正常闪现的奖励 (TODO)
# Reward 4.3 Reward for super flickering (TODO)
# 奖励4.3 超级闪现的奖励 (TODO)
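    # A minimal sketch for the TODOs above (an addition, not the original
    # code): a large one-frame displacement is taken as evidence that the
    # flicker talent was used; the 4000-unit threshold is a hypothetical value.
    flicker_dist = (
        (curr_pos_x - prev_pos_x) ** 2 + (curr_pos_z - prev_pos_z) ** 2
    ) ** 0.5
    if flicker_dist > 4000:
        # Reward a flicker that makes net progress toward the end point,
        # penalize one that does not (e.g. one wasted against a wall).
        reward_flicker += 2 if end_dist < prev_end_dist else -2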
"""
Reward 5. Rewards for quick clearance
奖励5. 关于快速通关的奖励
"""
reward_step = 0
# Reward 5.1 Penalty for not getting close to the end point after collecting all the treasure chests
# (TODO: Give penalty after collecting all the treasure chests, encourage full collection)
# 奖励5.1 收集完所有宝箱却未靠近终点的惩罚
# (TODO: 收集完宝箱后再给予惩罚, 鼓励宝箱全收集)
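    # A minimal sketch for 5.1 (an addition, not the original code): once every
    # treasure reports a grid_distance of 1.0 (all collected), each frame that
    # fails to close in on the end point accrues reward_step, which the -1
    # weight in REWARD_CONFIG below turns into a penalty.
    if treasure_dists and treasure_dists.count(1.0) == len(treasure_dists):
        if end_dist >= prev_end_dist:
            reward_step += 1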
# Reward 5.2 Penalty for repeated exploration
# 奖励5.2 重复探索的惩罚
reward_memory = 0
memory_map = obs.feature.memory_map
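    # A minimal sketch for 5.2 (an addition, not the original code): memory_map
    # is assumed to be a flattened, agent-centered grid whose center cell
    # records whether the current position was visited before; a revisit
    # accrues reward_memory, turned into a penalty by its -1 weight below.
    center_idx = len(memory_map) // 2
    if memory_map[center_idx] > 0:
        reward_memory += 1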
# Reward 5.3 Penalty for bumping into the wall
# 奖励5.3 撞墙的惩罚
reward_bump = 0
    # Determine whether the agent bumped into the wall
    # 判断是否撞墙
    is_bump = bump(curr_pos_x, curr_pos_z, prev_pos_x, prev_pos_z)
    # Note: this grants +10 on every frame without a collision, which acts as
    # an implicit penalty for the frames that do collide.
    if is_bump == 0:
        reward_bump += 10
"""
Concatenation of rewards: Here are 10 rewards provided,
students can concatenate as needed, and can also add new rewards themselves
奖励的拼接: 这里提供了10个奖励, 同学们按需自行拼接, 也可以自行添加新的奖励
"""
REWARD_CONFIG = {
"reward_end_dist": "1",
"reward_win": "1",
"reward_buff_dist": "1",
"reward_buff": "1",
"reward_treasure_dists": "1",
"reward_treasure": "1",
"reward_flicker": "1",
"reward_step": "-1",
"reward_bump": "1",
"reward_memory": "-1",
}
reward = [
reward_end_dist * float(REWARD_CONFIG["reward_end_dist"]),
reward_win * float(REWARD_CONFIG["reward_win"]),
reward_buff_dist * float(REWARD_CONFIG["reward_buff_dist"]),
reward_buff * float(REWARD_CONFIG["reward_buff"]),
        reward_treasure_dist * float(REWARD_CONFIG["reward_treasure_dist"]),
reward_treasure * float(REWARD_CONFIG["reward_treasure"]),
reward_flicker * float(REWARD_CONFIG["reward_flicker"]),
reward_step * float(REWARD_CONFIG["reward_step"]),
reward_bump * float(REWARD_CONFIG["reward_bump"]),
reward_memory * float(REWARD_CONFIG["reward_memory"]),
]
return sum(reward), is_bump


@attached
def observation_process(raw_obs, env_info=None):
"""
This function is an important feature processing function, mainly responsible for:
- Parsing information in the raw data
- Parsing preprocessed feature data
- Processing the features and returning the processed feature vector
- Concatenation of features
- Annotation of legal actions
Function inputs:
- raw_obs: Preprocessed feature data
- env_info: Environment information returned by the game
Function outputs:
- observation: Feature vector
- legal_action: Annotation of legal actions
该函数是特征处理的重要函数, 主要负责:
- 解析原始数据里的信息
- 解析预处理后的特征数据
- 对特征进行处理, 并返回处理后的特征向量
- 特征的拼接
- 合法动作的标注
函数的输入:
- raw_obs: 预处理后的特征数据
- env_info: 游戏返回的环境信息
函数的输出:
- observation: 特征向量
    - legal_action: 合法动作的标注
    """
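    # The function body was truncated in this copy of the file. Below is a
    # minimal sketch that follows the docstring contract and the helpers
    # imported at the top (one_hot_encoding, read_relative_position); the
    # exact layout of raw_obs is an assumption, not the original code.
    feature = []

    # Positional features: one-hot encoding of the agent's grid position, plus
    # relative positions of the end point, the buff, and each treasure chest
    # (field names assumed from their use in reward_shaping above).
    feature.extend(one_hot_encoding(raw_obs.feature.grid_pos))
    feature.extend(read_relative_position(raw_obs.feature.end_pos))
    feature.extend(read_relative_position(raw_obs.feature.buff_pos))
    for treasure_pos in raw_obs.feature.treasure_pos:
        feature.extend(read_relative_position(treasure_pos))

    # Map-like features such as the memory map are appended as flat vectors.
    feature.extend(raw_obs.feature.memory_map)

    # Legal-action annotation (assumed field name): flags for which of the
    # move / talent actions are currently usable.
    legal_act = list(raw_obs.legal_act)

    return ObsData(
        feature=np.array(feature, dtype=np.float32), legal_act=legal_act
    )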