python开发基于深度强化学习(DQN)解决三维在线装箱问题源码+项目说明文档+模型.zip

共28个文件

py：10个

zip：8个

png：4个

版权申诉

深度强化学习

python

三维装箱问题

课程作业

5星 · 超过95%的资源 142 浏览量 2024-10-14 23:18:27 上传评论收藏 16.92MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

python开发基于深度强化学习(DQN)解决三维在线装箱问题源码+项目说明文档+模型.zip （28个子文件）

eval.py 2KB

draw.py 2KB

data.py 4KB

cnn.pth 6.06MB

images

ͼƬ2.png 3KB

fig1 1B

ͼƬ1.png 91KB

train.py 13KB

container.py 6KB

项目说明文档.7z 2KB

project

container.zip 2KB

eval.py 2KB

eval.zip 835B

images.zip 94KB

draw.zip 864B

draw.py 2KB

data.zip 1KB

data.py 4KB

cnn.pth 6.06MB

train.zip 4KB

项目说明文档.zip 3KB

images

ͼƬ2.png 3KB

fig1 1B

ͼƬ1.png 91KB

train.py 13KB

cnn.zip 5.54MB

container.py 6KB

项目说明文档.7z 2KB

# -*- coding: utf-8 -*- import torch import torch.nn as nn from torch.autograd import Variable import torch.nn.functional as F import numpy as np import random import copy import time # import draw from container import * from data import * # from draw import * # 1. Define some Hyper Parameters EPSILON = 0.9 # epsilon used for epsilon greedy approach BATCH_SIZE = 16 # batch size of sampling process from buffer LR = 0.0001 # learning rate GAMMA = 0.9 # discount factor TARGET_NETWORK_REPLACE_FREQ = 100 # How frequently target netowrk updates MEMORY_CAPACITY = 2000 # The capacity of experience replay buffer device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 2. Random generate box data solution = [[(91, 54, 45, 32), (105, 77, 72, 24), (79, 78, 48, 30)], [(108, 76, 30, 24), (110, 43, 25, 7), (92, 81, 55, 22), (81, 33, 28, 13), (120, 99, 73, 15)], [(88, 54, 39, 16), (94, 54, 36, 14), (87, 77, 43, 20), (100, 80, 72, 16), (83, 40, 36, 6),(91, 54, 22, 15), (109, 58, 54, 17), (94, 55, 30, 9)], [(86, 84, 45, 18), (81, 45, 34, 19), (70, 54, 37, 13), (71, 61, 52, 16), (78, 73, 40, 10),(69, 63, 46, 13), (72, 67, 56, 10), (75, 75, 36, 8), (94, 88, 50, 12), (65, 51, 50, 13)], [(108, 76, 30, 12), (110, 43, 25, 12), (92, 81, 55, 6), (81, 33, 28, 9), (120, 99, 73, 5), (111, 70, 48, 12), (98, 72, 46, 9), (95, 66, 31, 10), (85, 84, 30, 8), (71, 32, 25, 3), (36, 34, 25, 10), (97, 67, 62, 7), (33, 25, 23, 7), (95, 27, 26, 10), (94, 81, 44, 9)]] def random_generate(): idx = np.random.randint(0,len(solution)) box_list = solution[idx] gen_box_order = [] while True: index = np.random.randint(0, len(box_list)) box_list[index] = (box_list[index][0], box_list[index][1], box_list[index][2], box_list[index][3]-1) gen_box_order.append((box_list[index][0], box_list[index][1], box_list[index][2])) if box_list[index][3] == 0: box_list.pop(index) if len(box_list) == 0: break return gen_box_order def normalization(data): _range = np.max(data) - np.min(data) return (data - np.min(data)) / _range def standardization(data): mu = np.mean(data) sigma = np.std(data) return (data - mu) / sigma # 3. Define the network used in both target net and the net for training class CNN(nn.Module): def __init__(self): super(CNN, self).__init__() # 继承__init__功能 ## 第一层卷积 self.conv1 = nn.Sequential( # 输入[2,587,233] nn.Conv2d( in_channels=2, # 输入图片的高度 out_channels=16, # 输出图片的高度 kernel_size=3, # 5x5的卷积核，相当于过滤器 stride=1, # 卷积核在图上滑动，每隔一个扫一次 padding=1, # 给图外边补上0 ), # 经过卷积层输出[16,28,28] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[16,14,14] 传入下一个卷积 ) ## 第二层卷积 self.conv2 = nn.Sequential( nn.Conv2d( in_channels=16, # 同上 out_channels=32, kernel_size=3, stride=1, padding=1 ), # 经过卷积输出[32, 14, 14] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[32,7,7] 传入输出层 ) ## 第三层卷积 self.conv3 = nn.Sequential( nn.Conv2d( in_channels=32, # 同上 out_channels=64, kernel_size=3, stride=1, padding=1 ), # 经过卷积输出[32, 14, 14] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[32,7,7] 传入输出层 ) ## 第四层卷积 self.conv4 = nn.Sequential( nn.Conv2d( in_channels=64, # 同上 out_channels=128, kernel_size=3, stride=1, padding=1 ), # 经过卷积输出[32, 14, 14] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[32,7,7] 传入输出层 ) ## 第五层卷积 self.conv5 = nn.Sequential( nn.Conv2d( in_channels=128, # 同上 out_channels=256, kernel_size=3, stride=1, padding=1 ), # 经过卷积输出[32, 14, 14] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[32,7,7] 传入输出层 ) ## 第六层卷积 self.conv6 = nn.Sequential( nn.Conv2d( in_channels=256, # 同上 out_channels=512, kernel_size=3, stride=1, padding=1 ), # 经过卷积输出[32, 14, 14] 传入池化层 nn.ReLU(), nn.MaxPool2d(kernel_size=2) # 经过池化输出[32,7,7] 传入输出层 ) ## 输出层 self.output = nn.Linear(in_features=512*9*3, out_features=1) def forward(self, x): x = self.conv1(x) x = self.conv2(x) # [batch, 32,7,7] x = self.conv3(x) # [batch, 32,7,7] x = self.conv4(x) # [batch, 32,7,7] x = self.conv5(x) # [batch, 32,7,7] x = self.conv6(x) # [batch, 32,7,7] x = x.view(x.size(0), -1) # 保留batch, 将后面的乘到一起 [batch, 32*7*7] output = self.output(x) # 输出[50,10] return output class DQN(object): def __init__(self): # -----------Define 2 networks (target and training)------# self.eval_net, self.target_net = CNN(), CNN() # Define counter, memory size and loss function self.learn_step_counter = 0 # count the steps of learning process self.memory: List = [None] * MEMORY_CAPACITY self.memory_counter = 0 # counter used for experience replay buffer # ------- Define the optimizer------# self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR) # ------Define the loss function-----# self.loss_func = nn.MSELoss() def choose_action(self, state:Container, cargo): # 可行点取最大的 is_encase, inputs, points, poses = state.encase(cargo) # torch.set_printoptions(profile="full") # print(inputs) if is_encase == False: return is_encase, is_encase, is_encase if np.random.uniform() < EPSILON: # greedy data = inputs.data.cpu().numpy() data = normalization(data) data = standardization(data) inputs = torch.tensor(data) with torch.no_grad(): actions_value = self.target_net.forward(inputs) action = torch.max(actions_value, 0)[1].data.numpy() action = action[0] point = points[action] pose = poses[action] else: action = np.random.randint(0, high=len(points)) point = points[action] pose = poses[action] return point, pose def store_transition(self, s:torch.Tensor, a:Cargo, r, s_:Container, a_:Cargo): transition = [s, a.matrix(), r, s_, a_] # if the capacity is full, then use index to replace the old memory with new one index = self.memory_counter % MEMORY_CAPACITY self.memory[index] = transition self.memory_counter += 1 def learn(self): # update the target network every fixed steps if self.learn_step_counter % TARGET_NETWORK_REPLACE_FREQ == 0: # Assign the parameters of eval_net to target_net self.target_net.load_sta

评论收藏

内容反馈

版权申诉