import numpy as np
import math
import matplotlib.pyplot as plt
# import theano
import parameters
class Env:
def __init__(self, pa, nw_len_seqs=None, nw_size_seqs=None,
             seed=42, render=False, repre='image', end='no_new_job'):
    """Build the environment: job-arrival sequences plus an empty system.

    Args:
        pa: parameter object. This constructor reads ``dist.bi_model_dist``,
            ``unseen``, ``simu_len``, ``num_ex``, ``num_res`` and
            ``res_slot`` from it, and hands it to the system components.
        nw_len_seqs: pre-generated job lengths. Used only when
            ``nw_size_seqs`` is supplied as well.
        nw_size_seqs: pre-generated job resource demands. Used only when
            ``nw_len_seqs`` is supplied as well.
        seed: seed for NumPy's global RNG (ignored when ``pa.unseen`` is
            truthy, in which case a fixed seed is used).
        render: stored on the instance as ``self.render``.
        repre: observation representation, 'image' or compact.
        end: termination type, 'no_new_job' or 'all_done'.
    """
    self.pa = pa
    self.render = render
    self.repre = repre
    self.end = end
    self.nw_dist = pa.dist.bi_model_dist
    self.curr_time = 0

    # "Unseen" runs always use the same fixed seed; otherwise honour `seed`.
    np.random.seed(314159 if self.pa.unseen else seed)

    if nw_len_seqs is not None and nw_size_seqs is not None:
        # The caller supplied both sequences: use them untouched.
        self.nw_len_seqs = nw_len_seqs
        self.nw_size_seqs = nw_size_seqs
    else:
        # Draw one long flat sequence covering every example.
        total_steps = self.pa.simu_len * self.pa.num_ex
        flat_lens, flat_sizes = self.generate_sequence_work(total_steps)

        # Report the average load per resource, computed on the flat
        # sequence before it is split into examples.
        slots_per_res = float(pa.res_slot)
        n_steps = float(len(flat_lens))
        self.workload = np.zeros(pa.num_res)
        for res in range(pa.num_res):
            demand = np.sum(flat_sizes[:, res] * flat_lens)
            self.workload[res] = demand / slots_per_res / n_steps
            print("Load on # " + str(res) + " resource dimension is " + str(self.workload[res]))

        # Split the flat sequence into `num_ex` examples of `simu_len` steps.
        self.nw_len_seqs = np.reshape(
            flat_lens, (self.pa.num_ex, self.pa.simu_len))
        self.nw_size_seqs = np.reshape(
            flat_sizes, (self.pa.num_ex, self.pa.simu_len, self.pa.num_res))

    self.seq_no = 0   # which example sequence
    self.seq_idx = 0  # index within that sequence

    # Start from an empty system.
    self.machine = Machine(pa)
    self.job_slot = JobSlot(pa)
    self.job_backlog = JobBacklog(pa)
    self.job_record = JobRecord()
    self.extra_info = ExtraInfo(pa)
def generate_sequence_work(self, simu_len):
    """Draw a random job-arrival sequence of ``simu_len`` time steps.

    At each step a job arrives when a uniform draw falls below
    ``self.pa.new_job_rate``; its length and per-resource sizes then come
    from ``self.nw_dist()``. Steps without an arrival stay all-zero.

    Returns:
        A pair ``(lengths, sizes)`` of integer arrays shaped
        ``(simu_len,)`` and ``(simu_len, self.pa.num_res)``.
    """
    lengths = np.zeros(simu_len, dtype=int)
    sizes = np.zeros((simu_len, self.pa.num_res), dtype=int)

    for step in range(simu_len):
        # Exactly one uniform draw per step, before any call to nw_dist.
        job_arrives = np.random.rand() < self.pa.new_job_rate
        if not job_arrives:
            continue
        job_len, job_size = self.nw_dist()
        lengths[step] = job_len
        sizes[step, :] = job_size

    return lengths, sizes
def get_new_job_from_seq(self, seq_no, seq_idx):
    """Create the ``Job`` stored at position ``seq_idx`` of example ``seq_no``.

    The job id is the current number of entries in ``self.job_record.record``
    and the enter time is ``self.curr_time``.
    """
    demand = self.nw_size_seqs[seq_no, seq_idx, :]
    duration = self.nw_len_seqs[seq_no, seq_idx]
    next_id = len(self.job_record.record)
    return Job(res_vec=demand,
               job_len=duration,
               job_id=next_id,
               enter_time=self.curr_time)
def observe(self):
    """Return the current state as a flattened image observation.

    The image has ``pa.network_input_height`` rows. Its columns are laid out,
    left to right, as:

    * for every resource: ``pa.res_slot`` columns copied from the machine
      canvas, followed by ``pa.num_nw`` blocks of ``pa.max_job_size`` columns,
      one per job slot (a waiting job is drawn as a ``len`` x ``res_vec[i]``
      rectangle of ones);
    * ``backlog_width`` columns in which the backlog size is drawn as filled
      cells, row by row;
    * one column holding the normalised time since the last new job.

    Returns:
        An array of shape ``(1, height * width)`` when ``self.repre`` is
        ``'image'``. For any other representation nothing is returned
        (``None``) in the code visible here.
    """
    if self.repre != 'image':
        return None

    pa = self.pa
    backlog_width = int(math.ceil(pa.backlog_size / float(pa.time_horizon)))
    image = np.zeros((pa.network_input_height, pa.network_input_width))

    col = 0  # left edge of the region currently being painted
    for res in range(pa.num_res):
        # Work already scheduled on the machine for this resource.
        image[:, col: col + pa.res_slot] = self.machine.canvas[res, :, :]
        col += pa.res_slot

        # One fixed-width block per job slot, whether occupied or not.
        for slot_idx in range(pa.num_nw):
            job = self.job_slot.slot[slot_idx]
            if job is not None:
                image[: job.len, col: col + job.res_vec[res]] = 1
            col += pa.max_job_size

    # Backlog: completely filled rows first, then a partial row if needed.
    backlog_size = self.job_backlog.curr_size
    full_rows = int(backlog_size / backlog_width)
    leftover = backlog_size % backlog_width
    image[:full_rows, col: col + backlog_width] = 1
    if leftover > 0:
        image[full_rows, col: col + leftover] = 1
    col += backlog_width

    # Last column: fraction of the maximum tracked time since the last job.
    elapsed_fraction = self.extra_info.time_since_last_new_job / \
        float(self.extra_info.max_tracking_time_since_last_job)
    image[:, col: col + 1] = elapsed_fraction
    col += 1

    # Every column of the image must have been accounted for.
    assert col == image.shape[1]

    return image.ravel()[np.newaxis, :]
def plot_state(self):
    """Draw the current state with matplotlib and block on ``plt.show()``.

    The figure is a grid with one row per resource and
    ``1 + pa.num_nw + 1`` columns: the machine canvas for that resource,
    one panel per job slot, and a final column that holds the backlog
    (on the first row) and the time-since-last-job indicator (on the
    last row).
    """
    pa = self.pa
    # first +1 for current work, last +1 for backlog queue
    n_cols = 1 + pa.num_nw + 1

    plt.figure("screen", figsize=(20, 5))

    # The job panels only fill num_nw + 1 of the n_cols columns in each row;
    # skip_row counts the trailing cells skipped so far so that every row's
    # panels start at the left edge of the grid.
    skip_row = 0

    for i in range(pa.num_res):
        # Subplot indices are 1-based, hence the trailing +1.
        plt.subplot(pa.num_res, n_cols, i * (pa.num_nw + 1) + skip_row + 1)
        plt.imshow(self.machine.canvas[i, :, :], interpolation='nearest', vmax=1)

        for j in range(pa.num_nw):
            job_slot = np.zeros((pa.time_horizon, pa.max_job_size))
            job = self.job_slot.slot[j]
            if job is not None:  # fill in a block of work
                job_slot[: job.len, : job.res_vec[i]] = 1

            plt.subplot(pa.num_res, n_cols,
                        1 + i * (pa.num_nw + 1) + j + skip_row + 1)
            plt.imshow(job_slot, interpolation='nearest', vmax=1)

            if j == pa.num_nw - 1:
                skip_row += 1

    skip_row -= 1

    backlog_width = int(math.ceil(pa.backlog_size / float(pa.time_horizon)))
    backlog = np.zeros((pa.time_horizon, backlog_width))

    # NumPy indices must be integers, so use floor division rather than `/`.
    full_rows = self.job_backlog.curr_size // backlog_width
    leftover = self.job_backlog.curr_size % backlog_width
    backlog[:full_rows, :backlog_width] = 1
    # Only touch the partial row when there is one; with a completely full
    # backlog `full_rows` would otherwise index one row past the end.
    if leftover > 0:
        backlog[full_rows, :leftover] = 1

    # Backlog goes in the last column of the first row.
    plt.subplot(pa.num_res, n_cols, pa.num_nw + 1 + 1)
    plt.imshow(backlog, interpolation='nearest', vmax=1)

    # Time-since-last-job indicator goes in the last column of the last row.
    plt.subplot(pa.num_res, n_cols,
                pa.num_res * (pa.num_nw + 1) + skip_row + 1)
    extra_info = np.ones((pa.time_horizon, 1)) * \
        self.extra_info.time_since_last_new_job / \
        float(self.extra_info.max_tracking_time_since_last_job)
    plt.imshow(extra_info, interpolation='nearest', vmax=1)

    plt.show()  # manual; plt.pause(0.01) would advance automatically instead
def get_reward(self):
    """Return the reward for the current time step.

    Every job contributes ``penalty / job.len``, where the penalty depends on
    where the job currently sits: ``pa.delay_penalty`` for jobs running on
    the machine, ``pa.hold_penalty`` for jobs in the job slots and
    ``pa.dismiss_penalty`` for jobs in the backlog. Empty (``None``) entries
    in the slots and the backlog are ignored.
    """
    total = 0

    for job in self.machine.running_job:
        total += self.pa.delay_penalty / float(job.len)

    # Slots and backlog share the same shape: a list with None for gaps.
    waiting_queues = (
        (self.pa.hold_penalty, self.job_slot.slot),
        (self.pa.dismiss_penalty, self.job_backlog.backlog),
    )
    for penalty, queue in waiting_queues:
        for job in queue:
            if job is None:
                continue
            total += penalty / float(job.len)

    return total
def step(self, a, repeat=False):
status = None
done = False
reward = 0
info = None
if a == self.pa.num_nw: # explicit void action
status = 'MoveOn'
elif self.job_slot.slot[a] is None: # implicit void action
#if self.seq_idx >= self.pa.simu_len and \
#len(self.machine.running_job) > 0 and \
#all(s is None for s in self.job_backlog.backlog):
#ob, reward, done, info = self.step(a + 1, repeat=True)
#return ob, reward, done, info
#else:
status = 'MoveOn'
else:
allocated = self.machine.allocate_job(self.job_slot.slot[a], self.curr_time)
if not allocated: # implicit void action
status = 'MoveOn'
else:
status = 'Allocate'
if status == 'MoveOn':
self.curr_time +=