//<APPLET CODE = "SwingApplet.class" WIDTH = 700 HEIGHT = 400 ></applet>
// load for early releases
//import com.sun.java.swing.*;
import javax.swing.*;
import javax.swing.event.*;
import java.awt.*;
import java.awt.event.*;
import java.util.*;
public class SwingApplet extends JApplet implements ActionListener,Runnable{
// Integer configuration constants. None of them is referenced in the methods
// visible in this part of the file.
// NOTE(review): BW/BH and BX/BY look like board size and board offset, and
// SAMP_W/SAMP_H like sample-world preview size -- confirm against their users.
static final int BW=300, BH=300, BX=8, BY=8, NUM_WALLS=20,
SAMP_W = 100, SAMP_H = 100;
// NOTE(review): presumably the default value for the "epochs" field -- confirm.
static final int DEF_EPOCHS = 50000;
// Delay value handed to both the RLController and the CatAndMouseGame
// constructors in gameInit (units are not shown in this part of the file).
static final long DELAY=500;
static int MAXX=400, MAXY=400;
// Game object created and started in gameInit; updateBoard reads its state.
CatAndMouseGame game;
CatAndMouseWorld trainWorld, playWorld; // separate training world from the playing world
// Training controller (built in gameInit) and the learner it exposes (rl = rlc.learner).
RLController rlc;
RLearner rl;
// Top-level tab container; init adds the "World", "Play" and "Train" tabs to it.
JTabbedPane tabbedPane;
// Tab contents. "instructions" is never assigned in init (its creation is commented out there).
Container instructions, playPanel, trainPanel, worldPanel;
// world setting components
JTextField rows, cols, obst;
sampleWorlds samples;
boolean[][] selectedWalls;
ButtonGroup worldSelGroup;
boolean sampleWorld=true, designWorld=false;
// instructions components
JLabel instructLabel, usageLabel;
// HTML-formatted texts for the instruction labels (runtime strings).
final String INSTRUCT_MESSAGE = "<html><p>This applet demonstrates how reinforcement <p>learning can be used to train an agent to play <p>a simple game. In this case the game is Cat and <p>Mouse- the mouse tries to get to the cheese <p>and back to it's hole, the cat tries to catch the mouse.",
USAGE_MESSAGE = "<html><p>You can train the agent by selecting the Train tab. At <p>any time you can select the Play tab to see how <p>well the agent is performing! Of course, the more <p>training, the better the chance the mouse <p>has of surviving :)";
// train panel components
// NOTE(review): these look like action-command identifiers -- confirm in actionPerformed.
public static final String START="S", CONT_CHECK="C";
final String SETTINGS_TEXT = "These settings adjust some of the internal workings of the reinforcement learning algorithm.",
SETTINGS_TEXT2 = "Please see the web pages for more details on what the parameters do.";
// Text fields whose contents doTraining parses into the learner/world settings;
// gameInit fills all of them except "epochs" with the current values.
JTextField alpha, gamma, epsilon, epochs, penalty, reward;
JButton startTraining, stopTraining;
JRadioButton softmax, greedy, sarsa, qlearn;
// Training progress bar: doTraining sets its range to 0..episodes,
// updateBoard sets its value from rlc.epochsdone.
JProgressBar progress;
// Label that updateBoard fills with rlc.totaldone.
JLabel learnEpochsDone;
// play panel components
JButton startbutt, stopbutt, pausebutt;
// Board view; updateBoard clears it, places the sprites and repaints it.
boardPanel bp;
// Score counters shown by updateBoard in mousescorelabel / catscorelabel.
public int mousescore=0, catscore =0;
JLabel catscorelabel, mousescorelabel;
// Label prefixes used by updateBoard when rendering the two scores.
final String MS_TEXT = "Mouse Score:", CS_TEXT = "Cat Score:";
JSlider speed, smoothSlider;
// Sprite images requested in init from the applet's code base.
Image catImg, mouseImg;
chartPanel graphPanel;
JLabel winPerc;
// Drawable board objects created in init. "hole" is built from Color.orange;
// "back" and "hole" are not drawn by updateBoard as shown in this part of the file.
boardObject cat, mouse, cheese, back, hole, wall;
public SwingApplet() {
getRootPane().putClientProperty("defeatSystemEventQueueCheck",Boolean.TRUE);
}
/**
 * Applet initialisation: requests the sprite images from the code base,
 * builds the board objects, creates the World / Play / Train tabs and
 * installs the tabbed pane in the content pane.
 *
 * The Play and Train tabs start out disabled; gameInit enables them once a
 * world has been created.
 */
public void init() {
    // sprite images, all resolved against the applet's code base
    catImg = getImage(getCodeBase(), "cat.gif");
    mouseImg = getImage(getCodeBase(), "mouse.gif");
    final Image wallSprite = getImage(getCodeBase(), "wall.gif");
    final Image cheeseSprite = getImage(getCodeBase(), "cheese.gif");
    final Image floorSprite = getImage(getCodeBase(), "floor.gif");

    // drawable board objects (the hole is a plain colour, not an image)
    cat = new boardObject(catImg);
    mouse = new boardObject(mouseImg);
    cheese = new boardObject(cheeseSprite);
    back = new boardObject(floorSprite);
    hole = new boardObject(Color.orange);
    wall = new boardObject(wallSprite);

    // build the panels first, then register them as tabs in display order
    tabbedPane = new JTabbedPane();
    worldPanel = makeWorldPanel();
    playPanel = makePlayPanel();
    trainPanel = makeTrainPanel();
    tabbedPane.addTab("World", worldPanel);
    tabbedPane.addTab("Play", playPanel);
    tabbedPane.addTab("Train", trainPanel);

    // start on the World tab; the other two stay locked until a world exists
    tabbedPane.setSelectedIndex(0);
    for (int tab = 1; tab <= 2; tab++) {
        tabbedPane.setEnabledAt(tab, false);
    }

    getContentPane().add(tabbedPane);
}
/**
 * Creates the training world from explicit dimensions and then runs the
 * common game set-up.
 *
 * @param xdim     first dimension passed to the world and to gameInit
 * @param ydim     second dimension passed to the world and to gameInit
 * @param numwalls forwarded as the third CatAndMouseWorld constructor argument
 */
public void worldInit(int xdim, int ydim, int numwalls) {
    final CatAndMouseWorld generated = new CatAndMouseWorld(xdim, ydim, numwalls);
    trainWorld = generated;
    gameInit(xdim, ydim);
}
/**
 * Creates the training world from a caller-supplied wall layout and then
 * runs the common game set-up. The world dimensions are taken from the
 * array: {@code givenWalls.length} by {@code givenWalls[0].length}.
 *
 * @param givenWalls wall layout; must contain at least one element in its
 *                   first dimension because the second dimension is read
 *                   from {@code givenWalls[0]}
 * @throws IllegalArgumentException if {@code givenWalls} is empty
 * @throws NullPointerException     if {@code givenWalls} or its first row is null
 */
public void worldInit(boolean[][] givenWalls) {
    // Fail with a descriptive error instead of a bare
    // ArrayIndexOutOfBoundsException from givenWalls[0] below.
    if (givenWalls.length == 0) {
        throw new IllegalArgumentException(
            "givenWalls must contain at least one column; got an empty array");
    }
    int xdim = givenWalls.length, ydim = givenWalls[0].length;
    trainWorld = new CatAndMouseWorld(xdim, ydim, givenWalls);
    gameInit(xdim, ydim);
}
/**
 * Common set-up once trainWorld exists: locks the World tab, builds the
 * play world on trainWorld's walls, creates and starts the controller and
 * the game, fills the Train-tab fields with the current settings, unlocks
 * and shows the Play tab, and draws the first board.
 *
 * The statement order is kept as in the original.
 * NOTE(review): rlc.start() and game.start() presumably launch background
 * work that calls back into this applet -- confirm before reordering.
 */
private void gameInit(int xdim, int ydim) {
    // the world can no longer be edited
    tabbedPane.setEnabledAt(0, false);

    // play world uses trainWorld's wall array
    playWorld = new CatAndMouseWorld(xdim, ydim, trainWorld.walls);
    bp.setDimensions(xdim, ydim);

    // controller and learner
    rlc = new RLController(this, trainWorld, DELAY);
    rl = rlc.learner;
    rlc.start();

    // game driven by the learner's policy
    game = new CatAndMouseGame(this, DELAY, playWorld, rl.getPolicy());
    game.start();

    // show the current world and learner settings on the Train tab
    final String penaltyText = Integer.toString(trainWorld.deathPenalty);
    penalty.setText(penaltyText);
    final String rewardText = Integer.toString(trainWorld.cheeseReward);
    reward.setText(rewardText);
    final String alphaText = Double.toString(rl.getAlpha());
    alpha.setText(alphaText);
    final String gammaText = Double.toString(rl.getGamma());
    gamma.setText(gammaText);
    final String epsilonText = Double.toString(rl.getEpsilon());
    epsilon.setText(epsilonText);

    // unlock Play and Train, then jump to Play
    for (int tab = 1; tab <= 2; tab++) {
        tabbedPane.setEnabledAt(tab, true);
    }
    tabbedPane.setSelectedIndex(1);

    // draw the initial positions
    updateBoard();
}
// this method is triggered by SwingUtilities.invokeLater in other threads
/** Runnable entry point: refreshes the board display by delegating to updateBoard. */
public void run() {
    updateBoard();
}
/************ general functions ****************/
/**
 * Refreshes everything shown on screen: the two score labels, the training
 * progress widgets and the game board.
 *
 * Also consumes the two "newInfo" flags: when the game's flag is set,
 * updateScore is called and the flag is cleared; when the controller's flag
 * is set, endTraining is called.
 */
public void updateBoard() {
    // score labels
    mousescorelabel.setText(MS_TEXT + " " + mousescore);
    catscorelabel.setText(CS_TEXT + " " + catscore);
    if (game.newInfo) {
        updateScore();
        game.newInfo = false;
    }

    // training progress
    progress.setValue(rlc.epochsdone);
    learnEpochsDone.setText(Integer.toString(rlc.totaldone));
    if (rlc.newInfo) {
        endTraining();
    }

    // rebuild the board from scratch: walls first
    bp.clearBoard();
    final boolean[][] walls = game.getWalls();
    for (int x = 0; x < walls.length; x++) {
        for (int y = 0; y < walls[0].length; y++) {
            if (walls[x][y]) {
                bp.setSquare(wall, x, y);
            }
        }
    }

    // then the movable objects; later calls are drawn on top,
    // so the cat covers the mouse and the mouse covers the cheese
    bp.setSquare(cheese, game.getCheese());
    bp.setSquare(mouse, game.getMouse());
    bp.setSquare(cat, game.getCat());

    bp.repaint();
}
/**
 * Starts a training run using the values typed into the Train tab.
 *
 * All six text fields are parsed before anything is modified. If any of
 * them is not a valid number, an error dialog is shown and the method
 * returns with the learner, the world and the controls left unchanged.
 * Previously an invalid value surfaced as an uncaught NumberFormatException.
 *
 * On success the learner parameters are applied, the input controls are
 * locked, the progress bar is reset to 0..episodes, the stop button is
 * enabled, the reward/penalty are written to trainWorld, and the episode
 * count is handed to the controller.
 *
 * NOTE(review): the dialog assumes this method runs on the event dispatch
 * thread (e.g. from a button handler) -- confirm against the caller.
 */
void doTraining() {
    int episodes, cval, dval;
    double aval, gval, eval;
    try {
        // Integer.parseInt rejects surrounding whitespace, so trim first;
        // Double.parseDouble already ignores it, trimmed here for symmetry.
        episodes = Integer.parseInt(epochs.getText().trim());
        aval = Double.parseDouble(alpha.getText().trim());
        gval = Double.parseDouble(gamma.getText().trim());
        eval = Double.parseDouble(epsilon.getText().trim());
        cval = Integer.parseInt(reward.getText().trim());
        dval = Integer.parseInt(penalty.getText().trim());
    } catch (NumberFormatException ex) {
        JOptionPane.showMessageDialog(this,
            "Training settings must be numeric: " + ex.getMessage(),
            "Invalid training settings",
            JOptionPane.ERROR_MESSAGE);
        return;
    }

    // apply learner parameters
    rl.setAlpha(aval);
    rl.setGamma(gval);
    rl.setEpsilon(eval);

    // disable controls while training runs
    startTraining.setEnabled(false);
    epochs.setEnabled(false);
    reward.setEnabled(false);
    penalty.setEnabled(false);
    alpha.setEnabled(false);
    gamma.setEnabled(false);
    epsilon.setEnabled(false);
    softmax.setEnabled(false);
    greedy.setEnabled(false);
    sarsa.setEnabled(false);
    qlearn.setEnabled(false);

    // reset progress bar to cover this run
    progress.setMinimum(0);
    progress.setMaximum(episodes);
    progress.setValue(0);

    // enable stop button
    stopTraining.setEnabled(true);

    // start training
    trainWorld.cheeseReward = cval;
    trainWorld.deathPenalty = dval;
    rlc.setEpisodes(episodes);
}
void endTraining() {
// stop training
rlc.stopLearner();
// enable buttons
startTraining.setEnabled(true);
epochs.setEnabled(true);
reward.setEnabled(true);
penalty.setEnabled(true);
alpha.setEnabled(true);
gamma.setEnabled(true);
epsilon.setEnabled(true);
softmax.se
用强化学习算法实现的猫和老鼠游戏代码
5星 · 超过95%的资源 需积分: 50 148 浏览量
2009-12-03
19:07:09
上传
评论 4
收藏 56KB ZIP 举报
jose_hg
- 粉丝: 2
- 资源: 7
最新资源
- yolov5,SSD 可能使用到的一些代码
- 介绍离散性制造行业的MES系统流程
- 基于IDEA-CCNL/Randeng-Pegasus-238M-Summary-Chines微调的中文文本摘要任务源码+数据集
- 微信小程序源码 车源宝 二手车交易平台 源码下载
- 微信小程序源码 实现 城市切换 demo 根据城市首字母排序城市 选择城市 源码下载
- 2024新版计算机编译原理期末速成全套视频教程(视频+配套资料)
- VMware7.0虚拟机硬盘无法编辑,无法连接到Profile-Driven Storage Service
- arm64内核的mongo镜像
- 基于stm32f103c单片机+MPU6050+0.96英寸OLED显示屏双柄遥控器硬件(原理图+PCB)工程文件.zip
- 整理的关于少儿编程的学习路径,以及如何在小升初,初升高和大学充分的利用起来编程经验的优势
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
- 1
- 2
前往页