Return to Question

Changed code and updated explanation

edited Aug 22, 2017 at 11:44

from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf from pylab import savefig from tqdm import tqdm  #Screen Setup disp_x, disp_y = 15001000, 1000800 arena_x, arena_y = 8001000, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200) red = (255, 0, 0); green = (0, 255, 0) blue = (0, 0, 255); black = (0, 0, 0) green_yellow = (173, 255, 47); energy_blue = (125, 249, 255)  #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radiuscharacter_size = 3050 character_move_speed = 2025 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 50 bullet_damagebeam_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_posbeam_width = list(init_character_b_state)10 bullet_a_fire = False; bullet_b_firebeam_ob = False-100 #The Neural Network input_layer = tf.placeholder(shape=[1,8]7],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8[7,9],0,0.1)) #weight_2 = tf.Variable(tf.random_uniform([6,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer()    jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32) bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = 
agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() agent_bullet_direction_x = agent_bullet_direction_y = int() bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() bot_beam_dir = int() agent_beam_fire = bot_beam_fire = bool() agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = int() agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = int() bot_current_action = agent_current_action = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fireagent_x, \  bot_bullet_fireagent_y, agent_bullet_direction_xbot_x, agent_bullet_direction_ybot_y, bot_bullet_direction_xagent_hp, \  bot_bullet_direction_ybot_hp, agent_xagent_beam_fire, agent_ybot_beam_fire, bot_xagent_beam_x, bot_ybot_beam_x, agent_hpagent_beam_y, bot_hpbot_beam_y  agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0  agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health  agent_beam_fire = bot_beam_fire = False agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = beam_ob agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = 0  def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_xcharacter_size, agent_x, \ agent_bullet_yagent_y, bullet_radiusbot_x, bot_bullet_xbot_y, bot_bullet_ycharacter_init_health, 
character_radiusagent_hp, agent_xbot_hp, red, blue, aqua, green, black, green_yellow, energy_blue, \ agent_yagent_beam_fire, bot_xbot_beam_fire, bot_yagent_beam_x, character_init_healthagent_beam_y, agent_hpbot_beam_x, bot_hpbot_beam_y, redagent_beam_size_x, blueagent_beam_size_y, aquabot_beam_size_x, greenbot_beam_size_y, blackbeam_width disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black,if [agent_bullet_x,bot_beam_fire agent_bullet_y],== bullet_radius)True:   draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)  draw.circlerect(disp, blackgreen_yellow, (agent_xagent_beam_x, agent_y)agent_beam_y,  agent_beam_size_x, agent_beam_size_y))  bot_beam_fire = False  character_radius + border_2)  if agent_beam_fire == draw.circle(disp,True:  red, (agent_x, agent_y), character_radius)  draw.circlerect(disp, blackenergy_blue, (bot_xbot_beam_x, bot_y)bot_beam_y, bot_beam_size_x, bot_beam_size_y)) agent_beam_fire = False  draw.rect(disp, red, character_radius(agent_x, +agent_y, border_2character_size, character_size)) draw.circlerect(disp, blue, (bot_x, bot_y), character_radiuscharacter_size, character_size))   draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14))    def bot_take_action(): return random.randint(1, 9) def beam_hit_detector(player): global agent_x, agent_y, bot_x, bot_y, character_radiusagent_beam_fire, bot_actionbot_beam_fire, borderagent_beam_x, \ ifbot_beam_x, agent_xagent_beam_y, -bot_beam_y, character_radiusagent_beam_size_x, -agent_beam_size_y, border\  <= bot_x <= agent_xbot_beam_size_x, 
+bot_beam_size_y, character_radiusbot_current_action, +agent_current_action, borderbeam_width, character_size if player == "bot": if random.randint(0, 100)bot_current_action >== 51: if disp_y/2 - arena_y/2 <= agent_y <= bot_y and (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + character_size):   bot_action =return 1True else: bot_action =return 3False   else:  elif bot_current_action == 2:  bot_action = 9  elif agent_y -if character_radiusbot_x <= bot_yagent_x <= agent_ydisp_x/2 + character_radius:  arena_x/2 ifand random.randint(0, 100) >agent_y 5: < bot_beam_y + beam_width < agent_y + character_size or agent_y < bot_beam_y if< agent_xagent_y <=+ bot_xcharacter_size): bot_action =return 4True else: bot_action = 2 else: bot_action = 9 return else:False if random.randint(0,elif 100)bot_current_action >== 53:   x_dist = abs(bot_x - agent_x); y_dist =if abs(bot_y -<= agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0:  disp_y/2 + bot_actionarena_y/2 =and 6 (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + elsecharacter_size):   bot_action =return 8True else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player ==return "bot":False ifelif bot_bullet_xbot_current_action == last_bot_bullet_x4: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border:  #If the current state of the bullet is touchingdisp_x/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < 
bot_bullet_y - bullet_radius < agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y2 - bullet_radius > agent_yarena_x/2 +<= character_radiusagent_x +<= borderbot_x and agent_y - character_radius - border > bot_bullet_y + bullet_radius) \  or (bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_ybot_beam_y -+ bullet_radiusbeam_width < agent_y + character_radius + bordercharacter_size or \  agent_y - character_radius - border < bot_bullet_y + bullet_radiusbot_beam_y < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radiuscharacter_size):   return True   else:   return False else: if agent_bullet_xagent_current_action == last_agent_bullet_x1: if bot_x - character_radiusdisp_y/2 - borderarena_y/2 <<= agent_bullet_xbot_y +<= bullet_radiusagent_y <and (bot_x +< character_radiusagent_beam_x + border or \  beam_width < bot_x -+ character_radiuscharacter_size -or borderbot_x < agent_bullet_x - bullet_radiusagent_beam_x < bot_x + character_radius + bordercharacter_size):   #Ifreturn theTrue  current state of the bullet is touching/inside the agent else: ifreturn 
bot_yFalse  - character_radius - border < agent_bullet_y + bullet_radiuselif <agent_current_action bot_y== +2:  character_radius or \  if agent_x <= bot_x <= disp_x/2 + bot_yarena_x/2 -and character_radius(bot_y < agent_bullet_yagent_beam_y -+ bullet_radiusbeam_width < bot_y + character_radiuscharacter_size +or border: bot_y < agent_beam_y < bot_y + character_size):  return True   else:  #If the bullet "passed through" the character from the last turn:return False elif agent_current_action == 3:  elif (last_agent_bullet_y - bullet_radius > if agent_y <= bot_y +<= character_radiusdisp_y/2 + borderarena_y/2 and bot_y(bot_x -< character_radiusagent_beam_x -+ borderbeam_width >< agent_bullet_ybot_x + bullet_radius)character_size \ or bot_x < agent_beam_x < bot_x + character_size):  or (agent_bullet_y - bullet_radius > bot_y + character_radiusreturn +True  border and bot_y - character_radius - border > last_agent_bullet_y + bullet_radius) else:   return TrueFalse elif bot_current_action == 4:  else:  if disp_x/2 - arena_x/2 <= bot_x <= agent_x and (bot_y < agent_beam_y + returnbeam_width False < bot_y + character_size or bot_y < agent_beam_y elif< agent_bullet_ybot_y ==+ last_agent_bullet_ycharacter_size): if bot_y - character_radius -return borderTrue  < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \else:   bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius +return border:False  #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > 
bot_x + character_radius + border and bot_x - character_radius - border > last_agent_bullet_x + bullet_radius): return True else: return False  def mapping(maximum, number): return int(absnumber#int(number * maximum) / (maximum/10)) def action(agent_action, bot_action): global bot_bullet_xagent_x, bot_bullet_yagent_y, agent_bullet_xbot_x, agent_bullet_ybot_y, agent_bullet_fireagent_hp, bot_hp, agent_beam_fire, \ bot_bullet_firebot_beam_fire, agent_bullet_direction_xagent_beam_x, agent_bullet_direction_ybot_beam_x, bot_bullet_direction_xagent_beam_y, bot_beam_y, agent_beam_size_x, \ bot_bullet_direction_yagent_beam_size_y, agent_xbot_beam_size_x, agent_ybot_beam_size_y, bot_xbeam_width, bot_yagent_current_action, agent_hpbot_current_action, bot_hp,character_size  last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_yagent_current_action = agent_action; bot_current_action = bot_action reward = 0; cont = True; successful = False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == False: bot_bullet_firebot_beam_fire = True if bot_action == 1: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = disp_y/2 -bullet_speed arena_y/2 bot_beam_size_x = beam_width; bot_beam_size_y = bot_y - disp_y/2 + arena_y/2 elif bot_action == 2: bot_bullet_direction_xbot_beam_x = bullet_speed;bot_x bot_bullet_direction_y+ character_size; bot_beam_y = 0bot_y + character_size/2 - beam_width/2 bot_beam_size_x = disp_x/2 + arena_x/2 - bot_x - character_size; bot_beam_size_y = beam_width elif bot_action == 3: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = bullet_speedbot_y + character_size bot_beam_size_x = beam_width; bot_beam_size_y = disp_y/2 + arena_y/2 - bot_y - character_size elif bot_action == 4: bot_bullet_direction_xbot_beam_x = disp_x/2 -bullet_speed; bot_bullet_direction_yarena_x/2; bot_beam_y = 0bot_y + character_size/2 - 
beam_width/2 bot_bullet_x bot_beam_size_x = bot_x +- bot_bullet_direction_x;disp_x/2 bot_bullet_y+ =arena_x/2; bot_ybot_beam_size_y += bot_bullet_direction_ybeam_width elif 5 <= bot_action <= 8: if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 elif agent_y <= bot_y <= agent_y + character_radiuscharacter_size:  bot_y = agent_y + 1character_size elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1character_size: bot_x = disp_x/2 + arena_x/2 - character_radiuscharacter_size  elif agent_x <= bot_x + character_size <= agent_x + character_size: bot_x = agent_x - 1character_size elif bot_action == 7: bot_y += character_move_speed if bot_y + character_size >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radiuscharacter_size elif agent_y <= bot_y + character_size <= agent_y + character_size: bot_y = agent_y - 1character_size elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 elif agent_x <= bot_x <= agent_x + character_radiuscharacter_size:  bot_x = agent_x + 1character_size if bot_bullet_firebot_beam_fire == True: last_bot_bullet_x = bot_bullet_x; last_bot_bullet_y = bot_bullet_y bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y  if bullet_hit_detectorbeam_hit_detector("bot"): print#print "Agent Got Hit!" 
agent_hp -= bullet_damagebeam_damage reward =+= -50 bot_bullet_fire = False bot_bullet_direction_xbot_beam_size_x = 0; bot_bullet_direction_ybot_beam_size_y = 0   bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_xbot_beam_x = last_bot_bullet_ybot_beam_y = bullet_obbeam_ob if agent_hp <= 0: cont = False winner = "Bot"  elif bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or bot_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or bot_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: bot_bullet_fire = False bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob  if 1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fireagent_beam_fire = True   if agent_action == 1:   if agent_y - character_radius - border > disp_y/2 - arena_y/2:   agent_beam_x = agent_bullet_direction_xagent_x =- 0;beam_width/2; agent_bullet_direction_yagent_beam_y = disp_y/2 -bullet_speed  arena_y/2  agent_beam_size_x = beam_width; rewardagent_beam_size_y = 10 agent_y - disp_y/2 + arena_y/2  else:   reward =+= -25 agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False   elif agent_action == 2:   if agent_x + character_radius + bordercharacter_size < disp_x/2 + arena_x/2:   agent_bullet_direction_x = bullet_speed; agent_bullet_direction_yagent_beam_x = 0  agent_x + character_size; rewardagent_beam_y = 10 agent_y + character_size/2 - else:beam_width/2   rewardagent_beam_size_x = -25 disp_x/2 + arena_x/2 - agent_x - character_size; agent_beam_size_y = beam_width  agent_bullet_x = agent_bullet_y = bullet_obelse:   agent_bullet_firereward =+= False-25   elif agent_action == 3:   if agent_y + character_radius + bordercharacter_size < disp_y/2 + arena_y/2:   agent_bullet_direction_x = 0; agent_bullet_direction_yagent_beam_x = bullet_speed  agent_x + character_size/2 - beam_width/2; rewardagent_beam_y = 10  agent_y + 
else:character_size   rewardagent_beam_size_x = -25 beam_width; agent_beam_size_y = disp_y/2 + arena_y/2 - agent_y - character_size  agent_bullet_x = agent_bullet_y = bullet_obelse:   agent_bullet_firereward =+= False-25   elif agent_action == 4:   if agent_x - character_radius - border > disp_x/2 - arena_x/2:   agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_yagent_beam_x = 0  disp_x/2 - arena_x/2; rewardagent_beam_y = 10  else: agent_y reward+ =character_size/2 -25  agent_bullet_x = agent_bullet_y = bullet_obbeam_width/2   agent_bullet_fireagent_beam_size_x = False  if agent_bullet_fire == True: agent_bullet_xagent_x =- agent_xdisp_x/2 + agent_bullet_direction_x;arena_x/2; agent_bullet_yagent_beam_size_y = agent_y + agent_bullet_direction_ybeam_width   last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_yelse: else:  reward =+= -2025  elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2: agent_y = disp_y/2 - arena_y/2 + character_radius + border reward =+= -5 elseelif bot_y <= agent_y <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: rewardagent_y = 5bot_y + character_size reward += -2 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + bordercharacter_size >= disp_x/2 + arena_x/2: agent_x = disp_x/2 + arena_x/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_x <= agent_x + character_size <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size: rewardagent_x = 5bot_x - character_size reward += -2 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + bordercharacter_size >= disp_y/2 + arena_y/2: agent_y = disp_y/2 + arena_y/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_y <= agent_y + character_size <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: 
rewardagent_y = 5bot_y - character_size reward += -2 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2: agent_x = disp_x/2 - arena_x/2 + character_radius + border reward =+= -5 else: elif bot_x <= agent_x <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size:  reward = 5  if agent_bullet_fire == True:  agent_x = last_agent_bullet_xbot_x =+ agent_bullet_x;character_size  last_agent_bullet_y = agent_bullet_y  agent_bullet_x += agent_bullet_direction_x; agent_bullet_y  reward += agent_bullet_direction_y-2  if agent_beam_fire == True: if bullet_hit_detectorbeam_hit_detector("agent"): print#print "Bot Got Hit!" bot_hp -= bullet_damagebeam_damage reward = 100  agent_bullet_fire =+= False50 agent_bullet_direction_xagent_beam_size_x = 0; agent_bullet_direction_yagent_beam_size_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_xagent_beam_x = last_agent_bullet_yagent_beam_y = bullet_obbeam_ob if bot_hp <= 0: successful = True cont = False winner = "Agent"  elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob  return reward, cont, successful, winner def bot_beam_dir_detector(): global bot_current_action if bot_current_action == 1: bot_beam_dir = 2 elif bot_current_action == 2: bot_beam_dir = 4 elif bot_current_action == 3: bot_beam_dir = 3 elif bot_current_action == 4: bot_beam_dir = 1 else: bot_beam_dir = 0 return bot_beam_dir  #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10100 with tf.Session() as sess: sess.run(initialize) success = 0 for i in tqdm(range(1, num_episodes)): 
#print "Episode #", i rAll = 0; d = False; c = True; j = 0 param_init() samples = [] while c == True: j += 1  screen_blit()  current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]])   b = bot_take_action() if np.random.rand(1) < e or i <= 5: a = random.randint(0, 8) else: a, _ = sess.run([predict, Q],feed_dict={input_layer : current_state})    r, c, d, winner = action(a + 1, bot_actionb) bot_beam_dir = bot_beam_dir_detector() next_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]]) samples.append([current_state, a, r, next_state]) if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ batch_targetQ[0][a] = reward + y * batch_maxQ1 
sess.run([updateModel], feed_dict={input_layer : batch_current_state, next_Q : batch_targetQ}) rAll += r  screen_blit()  if d == True: e = 1. / ((i / 50) + 10) success += 1 break  #print agent_hp, bot_hp  display.update()  jList.append(j)  rList.append(rAll) print winner  print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList) plt.show()

Update #3 on August 22, 2017:

I've noticed that if the agent hits the bot with a bullet on a turn, and the action the bot took on that turn was not "fire a bullet", then the wrong action is given credit for the hit. To fix this, I've turned the bullets into beams, so the bot/agent takes damage on the same turn the beam is fired.

"""Bullet-based arena duel trained with a single-layer Q-network.

A scripted bot and a learning agent move inside a square arena and shoot
bullets at each other. The agent picks one of nine actions per turn
(1-4 fire up/right/down/left, 5-8 move up/right/down/left, 9 do nothing)
from a linear Q-function trained with per-episode experience replay.
"""
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
from time import sleep
import tensorflow as tf
import matplotlib.pyplot as plt  # the final reward plot needs `plt`

# Screen setup (pixels).
disp_x, disp_y = 1500, 1000
arena_x, arena_y = 800, 800
border = 4; border_2 = 1

# Arena edges in screen coordinates. Floor division keeps them integral on
# both Python 2 and Python 3.
arena_left = disp_x // 2 - arena_x // 2
arena_right = disp_x // 2 + arena_x // 2
arena_top = disp_y // 2 - arena_y // 2
arena_bottom = disp_y // 2 + arena_y // 2

# Color setup.
white = (255, 255, 255); aqua = (0, 200, 200)
red = (255, 0, 0); green = (0, 255, 0)
blue = (0, 0, 255); black = (0, 0, 0)

# Initial character positions (agent top-left, bot bottom-right).
init_character_a_state = [arena_left + 50, arena_top + 50]
init_character_b_state = [arena_right - 50, arena_bottom - 50]

# Character dimensions.
character_radius = 30
character_move_speed = 20

# Character stats.
character_init_health = 100

# Bullet stats.
bullet_speed = 50
bullet_damage = 10
bullet_radius = 7
bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state)
bullet_a_fire = False; bullet_b_fire = False

# The neural network: 8 state features -> 9 action values, no hidden layer.
input_layer = tf.placeholder(shape=[1, 8], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([8, 9], 0, 0.1))

# The calculations, loss function and the update model.
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

jList = []
rList = []

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

# Off-screen coordinate used for bullets that are not in flight.
bullet_ob = -100

# CHARACTER/BULLET PARAMETERS (real values are set by param_init()).
bot_bullet_x = bot_bullet_y = bullet_ob
agent_bullet_x = agent_bullet_y = bullet_ob
last_bot_bullet_x = last_bot_bullet_y = bullet_ob
last_agent_bullet_x = last_agent_bullet_y = bullet_ob
agent_bullet_fire = bot_bullet_fire = bool()
agent_bullet_direction_x = agent_bullet_direction_y = int()
bot_bullet_direction_x = bot_bullet_direction_y = int()
agent_x = agent_y = int()
bot_x = bot_y = int()
agent_hp = bot_hp = int()
bot_action = int()

# Velocity (dx, dy) of a bullet for each "fire" action code.
_FIRE_DIRECTIONS = {
    1: (0, -bullet_speed),   # up
    2: (bullet_speed, 0),    # right
    3: (0, bullet_speed),    # down
    4: (-bullet_speed, 0),   # left
}


def param_init():
    """Reset positions, health and both bullets for a new episode."""
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \
        bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \
        bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_y

    agent_bullet_x = agent_bullet_y = bullet_ob
    bot_bullet_x = bot_bullet_y = bullet_ob
    # The last_* names must be declared global above; otherwise these
    # assignments only create locals and the previous episode's values leak.
    last_agent_bullet_x = last_agent_bullet_y = bullet_ob
    last_bot_bullet_x = last_bot_bullet_y = bullet_ob
    agent_bullet_fire = bot_bullet_fire = False
    agent_bullet_direction_x = 0; agent_bullet_direction_y = 0
    bot_bullet_direction_x = 0; bot_bullet_direction_y = 0

    agent_x, agent_y = init_character_a_state
    bot_x, bot_y = init_character_b_state
    agent_hp = bot_hp = character_init_health


def screen_blit():
    """Draw the arena, both bullets, both characters and the health bars."""
    disp.fill(aqua)
    draw.rect(disp, black, (arena_left - border, arena_top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (arena_left, arena_top, arena_x, arena_y))
    draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius)
    draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)
    draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2)
    draw.circle(disp, red, (agent_x, agent_y), character_radius)
    draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2)
    draw.circle(disp, blue, (bot_x, bot_y), character_radius)
    bar_y = arena_bottom + border + 1
    draw.rect(disp, red, (disp_x // 2 - 200, bar_y,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, bar_y,
                           float(bot_hp) / float(character_init_health) * 100, 14))


def bot_take_action():
    """Choose the scripted bot's action, store it in ``bot_action`` and return it.

    The bot shoots when roughly aligned with the agent on one axis,
    otherwise it closes the larger of the two distances. About 5% of the
    time it idles (when aligned) or acts randomly (when not).
    """
    global bot_action
    deliberate = random.randint(0, 100) > 5
    if agent_x - character_radius - border <= bot_x <= agent_x + character_radius + border:
        # Same column: shoot vertically towards the agent.
        if deliberate:
            bot_action = 1 if agent_y <= bot_y else 3
        else:
            bot_action = 9
    elif agent_y - character_radius <= bot_y <= agent_y + character_radius:
        # Same row: shoot horizontally towards the agent.
        if deliberate:
            bot_action = 4 if agent_x <= bot_x else 2
        else:
            bot_action = 9
    elif deliberate:
        # Not aligned: move along the axis with the larger gap.
        if abs(bot_x - agent_x) >= abs(bot_y - agent_y):
            bot_action = 6 if bot_x - agent_x <= 0 else 8
        else:
            bot_action = 7 if bot_y - agent_y <= 0 else 5
    else:
        bot_action = random.randint(1, 9)
    return bot_action


def _bullet_hits(bullet_x, bullet_y, last_x, last_y, target_x, target_y):
    """Return True if a bullet touches, or has jumped over, a character.

    Bullets travel along one axis per turn, so the axis whose coordinate did
    not change is the "across" axis and the other one is the travel axis.
    """
    reach = character_radius + border
    if bullet_x == last_x:
        across, along, last_along = bullet_x, bullet_y, last_y
        t_across, t_along = target_x, target_y
    elif bullet_y == last_y:
        across, along, last_along = bullet_y, bullet_x, last_x
        t_across, t_along = target_y, target_x
    else:
        return False

    # The bullet must overlap the character on the across axis at all.
    in_band = (t_across - reach < across + bullet_radius < t_across + reach or
               t_across - reach < across - bullet_radius < t_across + reach)
    if not in_band:
        return False

    # The current position is touching or inside the character.
    if (t_along - reach < along + bullet_radius < t_along + character_radius or
            t_along - character_radius < along - bullet_radius < t_along + reach):
        return True

    # The bullet "passed through" the character since the last turn.
    return bool(
        (last_along - bullet_radius > t_along + reach and
         t_along - reach > along + bullet_radius) or
        (along - bullet_radius > t_along + reach and
         t_along - reach > last_along + bullet_radius))


def bullet_hit_detector(player):
    """Return True if ``player``'s bullet hit the opponent this turn.

    ``player`` is ``"bot"`` for the bot's bullet against the agent; any other
    value tests the agent's bullet against the bot.
    """
    if player == "bot":
        return _bullet_hits(bot_bullet_x, bot_bullet_y,
                            last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y)
    return _bullet_hits(agent_bullet_x, agent_bullet_y,
                        last_agent_bullet_x, last_agent_bullet_y, bot_x, bot_y)


def _bullet_left_arena(bullet_x, bullet_y):
    """Return True once a bullet touches or crosses any arena edge."""
    return (bullet_x + bullet_radius >= arena_right or
            bullet_x - bullet_radius <= arena_left or
            bullet_y + bullet_radius >= arena_bottom or
            bullet_y - bullet_radius <= arena_top)


def mapping(maximum, number):
    """Discretise a fraction ``number`` into an integer bucket."""
    return int(abs(number * maximum) / (maximum // 10))


def action(agent_action, bot_action):
    """Advance the game by one turn.

    Args:
        agent_action: agent's action code, 1-9.
        bot_action: bot's action code, 1-9.

    Returns:
        ``(reward, cont, successful, winner)``: the agent's reward for the
        turn, whether the episode continues, whether the agent won, and the
        winner's name (``""`` while the episode is running).
    """
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \
        bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \
        bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_y

    # Rewards accumulate over the turn so that the penalty for being hit is
    # not overwritten by the reward for the agent's own action.
    reward = 0; cont = True; successful = False; winner = ""

    # ---- Bot: fire or move ----
    if 1 <= bot_action <= 4 and not bot_bullet_fire:
        bot_bullet_fire = True
        bot_bullet_direction_x, bot_bullet_direction_y = _FIRE_DIRECTIONS[bot_action]
        bot_bullet_x = bot_x + bot_bullet_direction_x
        bot_bullet_y = bot_y + bot_bullet_direction_y
    elif bot_action == 5:
        bot_y = max(bot_y - character_move_speed, arena_top + character_radius + 1)
    elif bot_action == 6:
        bot_x = min(bot_x + character_move_speed, arena_right - character_radius - 1)
    elif bot_action == 7:
        bot_y = min(bot_y + character_move_speed, arena_bottom - character_radius - 1)
    elif bot_action == 8:
        bot_x = max(bot_x - character_move_speed, arena_left + character_radius + 1)

    # ---- Bot bullet: advance and resolve ----
    if bot_bullet_fire:
        last_bot_bullet_x = bot_bullet_x; last_bot_bullet_y = bot_bullet_y
        bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y
        hit = bullet_hit_detector("bot")
        if hit:
            print("Agent Got Hit!")
            agent_hp -= bullet_damage
            reward += -50
            if agent_hp <= 0:
                cont = False
                winner = "Bot"
        if hit or _bullet_left_arena(bot_bullet_x, bot_bullet_y):
            bot_bullet_fire = False
            bot_bullet_direction_x = 0; bot_bullet_direction_y = 0
            bot_bullet_x = bot_bullet_y = bullet_ob
            last_bot_bullet_x = last_bot_bullet_y = bullet_ob

    # ---- Agent: fire or move ----
    if 1 <= agent_action <= 4:
        if not agent_bullet_fire:
            # Firing is only allowed when the agent is not pressed against
            # the wall it is shooting at.
            clear_of_wall = {
                1: agent_y - character_radius - border > arena_top,
                2: agent_x + character_radius + border < arena_right,
                3: agent_y + character_radius + border < arena_bottom,
                4: agent_x - character_radius - border > arena_left,
            }[agent_action]
            if clear_of_wall:
                agent_bullet_fire = True
                agent_bullet_direction_x, agent_bullet_direction_y = _FIRE_DIRECTIONS[agent_action]
                agent_bullet_x = agent_x + agent_bullet_direction_x
                agent_bullet_y = agent_y + agent_bullet_direction_y
                last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y
                reward += 10
            else:
                reward += -25
                agent_bullet_x = agent_bullet_y = bullet_ob
        else:
            # A bullet is already in flight.
            reward += -20
    elif agent_action == 5:
        agent_y -= character_move_speed
        if agent_y - character_radius - border <= arena_top:
            agent_y = arena_top + character_radius + border
            reward += -5
        else:
            reward += 5
    elif agent_action == 6:
        agent_x += character_move_speed
        if agent_x + character_radius + border >= arena_right:
            agent_x = arena_right - character_radius - border
            reward += -5
        else:
            reward += 5
    elif agent_action == 7:
        agent_y += character_move_speed
        if agent_y + character_radius + border >= arena_bottom:
            agent_y = arena_bottom - character_radius - border
            reward += -5
        else:
            reward += 5
    elif agent_action == 8:
        agent_x -= character_move_speed
        if agent_x - character_radius - border <= arena_left:
            agent_x = arena_left + character_radius + border
            reward += -5
        else:
            reward += 5

    # ---- Agent bullet: advance and resolve ----
    if agent_bullet_fire:
        last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y
        agent_bullet_x += agent_bullet_direction_x; agent_bullet_y += agent_bullet_direction_y
        hit = bullet_hit_detector("agent")
        if hit:
            print("Bot Got Hit!")
            bot_hp -= bullet_damage
            reward += 100
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"
        if hit or _bullet_left_arena(agent_bullet_x, agent_bullet_y):
            agent_bullet_fire = False
            agent_bullet_direction_x = 0; agent_bullet_direction_y = 0
            agent_bullet_x = agent_bullet_y = bullet_ob
            last_agent_bullet_x = last_agent_bullet_y = bullet_ob

    return reward, cont, successful, winner


def _observe():
    """Build the 1x8 state vector fed to the Q-network."""
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        mapping(complexity, float(bot_bullet_x / float(arena_x))),
        mapping(complexity, float(bot_bullet_y / float(arena_y))),
        mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)),
        mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)),
    ]])


# Parameters
y = 0.75            # discount factor
e = 0.3             # exploration rate (epsilon)
num_episodes = 10000
batch_size = 10
complexity = 10

with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    for i in range(1, num_episodes):
        rAll = 0; d = False; c = True; j = 0
        param_init()
        samples = []  # replay memory, kept per episode
        while c:
            j += 1
            screen_blit()
            current_state = _observe()
            bot_take_action()

            # Epsilon-greedy; the first five episodes are fully random.
            if np.random.rand(1) < e or i <= 5:
                a = random.randint(0, 8)
            else:
                a = int(sess.run(predict, feed_dict={input_layer: current_state})[0])

            r, c, d, winner = action(a + 1, bot_action)
            next_state = _observe()
            samples.append([current_state, a, r, next_state, not c])

            if len(samples) > 10:
                for _ in range(batch_size):
                    (batch_current_state, action_taken, sample_reward,
                     batch_next_state, terminal) = random.choice(samples)
                    batch_targetQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                    if terminal:
                        # No future return after the episode has ended.
                        target = sample_reward
                    else:
                        batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                        target = sample_reward + y * np.max(batch_Q1)
                    # Update the action stored in the sample, not the action
                    # of the current step.
                    batch_targetQ[0][action_taken] = target
                    sess.run([updateModel], feed_dict={input_layer: batch_current_state,
                                                       next_Q: batch_targetQ})
            rAll += r
            if d:
                # Decay exploration after each win.
                e = 1. / ((i // 50) + 10)
                success += 1
                break
            display.update()
        rList.append(rAll)
        print(winner)

# range(1, num_episodes) plays num_episodes - 1 episodes.
episodes_run = num_episodes - 1
print("Successful episodes: %d out of %d. Success Rate = %.2f%%"
      % (success, episodes_run, 100.0 * success / episodes_run))
plt.plot(rList)
plt.show()

"""Beam-based arena duel trained with a single-layer Q-network.

A randomly acting bot and a learning agent move inside the arena and fire
instant beams. Action codes: 1-4 fire up/right/down/left, 5-8 move
up/right/down/left, 9 do nothing. Damage is applied on the turn the beam is
fired, so the reward is credited to the action that caused it.
"""
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf
from pylab import savefig
from tqdm import tqdm

# Screen setup (pixels).
disp_x, disp_y = 1000, 800
arena_x, arena_y = 1000, 800
border = 4; border_2 = 1

# Arena edges in screen coordinates. Floor division keeps them integral on
# both Python 2 and Python 3.
arena_left = disp_x // 2 - arena_x // 2
arena_right = disp_x // 2 + arena_x // 2
arena_top = disp_y // 2 - arena_y // 2
arena_bottom = disp_y // 2 + arena_y // 2

# Color setup.
white = (255, 255, 255); aqua = (0, 200, 200)
red = (255, 0, 0); green = (0, 255, 0)
blue = (0, 0, 255); black = (0, 0, 0)
green_yellow = (173, 255, 47); energy_blue = (125, 249, 255)

# Initial character positions (top-left corner of each square).
init_character_a_state = [arena_left + 50, arena_top + 50]
init_character_b_state = [arena_right - 50, arena_bottom - 50]

# Character dimensions.
character_size = 50
character_move_speed = 25

# Character stats.
character_init_health = 100

# Beam stats.
beam_damage = 10
beam_width = 10
beam_ob = -100  # off-screen coordinate for a beam that is not shown

# The neural network: 7 state features -> 9 action values, no hidden layer.
input_layer = tf.placeholder(shape=[1, 7], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([7, 9], 0, 0.1))
#weight_2 = tf.Variable(tf.random_uniform([6,9],0,0.1))

# The calculations, loss function and the update model.
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

jList = []
rList = []

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

# CHARACTER/BEAM PARAMETERS (real values are set by param_init()).
agent_x = agent_y = int()
bot_x = bot_y = int()
agent_hp = bot_hp = int()
bot_beam_dir = int()
agent_beam_fire = bot_beam_fire = bool()
agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = int()
agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = int()
bot_current_action = agent_current_action = int()


def param_init():
    """Reset positions, health and all beam state for a new episode."""
    global agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_beam_fire, bot_beam_fire, \
        agent_beam_x, bot_beam_x, agent_beam_y, bot_beam_y, \
        agent_beam_size_x, agent_beam_size_y, bot_beam_size_x, bot_beam_size_y, \
        bot_beam_dir, agent_current_action, bot_current_action

    agent_x, agent_y = init_character_a_state
    bot_x, bot_y = init_character_b_state
    agent_hp = bot_hp = character_init_health

    agent_beam_fire = bot_beam_fire = False
    agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = beam_ob
    # The size names and bot_beam_dir must be global here, otherwise the
    # previous episode's values leak into the next one.
    agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = 0
    bot_beam_dir = 0
    agent_current_action = bot_current_action = 0


def screen_blit():
    """Draw the arena, any beam fired this turn, both characters and health bars.

    Drawing a beam also clears its ``*_beam_fire`` flag, so each beam is
    shown (and hit-tested by ``action``) for a single turn only.
    """
    global agent_beam_fire, bot_beam_fire

    disp.fill(aqua)
    draw.rect(disp, black, (arena_left - border, arena_top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (arena_left, arena_top, arena_x, arena_y))

    # Each flag draws its own shooter's rectangle.
    if bot_beam_fire:
        draw.rect(disp, energy_blue, (bot_beam_x, bot_beam_y, bot_beam_size_x, bot_beam_size_y))
        bot_beam_fire = False
    if agent_beam_fire:
        draw.rect(disp, green_yellow, (agent_beam_x, agent_beam_y, agent_beam_size_x, agent_beam_size_y))
        agent_beam_fire = False

    draw.rect(disp, red, (agent_x, agent_y, character_size, character_size))
    draw.rect(disp, blue, (bot_x, bot_y, character_size, character_size))

    bar_y = arena_bottom + border + 1
    draw.rect(disp, red, (disp_x // 2 - 200, bar_y,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, bar_y,
                           float(bot_hp) / float(character_init_health) * 100, 14))


def bot_take_action():
    """Return a uniformly random bot action code in 1..9."""
    return random.randint(1, 9)


def _beam_overlaps(beam_pos, target_pos):
    """Return True if either edge of the beam lies strictly inside the target."""
    return (target_pos < beam_pos + beam_width < target_pos + character_size or
            target_pos < beam_pos < target_pos + character_size)


def _beam_hits(direction, beam_x, beam_y, shooter_x, shooter_y, target_x, target_y):
    """Return True if a beam fired in ``direction`` (1-4) reaches the target."""
    if direction == 1:    # up
        return arena_top <= target_y <= shooter_y and _beam_overlaps(beam_x, target_x)
    if direction == 2:    # right
        return shooter_x <= target_x <= arena_right and _beam_overlaps(beam_y, target_y)
    if direction == 3:    # down
        return shooter_y <= target_y <= arena_bottom and _beam_overlaps(beam_x, target_x)
    if direction == 4:    # left
        return arena_left <= target_x <= shooter_x and _beam_overlaps(beam_y, target_y)
    return False


def beam_hit_detector(player):
    """Return True if ``player``'s beam hit the opponent this turn.

    ``player`` is ``"bot"`` for the bot's beam against the agent; any other
    value tests the agent's beam against the bot. The beam direction is taken
    from the shooter's current action.
    """
    if player == "bot":
        return _beam_hits(bot_current_action, bot_beam_x, bot_beam_y,
                          bot_x, bot_y, agent_x, agent_y)
    return _beam_hits(agent_current_action, agent_beam_x, agent_beam_y,
                      agent_x, agent_y, bot_x, bot_y)


def mapping(maximum, number):
    """Return ``number`` unchanged (discretisation is currently disabled)."""
    return number  #int(number * maximum)


def _beam_rect(direction, x, y):
    """Return ``(left, top, width, height)`` of a beam fired from a character.

    The beam starts at the shooter's edge (centred on the shooter) and runs
    to the arena wall; its length is 0 when the shooter touches that wall.
    """
    centre_offset = character_size // 2 - beam_width // 2
    if direction == 1:    # up
        return x + centre_offset, arena_top, beam_width, y - arena_top
    if direction == 2:    # right
        return x + character_size, y + centre_offset, arena_right - x - character_size, beam_width
    if direction == 3:    # down
        return x + centre_offset, y + character_size, beam_width, arena_bottom - y - character_size
    # direction == 4: left
    return arena_left, y + centre_offset, x - arena_left, beam_width


def _step(direction, x, y, other_x, other_y):
    """Move a character one step and resolve wall / opponent collisions.

    Returns ``(x, y, outcome)`` where outcome is ``"wall"``, ``"blocked"``
    (stopped against the other character) or ``None`` for a free move.
    """
    size = character_size
    if direction == 5:    # up
        y -= character_move_speed
        if y <= arena_top:
            return x, arena_top, "wall"
        if other_y <= y <= other_y + size and other_x <= x <= other_x + size:
            return x, other_y + size, "blocked"
    elif direction == 6:  # right
        x += character_move_speed
        if x + size >= arena_right:
            return arena_right - size, y, "wall"
        if other_x <= x + size <= other_x + size and other_y <= y <= other_y + size:
            return other_x - size, y, "blocked"
    elif direction == 7:  # down
        y += character_move_speed
        if y + size >= arena_bottom:
            return x, arena_bottom - size, "wall"
        if other_y <= y + size <= other_y + size and other_x <= x <= other_x + size:
            return x, other_y - size, "blocked"
    elif direction == 8:  # left
        x -= character_move_speed
        if x <= arena_left:
            return arena_left, y, "wall"
        if other_x <= x <= other_x + size and other_y <= y <= other_y + size:
            return other_x + size, y, "blocked"
    return x, y, None


def action(agent_action, bot_action):
    """Advance the game by one turn.

    Args:
        agent_action: agent's action code, 1-9.
        bot_action: bot's action code, 1-9.

    Returns:
        ``(reward, cont, successful, winner)``: the agent's reward for the
        turn, whether the episode continues, whether the agent won, and the
        winner's name (``""`` while the episode is running).
    """
    global agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_beam_fire, \
        bot_beam_fire, agent_beam_x, bot_beam_x, agent_beam_y, bot_beam_y, agent_beam_size_x, \
        agent_beam_size_y, bot_beam_size_x, bot_beam_size_y, agent_current_action, bot_current_action

    agent_current_action = agent_action; bot_current_action = bot_action
    reward = 0; cont = True; successful = False; winner = ""

    # ---- Bot: fire or move ----
    if 1 <= bot_action <= 4:
        bot_beam_fire = True
        bot_beam_x, bot_beam_y, bot_beam_size_x, bot_beam_size_y = _beam_rect(bot_action, bot_x, bot_y)
    elif 5 <= bot_action <= 8:
        # The bot is only blocked when it overlaps the agent on both axes,
        # the same rule the agent's own movement uses.
        bot_x, bot_y, _ = _step(bot_action, bot_x, bot_y, agent_x, agent_y)

    if bot_beam_fire:
        if beam_hit_detector("bot"):
            #print "Agent Got Hit!"
            agent_hp -= beam_damage
            reward += -50
            bot_beam_size_x = bot_beam_size_y = 0
            bot_beam_x = bot_beam_y = beam_ob
            if agent_hp <= 0:
                cont = False
                winner = "Bot"

    # ---- Agent: fire or move ----
    if 1 <= agent_action <= 4:
        rect = _beam_rect(agent_action, agent_x, agent_y)
        if rect[2] > 0 and rect[3] > 0:
            agent_beam_fire = True
            agent_beam_x, agent_beam_y, agent_beam_size_x, agent_beam_size_y = rect
        else:
            # Firing straight into the adjacent wall: penalise and cancel the
            # beam so a stale rectangle from an earlier turn cannot score.
            agent_beam_fire = False
            reward += -25
    elif 5 <= agent_action <= 8:
        agent_x, agent_y, outcome = _step(agent_action, agent_x, agent_y, bot_x, bot_y)
        if outcome == "wall":
            reward += -5
        elif outcome == "blocked":
            reward += -2

    if agent_beam_fire:
        if beam_hit_detector("agent"):
            #print "Bot Got Hit!"
            bot_hp -= beam_damage
            reward += 50
            agent_beam_size_x = agent_beam_size_y = 0
            agent_beam_x = agent_beam_y = beam_ob
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"

    return reward, cont, successful, winner


def bot_beam_dir_detector():
    """Encode the bot's current action as the beam-direction state feature.

    Fire actions 1, 2, 3, 4 map to 2, 4, 3, 1; anything else maps to 0.
    """
    return {1: 2, 2: 4, 3: 3, 4: 1}.get(bot_current_action, 0)


def _observe():
    """Build the 1x7 state vector fed to the Q-network."""
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        #mapping(complexity, float(agent_hp) / float(character_init_health)),
        #mapping(complexity, float(bot_hp) / float(character_init_health)),
        mapping(complexity, float(agent_x - bot_x) / float(arena_x)),
        mapping(complexity, float(agent_y - bot_y) / float(arena_y)),
        bot_beam_dir,
    ]])


# Parameters
y = 0.75            # discount factor
e = 0.3             # exploration rate (epsilon)
num_episodes = 10000
batch_size = 10
complexity = 100

with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    for i in tqdm(range(1, num_episodes)):
        #print "Episode #", i
        rAll = 0; d = False; c = True; j = 0
        param_init()
        samples = []  # replay memory, kept per episode
        while c:
            j += 1
            current_state = _observe()
            b = bot_take_action()

            # Epsilon-greedy; the first five episodes are fully random.
            if np.random.rand(1) < e or i <= 5:
                a = random.randint(0, 8)
            else:
                a = int(sess.run(predict, feed_dict={input_layer: current_state})[0])

            r, c, d, winner = action(a + 1, b)
            bot_beam_dir = bot_beam_dir_detector()
            next_state = _observe()
            samples.append([current_state, a, r, next_state, not c])

            if len(samples) > 10:
                for _ in range(batch_size):
                    (batch_current_state, action_taken, sample_reward,
                     batch_next_state, terminal) = random.choice(samples)
                    batch_targetQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                    if terminal:
                        # No future return after the episode has ended.
                        target = sample_reward
                    else:
                        batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                        target = sample_reward + y * np.max(batch_Q1)
                    # Update the action stored in the sample, not the action
                    # of the current step.
                    batch_targetQ[0][action_taken] = target
                    sess.run([updateModel], feed_dict={input_layer: batch_current_state,
                                                       next_Q: batch_targetQ})
            rAll += r
            screen_blit()
            if d:
                # Decay exploration after each win.
                e = 1. / ((i // 50) + 10)
                success += 1
                break
            #print agent_hp, bot_hp
            display.update()
        jList.append(j)
        rList.append(rAll)
        print(winner)

Update #3 on August 22, 2017:

added 252 characters in body

Source Link

edited Aug 18, 2017 at 12:42

nedward

Update #1 on August 14th, 2017:

Update #2 on August 18, 2017:

Based on the advice of @NeilSlater, I've implemented experience replay in my model. The algorithm has improved, but I'm going to look for further improvements that lead to convergence.

Updated the code

Source Link

edited Aug 18, 2017 at 12:13

nedward

from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf #Screen Setup disp_x, disp_y = 1500, 1000 arena_x, arena_y = 800, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200);   red = (255, 0, 0); green = (0, 255, 0);   blue = (0, 0, 255); black = (0, 0, 0) #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radius = 30 character_move_speed = 20 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 3050 bullet_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state) bullet_a_fire = False; bullet_b_fire = False #The Neural Network input_layer = tf.placeholder(shape=[1,8],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer() #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10  jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32)  bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() 
agent_bullet_direction_x = agent_bullet_direction_y = int() bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \ bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \ bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_x, \ agent_bullet_y, bullet_radius, bot_bullet_x, bot_bullet_y, character_radius, agent_x, \ agent_y, bot_x, bot_y, character_init_health, agent_hp, bot_hp, red, blue, aqua, green, black disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius) draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius) draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2) draw.circle(disp, red, (agent_x, agent_y), character_radius) draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2) draw.circle(disp, blue, (bot_x, bot_y), character_radius) 
draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14)) def bot_take_action(): global agent_x, agent_y, bot_x, bot_y, character_radius, bot_action, border if agent_x - character_radius - border <= bot_x <= agent_x + character_radius + border: if random.randint(0, 100) > 5: if agent_y <= bot_y: bot_action = 1 else: bot_action = 3 else: bot_action = 9 elif agent_y - character_radius <= bot_y <= agent_y + character_radius: if random.randint(0, 100) > 5: if agent_x <= bot_x: bot_action = 4 else: bot_action = 2 else: bot_action = 9 else: if random.randint(0, 100) > 5: x_dist = abs(bot_x - agent_x); y_dist = abs(bot_y - agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0: bot_action = 6 else: bot_action = 8 else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player == "bot": if bot_bullet_x == last_bot_bullet_x: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < bot_bullet_y - bullet_radius < agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > bot_bullet_y 
+ bullet_radius) \ or (bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_y - bullet_radius < agent_y + character_radius + border or \ agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radius): return True else: return False else: if agent_bullet_x == last_agent_bullet_x: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius + border or \ bot_x - character_radius - border < agent_bullet_x - bullet_radius < bot_x + character_radius + border: #If the current state of the bullet is touching/inside the agent: if bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius or \ bot_y - character_radius < agent_bullet_y - bullet_radius < bot_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border > agent_bullet_y + bullet_radius) \ or (agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border 
> last_agent_bullet_y + bullet_radius): return True else: return False elif agent_bullet_y == last_agent_bullet_y: if bot_y - character_radius - border < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \ bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > last_agent_bullet_x + bullet_radius): return True else: return False  def mapping(maximum, number): return int(abs(number * maximum) / (maximum/10))  def clip(value): if value > 250.0: value = 250.0 elif value < -250.0: value = -250.0 return value  #Environmentdef action(Training)agent_action, Parametersbot_action): #agent_bullet_fire = bot_bullet_fire = False #agent_bullet_dir =global [0bot_bullet_x, 0];bot_bullet_y, bot_bullet_diragent_bullet_x, =agent_bullet_y, [0agent_bullet_fire, 0]\ #The environment: def action(agent_x, agent_y, bot_xbot_bullet_fire, bot_yagent_bullet_direction_x, agent_actionagent_bullet_direction_y, bot_actionbot_bullet_direction_x, agent_hp,\  bot_hp, agent_bullet bot_bullet_direction_y, bot_bulletagent_x, agent_bullet_fireagent_y, bot_bullet_firebot_x, agent_bullet_dirbot_y, bot_bullet_dir): agent_hp, bot_hp, last_agent_bullet_x, last_agent_bullet_y, #Bulletlast_bot_bullet_x, Managementlast_bot_bullet_y reward = 0  0; cont = True  True; successful = False False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == 
False:  #If bullet's fired by bot:  bot_bullet_fire = True if bot_action == 1: bot_bullet_dirbot_bullet_direction_x = [0,0; bot_bullet_direction_y = -bullet_speed]bullet_speed elif bot_action == 2: bot_bullet_dirbot_bullet_direction_x = [bullet_speed,bullet_speed; 0]bot_bullet_direction_y = 0 elif bot_action == 3: bot_bullet_dirbot_bullet_direction_x = [0,0; bullet_speed]bot_bullet_direction_y = bullet_speed elif bot_action == 4: bot_bullet_dirbot_bullet_direction_x = [-bullet_speed, 0] elif bot_bullet_fire == True: bot_bullet[0] = bot_bullet[0] + bot_bullet_dir[0]; bot_bullet[1] = bot_bullet[1] + bot_bullet_dir[1] if bot_bullet[0] > disp_x/2 + arena_x/2 + bullet_radius or bot_bullet[0] < disp_x/2 - arena_x/2 - bullet_radius or bot_bullet[1] > disp_y/2 + arena_y/2 + bullet_radius or bot_bullet[1] < disp_y/2 - arena_y/2 - bullet_radius: bullet_speed; bot_bullet_firebot_bullet_direction_y = False0 bot_bulletbot_bullet_x = [bot_x, bot_y] if agent_x - character_radius - border <= bot_bullet[0] <= agent_x + character_radius + border and agent_y - character_radius - border < bot_bullet[1] < agent_y + character_radiusbot_x + border: agent_hp -= bullet_damage  bot_bullet_direction_x; rewardbot_bullet_y = -50 if agent_hp <= 0:  contbot_y =+ Falsebot_bullet_direction_y   winner = "Bot" ifelif 5 <= bot_action <= 8: bot_bullet = [bot_x, bot_y] if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 + character_radius + 1 elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1: bot_x = disp_x/2 + arena_x/2 - character_radius - 1 elif bot_action == 7: bot_y += character_move_speed if bot_y >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radius - 1 elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 + character_radius + 1    
if 1 <= agent_action <= 4 and agent_bullet_firebot_bullet_fire == FalseTrue: agent_bullet_firelast_bot_bullet_x = True  bot_bullet_x; last_bot_bullet_y = ifbot_bullet_y  agent_action == 1:  bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y if   agent_y - character_radius > disp_y/2 - arena_y +if borderbullet_hit_detector("bot"):   agent_bullet_dirprint ="Agent [0,Got -bullet_speed]Hit!" else: agent_hp -= bullet_damage  reward = -25 elif agent_action == 2:50 if agent_x + character_radius < disp_x/2 + arena_xbot_bullet_fire -= border:False   agent_bullet_dirbot_bullet_direction_x = [bullet_speed, 0]  0; bot_bullet_direction_y = else:0   rewardbot_bullet_x = -25  bot_bullet_y = bullet_ob; last_bot_bullet_x elif= agent_actionlast_bot_bullet_y === 3:bullet_ob if agent_y + character_radius < disp_y/2 + arena_yagent_hp -<= border0: agent_bullet_dircont = [0, bullet_speed]  else:False rewardwinner = -25"Bot" elif agent_action == 4:  bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 ifor agent_xbot_bullet_x - character_radiusbullet_radius ><= disp_x/2 - arena_x/2 +or border:\   bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 agent_bullet_diror =bot_bullet_y [-bullet_speed, 0]  bullet_radius <= disp_y/2 - elsearena_y/2:   rewardbot_bullet_fire = -25False   elif 1 <= agent_action <= 4 and agent_bullet_fire == True:  bot_bullet_direction_x = 0; rewardbot_bullet_direction_y = -200   elif agent_bullet_fire == True:  agent_bullet[0]bot_bullet_x = agent_bullet[0] + agent_bullet_dir[0]; agent_bullet[1]bot_bullet_y = agent_bullet[1] + agent_bullet_dir[1]  bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob if agent_bullet[0]1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fire = True  if agent_action == 1: if agent_y - character_radius - border > disp_xdisp_y/2 +- arena_xarena_y/2: agent_bullet_direction_x = 0; agent_bullet_direction_y = -bullet_speed reward = 10 else: reward = -25  agent_bullet_x = agent_bullet_y = 
bullet_ob agent_bullet_fire = False elif agent_action == 2: if agent_x + bullet_radiuscharacter_radius or+ agent_bullet[0]border < disp_x/2 -+ arena_x/2: agent_bullet_direction_x = bullet_speed; agent_bullet_direction_y = 0 reward = 10 else: reward = -25  bullet_radius or agent_bullet[1] > disp_y/2 + arena_y/2  agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False elif agent_action == 3: if agent_y + bullet_radiuscharacter_radius or+ agent_bullet[1]border < disp_y/2 -+ arena_y/2:  - bullet_radius agent_bullet_direction_x = 0; agent_bullet_direction_y = bullet_speed reward = 10 else: agent_bullet_fire  reward = False-25 agent_bullet agent_bullet_x = [agent_x,agent_bullet_y agent_y]= bullet_ob agent_bullet_dir  agent_bullet_fire = [0,False  0] elif agent_action == 4: if bot_x - character_radius <= agent_bullet[0] <= bot_x + character_radius and bot_y if agent_x - character_radius <- agent_bullet[1]border <> bot_ydisp_x/2 +- character_radiusarena_x/2: bot_hp  agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_y = bullet_damage0   reward = 10010 agent_bullet_fire else:  reward = False-25 agent_bullet_dir  agent_bullet_x = [0,agent_bullet_y 0]= bullet_ob if bot_hp <= 0: agent_bullet_fire = False successfulif =agent_bullet_fire == True: cont  agent_bullet_x = Falseagent_x + agent_bullet_direction_x; agent_bullet_y = agent_y + agent_bullet_direction_y winner  last_agent_bullet_x = "Agent" agent_bullet_x; last_agent_bullet_y = agent_bullet_y  if 5 <= agent_action <= 8 else: agent_bullet reward = [agent_x,-20  agent_y] elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2 + character_radius + 1: agent_y = disp_y/2 - arena_y/2 + character_radius + 1border reward = -5 else: reward = 5 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + border >= disp_x/2 + arena_x/2 - character_radius - 1: agent_x = disp_x/2 + arena_x/2 
- character_radius - 1border reward = -5 else: reward = 5 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + border >= disp_y/2 + arena_y/2 - character_radius - 1: agent_y = disp_y/2 + arena_y/2 - character_radius - 1border reward = -5 else: reward = 5 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2 + character_radius + 1: agent_x = disp_x/2 - arena_x/2 + character_radius + 1border reward = -5 else: reward = 5  if agent_bullet_fire == True: last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y agent_bullet_x += agent_bullet_direction_x; agent_bullet_y += agent_bullet_direction_y if bullet_hit_detector("agent"): print "Bot Got Hit!" bot_hp -= bullet_damage reward = 100 agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob if bot_hp <= 0: successful = True cont = False winner = "Agent" elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob return reward, cont, successful, winner   return reward, cont, successful, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet, winner, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir  #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10 with tf.Session() as sess: sess.run(initialize) success = 0 for i in range(1, num_episodes): rAll = 0  s = 0  0; d = False  agent_x = int(list(init_character_a_state)[0]); agent_y = 
int(list(init_character_a_state)[1]) bot_x = int(list(init_character_b_state)[0]); bot_y = int(list(init_character_b_state)[1])  agent_hp = bot_hp = int(character_init_health) bot_bullet = list(init_character_b_state); agent_bullet = list(init_character_a_state) agent_bullet_fire = bot_bullet_fire = False agent_bullet_dir = [0, 0];False; bot_bullet_dirc = [0, 0]  True; j = 0   c = Trueparam_init() samples = [] while c == True:  disp.fill(aqua) draw.rect(disp, black, (disp_x/2 - arena_x/2 - border, disp_y/2 - arena_y/2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x/2 - arena_x/2, disp_y/2 - arena_y/2, arena_x, arena_y)) draw.circle(disp, black, agent_bullet, bullet_radius) draw.circle(disp, black, bot_bullet, bullet_radius) draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2) draw.circle(disp, red, (agent_x, agent_y), character_radius) draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2) draw.circle(disp, blue, (bot_x, bot_y), character_radius) draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp)/float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp)/float(character_init_health) * 100, 14))  j += 1 """ ---CURRENT STATE--- Evenything will be on a scale of 0 to "complexity". 
screen_blit(0 = 0, "complexity" = max) """ complexity = 100 current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)),   mapping(complexity, float( agent_y) / float(arena_y)),   mapping(complexity, float( bot_x) / float(arena_x)),   mapping(complexity, float( bot_y) / float(arena_y)),   mapping(complexity, float( bot_bullet[0]bot_bullet_x / float(arena_x))),   mapping(complexity, float( bot_bullet[1]bot_bullet_y / float(arena_y))),   mapping(complexity, abs( float(agent_x - bot_x)) / float(arena_x)),   mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y))]])   #current_state is the array of parameters for feeding the neural network  #print current_state a,allQ = sess.runbot_take_action([predict, Q],feed_dict={input_layer : current_state}) #bot move #a[0] = moves[0]   #moves = moves[1:] #1~4 are shooting a bullet. 5~8 are movement. 9 is doing nothing. if agent_x - character_radius <= bot_x <= agent_x + character_radius:  if agent_y <= bot_y: if randomnp.randint(0, 100) > 20: bot_action = 1 else: if random.randintrand(0, 1001) > 20:  bot_action = 3 elif agent_y - character_radius <= bot_y <= agent_y + character_radius: < e ifor agent_xi <= bot_x: if random.randint(0, 100) > 205:   bot_actiona = 4 else:  if random.randint(0, 1008) > 20: bot_action = 2 else: if random.randint(0, 100) > 20: #Find opponenta, caluculate x and y distance and go the shortest way  x_dist = abs(bot_x - agent_x); y_dist_ = abssess.run(bot_y - agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0: bot_action = 6 else: bot_action = 8 else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 [predict, Q],feed_dict={input_layer else:  bot_action = random.randint(1, 9current_state}) if np.random.rand(1) < e: a[0] = random.randint(0,8) #Action: Takes positions and actions. 
r, c, d, new_agent_x, new_agent_y, new_bot_x, new_bot_y, new_agent_hp, new_bot_hp, new_agent_bullet, new_bot_bullet, winner, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir = action(agent_x, agent_y, bot_x,a bot_y,+ int(a[0]+1)1, bot_action, agent_hp, bot_hp, agent_bullet, bot_bullet, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir) next_state = np.array([[mapping(complexity, float(new_agent_xagent_x) / float(arena_x)),   mapping(complexity, float( new_agent_yagent_y) / float(arena_y)),   mapping(complexity, float( new_bot_xbot_x) / float(arena_x)),   mapping(complexity, float( new_bot_ybot_y) / float(arena_y)),   mapping(complexity, float( new_bot_bullet[0]bot_bullet_x / float(arena_x))),   mapping(complexity, float( new_bot_bullet[1]bot_bullet_y / float(arena_y))),   mapping(complexity, abs(float( new_agent_xagent_x - new_bot_xbot_x)) / float(disp_xarena_x)),   mapping(complexity, abs(float(new_agent_yagent_y - new_bot_ybot_y)) / float(disp_yarena_y))]]) #Q1 = sess.run(Q, feed_dict = {input_layer : next_state}) #maxQ1 = np.max(Q1) #targetQ = allQ #targetQ[0,a[0]] = r + y * maxQ1 #for index, buf in enumerate(targetQ[0]): # targetQ[0][index] = clip(targetQ[0][index]) samples.append([current_state, a, r, next_state])  #print samples #print len(samples) allQ = sess.run(Q, feed_dict={input_layer : current_state}) print allQ  if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ #print batch_targetQ batch_targetQ[0,a[0]]batch_targetQ[0][a] = reward + y * batch_maxQ1 sess.run([updateModel], feed_dict={input_layer : batch_current_state, next_Q : batch_targetQ})  #print a[0] + 1, r, targetQ[0]  rAll += r  bot_x = new_bot_x; 
bot_y = new_bot_y; agent_x = new_agent_x; agent_y = new_agent_y; agent_hp = new_agent_hp; bot_hp = new_bot_hp; agent_bullet = new_agent_bullet; bot_bullet = new_bot_bullet  if d == True: e = 1. / ((i / 50) + 10) success += 1 break display.update()  #sleep(1) jList.append(j)  rList.append(rAll) print winner  print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList)  plt.show()

from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf
# The script finishes with plt.plot()/plt.show() but never bound `plt`.
import pylab as plt

# NOTE(review): this listing was recovered from a whitespace-collapsed paste;
# block nesting below was reconstructed from statement order -- confirm against
# the author's original file.

#Screen Setup
disp_x, disp_y = 1500, 1000
arena_x, arena_y = 800, 800
border = 4; border_2 = 1

#Color Setup
white = (255, 255, 255); aqua = (0, 200, 200)
red = (255, 0, 0); green = (0, 255, 0)
blue = (0, 0, 255); black = (0, 0, 0)

#Initialize character positions (floor division keeps pixel coordinates integral)
init_character_a_state = [disp_x // 2 - arena_x // 2 + 50, disp_y // 2 - arena_y // 2 + 50]
init_character_b_state = [disp_x // 2 + arena_x // 2 - 50, disp_y // 2 + arena_y // 2 - 50]

#Setup character dimensions
character_radius = 30
character_move_speed = 20

#Initialize character stats
character_init_health = 100

#Initialize bullet stats
bullet_speed = 30
bullet_damage = 10
bullet_radius = 7
bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state)
bullet_a_fire = False; bullet_b_fire = False

#The Neural Network: a single linear layer mapping 8 state features to 9 action values
input_layer = tf.placeholder(shape=[1, 8], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([8, 9], 0, 0.1))

#The calculations, loss function and the update model
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

#Parameters
y = 0.75            # discount applied to the best next-state Q value
e = 0.3             # probability of replacing the greedy action with a random one
num_episodes = 10000
batch_size = 10     # replay updates performed per environment step
complexity = 100    # first argument handed to mapping() when encoding a state

jList = []          # steps taken in each episode
rList = []          # summed reward of each episode

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)


def mapping(maximum, number):
    """Return int(abs(number * maximum) / (maximum // 10)).

    With maximum=100 a ratio of 1.0 maps to 10; the sign of `number` is
    discarded by the abs().
    """
    return int(abs(number * maximum) / (maximum // 10))


def clip(value):
    """Clamp `value` to the closed range [-250.0, 250.0]."""
    if value > 250.0:
        value = 250.0
    elif value < -250.0:
        value = -250.0
    return value


#The environment:
def action(agent_x, agent_y, bot_x, bot_y, agent_action, bot_action, agent_hp, bot_hp,
           agent_bullet, bot_bullet, agent_bullet_fire, bot_bullet_fire,
           agent_bullet_dir, bot_bullet_dir):
    """Advance the game by one step and return the agent's reward and the new state.

    Actions 1-4 fire a bullet (up, right, down, left), 5-8 move
    (up, right, down, left) and 9 does nothing.

    Returns the 16-tuple
    (reward, cont, successful, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp,
     agent_bullet, bot_bullet, winner, agent_bullet_fire, bot_bullet_fire,
     agent_bullet_dir, bot_bullet_dir).
    `cont` turns False when either side reaches 0 hp, `successful` is True only
    when the bot was defeated, and `winner` is "Agent", "Bot" or "".

    The bullet lists passed in may be updated in place while a bullet is moving.
    """
    # Arena edges in display coordinates.
    left = disp_x // 2 - arena_x // 2
    right = disp_x // 2 + arena_x // 2
    top = disp_y // 2 - arena_y // 2
    bottom = disp_y // 2 + arena_y // 2

    reward = 0
    cont = True
    successful = False
    winner = ""

    #Bullet Management (bot)
    if 1 <= bot_action <= 4 and not bot_bullet_fire:
        #If bullet's fired by bot:
        bot_bullet_fire = True
        if bot_action == 1:
            bot_bullet_dir = [0, -bullet_speed]
        elif bot_action == 2:
            bot_bullet_dir = [bullet_speed, 0]
        elif bot_action == 3:
            bot_bullet_dir = [0, bullet_speed]
        elif bot_action == 4:
            bot_bullet_dir = [-bullet_speed, 0]
    elif bot_bullet_fire:
        bot_bullet[0] = bot_bullet[0] + bot_bullet_dir[0]
        bot_bullet[1] = bot_bullet[1] + bot_bullet_dir[1]
        if (bot_bullet[0] > right + bullet_radius or bot_bullet[0] < left - bullet_radius
                or bot_bullet[1] > bottom + bullet_radius or bot_bullet[1] < top - bullet_radius):
            # Bullet left the arena: return it to the bot.
            bot_bullet_fire = False
            bot_bullet = [bot_x, bot_y]
        if (agent_x - character_radius - border <= bot_bullet[0] <= agent_x + character_radius + border
                and agent_y - character_radius - border < bot_bullet[1] < agent_y + character_radius + border):
            agent_hp -= bullet_damage
            reward = -50
            if agent_hp <= 0:
                cont = False
                winner = "Bot"

    #Bot movement, clamped so the bot stays inside the arena
    if 5 <= bot_action <= 8:
        bot_bullet = [bot_x, bot_y]
        if bot_action == 5:
            bot_y -= character_move_speed
            if bot_y <= top + character_radius + 1:
                bot_y = top + character_radius + 1
        elif bot_action == 6:
            bot_x += character_move_speed
            if bot_x >= right - character_radius - 1:
                bot_x = right - character_radius - 1
        elif bot_action == 7:
            bot_y += character_move_speed
            if bot_y >= bottom - character_radius - 1:
                bot_y = bottom - character_radius - 1
        elif bot_action == 8:
            bot_x -= character_move_speed
            if bot_x <= left + character_radius + 1:
                bot_x = left + character_radius + 1

    #Bullet Management (agent)
    if 1 <= agent_action <= 4 and not agent_bullet_fire:
        agent_bullet_fire = True
        # NOTE(review): the four bounds below use the full arena size rather than
        # half of it; with the constants defined above they always hold, so the
        # -25 branches are never taken. Left unchanged on purpose.
        if agent_action == 1:
            if agent_y - character_radius > disp_y // 2 - arena_y + border:
                agent_bullet_dir = [0, -bullet_speed]
            else:
                reward = -25
        elif agent_action == 2:
            if agent_x + character_radius < disp_x // 2 + arena_x - border:
                agent_bullet_dir = [bullet_speed, 0]
            else:
                reward = -25
        elif agent_action == 3:
            if agent_y + character_radius < disp_y // 2 + arena_y - border:
                agent_bullet_dir = [0, bullet_speed]
            else:
                reward = -25
        elif agent_action == 4:
            if agent_x - character_radius > disp_x // 2 - arena_x + border:
                agent_bullet_dir = [-bullet_speed, 0]
            else:
                reward = -25
    elif 1 <= agent_action <= 4 and agent_bullet_fire:
        # Firing while a bullet is already in flight is penalised.
        reward = -20
    elif agent_bullet_fire:
        agent_bullet[0] = agent_bullet[0] + agent_bullet_dir[0]
        agent_bullet[1] = agent_bullet[1] + agent_bullet_dir[1]
        if (agent_bullet[0] > right + bullet_radius or agent_bullet[0] < left - bullet_radius
                or agent_bullet[1] > bottom + bullet_radius or agent_bullet[1] < top - bullet_radius):
            agent_bullet_fire = False
            agent_bullet = [agent_x, agent_y]
            agent_bullet_dir = [0, 0]
        if (bot_x - character_radius <= agent_bullet[0] <= bot_x + character_radius
                and bot_y - character_radius < agent_bullet[1] < bot_y + character_radius):
            bot_hp -= bullet_damage
            reward = 100
            agent_bullet_fire = False
            agent_bullet_dir = [0, 0]
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"

    #Agent movement: +5 for a free move, -5 when the move runs into a wall
    if 5 <= agent_action <= 8:
        agent_bullet = [agent_x, agent_y]
        if agent_action == 5:
            agent_y -= character_move_speed
            if agent_y <= top + character_radius + 1:
                agent_y = top + character_radius + 1
                reward = -5
            else:
                reward = 5
        elif agent_action == 6:
            agent_x += character_move_speed
            if agent_x >= right - character_radius - 1:
                agent_x = right - character_radius - 1
                reward = -5
            else:
                reward = 5
        elif agent_action == 7:
            agent_y += character_move_speed
            if agent_y >= bottom - character_radius - 1:
                agent_y = bottom - character_radius - 1
                reward = -5
            else:
                reward = 5
        elif agent_action == 8:
            agent_x -= character_move_speed
            if agent_x <= left + character_radius + 1:
                agent_x = left + character_radius + 1
                reward = -5
            else:
                reward = 5

    return (reward, cont, successful, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp,
            agent_bullet, bot_bullet, winner, agent_bullet_fire, bot_bullet_fire,
            agent_bullet_dir, bot_bullet_dir)


def _encode_state(agent_x, agent_y, bot_x, bot_y, bot_bullet):
    """Build the 1x8 feature row fed to the network.

    Used for both the current and the next state so the two are always
    normalised the same way (positions and distances divided by the arena size).
    """
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        mapping(complexity, float(bot_bullet[0]) / float(arena_x)),
        mapping(complexity, float(bot_bullet[1]) / float(arena_y)),
        mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)),
        mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)),
    ]])


def _choose_bot_action(agent_x, agent_y, bot_x, bot_y):
    """Scripted opponent: 1~4 shoot a bullet, 5~8 move, 9 does nothing.

    When lined up with the agent the bot usually shoots towards it; otherwise it
    usually walks along the axis with the larger gap. Every path returns a
    value, so the caller never sees an unbound or stale action.
    """
    keen = random.randint(0, 100) > 20

    if agent_x - character_radius <= bot_x <= agent_x + character_radius:
        if not keen:
            return 9
        return 1 if agent_y <= bot_y else 3

    if agent_y - character_radius <= bot_y <= agent_y + character_radius:
        if not keen:
            return 9
        return 4 if agent_x <= bot_x else 2

    if not keen:
        return random.randint(1, 9)

    #Find opponent, calculate x and y distance and close the larger gap
    x_dist = abs(bot_x - agent_x)
    y_dist = abs(bot_y - agent_y)
    if x_dist >= y_dist:
        return 6 if bot_x - agent_x <= 0 else 8
    return 7 if bot_y - agent_y <= 0 else 5


def _draw_frame(agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet):
    """Paint the arena, both bullets, both characters and the two health bars."""
    disp.fill(aqua)
    draw.rect(disp, black, (disp_x // 2 - arena_x // 2 - border, disp_y // 2 - arena_y // 2 - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (disp_x // 2 - arena_x // 2, disp_y // 2 - arena_y // 2, arena_x, arena_y))
    draw.circle(disp, black, agent_bullet, bullet_radius)
    draw.circle(disp, black, bot_bullet, bullet_radius)
    draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2)
    draw.circle(disp, red, (agent_x, agent_y), character_radius)
    draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2)
    draw.circle(disp, blue, (bot_x, bot_y), character_radius)
    draw.rect(disp, red, (disp_x // 2 - 200, disp_y // 2 + arena_y // 2 + border + 1,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, disp_y // 2 + arena_y // 2 + border + 1,
                           float(bot_hp) / float(character_init_health) * 100, 14))


with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    for i in range(num_episodes):
        rAll = 0
        d = False
        agent_x = int(init_character_a_state[0]); agent_y = int(init_character_a_state[1])
        bot_x = int(init_character_b_state[0]); bot_y = int(init_character_b_state[1])
        agent_hp = bot_hp = int(character_init_health)
        bot_bullet = list(init_character_b_state); agent_bullet = list(init_character_a_state)
        agent_bullet_fire = bot_bullet_fire = False
        agent_bullet_dir = [0, 0]; bot_bullet_dir = [0, 0]
        j = 0
        c = True
        samples = []    # replay memory for this episode: [state, action, reward, next_state]
        while c:
            _draw_frame(agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet)
            j += 1

            #current_state is the array of parameters for feeding the neural network
            current_state = _encode_state(agent_x, agent_y, bot_x, bot_y, bot_bullet)
            a, allQ = sess.run([predict, Q], feed_dict={input_layer: current_state})

            bot_action = _choose_bot_action(agent_x, agent_y, bot_x, bot_y)

            # Epsilon-greedy exploration: sometimes override the greedy action.
            if np.random.rand(1) < e:
                a[0] = random.randint(0, 8)

            #Action: Takes positions and actions (network outputs 0-8, the game expects 1-9).
            (r, c, d, new_agent_x, new_agent_y, new_bot_x, new_bot_y, new_agent_hp, new_bot_hp,
             new_agent_bullet, new_bot_bullet, winner, agent_bullet_fire, bot_bullet_fire,
             agent_bullet_dir, bot_bullet_dir) = action(
                agent_x, agent_y, bot_x, bot_y, int(a[0] + 1), bot_action, agent_hp, bot_hp,
                agent_bullet, bot_bullet, agent_bullet_fire, bot_bullet_fire,
                agent_bullet_dir, bot_bullet_dir)

            next_state = _encode_state(new_agent_x, new_agent_y, new_bot_x, new_bot_y, new_bot_bullet)
            samples.append([current_state, a, r, next_state])

            allQ = sess.run(Q, feed_dict={input_layer: current_state})
            print(allQ)

            if len(samples) > 10:
                for _ in range(batch_size):
                    batch_current_state, action_taken, reward, batch_next_state = \
                        samples[random.randint(0, len(samples) - 1)]
                    batch_targetQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                    batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                    batch_maxQ1 = np.max(batch_Q1)
                    # Update the entry of the action stored with the sample, not the
                    # action chosen on the current step.
                    batch_targetQ[0, action_taken[0]] = reward + y * batch_maxQ1
                    sess.run([updateModel],
                             feed_dict={input_layer: batch_current_state, next_Q: batch_targetQ})

            rAll += r
            bot_x = new_bot_x; bot_y = new_bot_y
            agent_x = new_agent_x; agent_y = new_agent_y
            agent_hp = new_agent_hp; bot_hp = new_bot_hp
            agent_bullet = new_agent_bullet; bot_bullet = new_bot_bullet

            if d:
                # Exploration rate is only lowered after an episode the agent won.
                e = 1. / ((i // 50) + 10)
                success += 1
                break
            display.update()
            #sleep(1)
        jList.append(j)
        rList.append(rAll)
        print(winner)

plt.plot(rList)
plt.show()

"""Two-player top-down shooter used to train a single-layer Q-network.

A scripted "bot" (blue) fights a learning "agent" (red) inside a square
arena.  Each step both players pick one of nine actions:

    1-4  fire up / right / down / left
    5-8  move up / right / down / left
    9    do nothing

The agent's action values come from a linear TensorFlow 1.x model that is
trained with per-episode experience replay.
"""
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf
# Needed for the reward plot at the end of training (was missing, so the
# script previously died with a NameError on ``plt``).
import matplotlib.pyplot as plt

# Screen setup
disp_x, disp_y = 1500, 1000
arena_x, arena_y = 800, 800
border = 4
border_2 = 1

# Arena edges in display coordinates.  ``//`` keeps these integers on both
# Python 2 and Python 3 (pygame drawing calls expect integer coordinates).
arena_left = disp_x // 2 - arena_x // 2
arena_right = disp_x // 2 + arena_x // 2
arena_top = disp_y // 2 - arena_y // 2
arena_bottom = disp_y // 2 + arena_y // 2

# Color setup
white = (255, 255, 255)
aqua = (0, 200, 200)
red = (255, 0, 0)
green = (0, 255, 0)
blue = (0, 0, 255)
black = (0, 0, 0)

# Initial character positions: agent top-left, bot bottom-right.
init_character_a_state = [arena_left + 50, arena_top + 50]
init_character_b_state = [arena_right - 50, arena_bottom - 50]

# Character dimensions
character_radius = 30
character_move_speed = 20

# Character stats
character_init_health = 100

# Bullet stats
bullet_speed = 50
bullet_damage = 10
bullet_radius = 7
bullet_a_pos = list(init_character_a_state)
bullet_b_pos = list(init_character_b_state)
bullet_a_fire = False
bullet_b_fire = False

# Velocity of a freshly fired bullet for each of the four fire actions.
FIRE_DIRECTIONS = {
    1: (0, -bullet_speed),   # up
    2: (bullet_speed, 0),    # right
    3: (0, bullet_speed),    # down
    4: (-bullet_speed, 0),   # left
}

# The neural network: 8 state features -> 9 action values, one linear layer.
input_layer = tf.placeholder(shape=[1, 8], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([8, 9], 0, 0.1))

# The calculations, loss function and the update model
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

jList = []   # steps taken per episode
rList = []   # total reward per episode

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

# Off-board coordinate used to park a bullet that is not in flight.
bullet_ob = -100

# CHARACTER/BULLET PARAMETERS (placeholders; param_init() sets real values)
bot_bullet_x = bot_bullet_y = bullet_ob
agent_bullet_x = agent_bullet_y = bullet_ob
last_bot_bullet_x = last_bot_bullet_y = bullet_ob
last_agent_bullet_x = last_agent_bullet_y = bullet_ob
agent_bullet_fire = bot_bullet_fire = False
agent_bullet_direction_x = agent_bullet_direction_y = 0
bot_bullet_direction_x = bot_bullet_direction_y = 0
agent_x = agent_y = 0
bot_x = bot_y = 0
agent_hp = bot_hp = 0


def param_init():
    """Reset every piece of per-episode game state to its starting value."""
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, \
        agent_bullet_fire, bot_bullet_fire, \
        agent_bullet_direction_x, agent_bullet_direction_y, \
        bot_bullet_direction_x, bot_bullet_direction_y, \
        agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, \
        last_bot_bullet_x, last_bot_bullet_y

    agent_bullet_x = agent_bullet_y = bullet_ob
    bot_bullet_x = bot_bullet_y = bullet_ob
    # The last_* names must be declared global above; without that these
    # assignments only created locals and the reset was silently lost.
    last_agent_bullet_x = last_agent_bullet_y = bullet_ob
    last_bot_bullet_x = last_bot_bullet_y = bullet_ob
    agent_bullet_fire = bot_bullet_fire = False
    agent_bullet_direction_x = agent_bullet_direction_y = 0
    bot_bullet_direction_x = bot_bullet_direction_y = 0
    agent_x, agent_y = init_character_a_state
    bot_x, bot_y = init_character_b_state
    agent_hp = bot_hp = character_init_health


def screen_blit():
    """Draw the arena, both bullets, both characters and the health bars."""
    disp.fill(aqua)

    # Arena: a black frame with the green playing field on top of it.
    draw.rect(disp, black, (arena_left - border, arena_top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (arena_left, arena_top, arena_x, arena_y))

    # Bullets (parked bullets sit at bullet_ob, i.e. off-screen).
    draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius)
    draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)

    # Characters: black outline first, colored body on top.
    draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2)
    draw.circle(disp, red, (agent_x, agent_y), character_radius)
    draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2)
    draw.circle(disp, blue, (bot_x, bot_y), character_radius)

    # Health bars below the arena, 100 px wide at full health.
    bar_y = arena_bottom + border + 1
    agent_bar = float(agent_hp) / float(character_init_health) * 100
    bot_bar = float(bot_hp) / float(character_init_health) * 100
    draw.rect(disp, red, (disp_x // 2 - 200, bar_y, agent_bar, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, bar_y, bot_bar, 14))


def bot_take_action():
    """Choose the scripted bot's next action and store it in ``bot_action``.

    If the bot is lined up with the agent on either axis it fires towards the
    agent; otherwise it moves.  Roughly 5% of the time it idles (when lined
    up) or picks a uniformly random action (when not).
    """
    global bot_action

    reach = character_radius + border
    if agent_x - reach <= bot_x <= agent_x + reach:
        # Same column: shoot vertically.
        if random.randint(0, 100) > 5:
            bot_action = 1 if agent_y <= bot_y else 3
        else:
            bot_action = 9
    elif agent_y - character_radius <= bot_y <= agent_y + character_radius:
        # Same row: shoot horizontally.
        if random.randint(0, 100) > 5:
            bot_action = 4 if agent_x <= bot_x else 2
        else:
            bot_action = 9
    else:
        if random.randint(0, 100) > 5:
            x_dist = abs(bot_x - agent_x)
            y_dist = abs(bot_y - agent_y)
            if x_dist >= y_dist:
                bot_action = 6 if bot_x - agent_x <= 0 else 8
            else:
                bot_action = 7 if bot_y - agent_y <= 0 else 5
        else:
            bot_action = random.randint(1, 9)


def _axis_hit(cross_pos, cross_target, along_pos, along_last, along_target):
    """Hit test for a bullet travelling along a single axis.

    ``cross_*`` are coordinates on the axis the bullet is NOT moving along,
    ``along_*`` are coordinates on its axis of travel (``along_last`` is the
    bullet's position on the previous step).
    """
    reach = character_radius + border

    # The bullet must overlap the target on the perpendicular axis.
    lo = cross_target - reach
    hi = cross_target + reach
    if not (lo < cross_pos + bullet_radius < hi
            or lo < cross_pos - bullet_radius < hi):
        return False

    near = along_target - reach
    far = along_target + reach

    # Bullet currently touches / is inside the target.
    # NOTE(review): the border margin is applied to only one side of each
    # comparison here; kept exactly as in the original - confirm intent.
    if (near < along_pos + bullet_radius < along_target + character_radius
            or along_target - character_radius
            < along_pos - bullet_radius < far):
        return True

    # Bullet jumped from one side of the target to the other in one step.
    if along_last - bullet_radius > far and near > along_pos + bullet_radius:
        return True
    if along_pos - bullet_radius > far and near > along_last + bullet_radius:
        return True
    return False


def _bullet_hits(bx, by, last_bx, last_by, target_x, target_y):
    """Return True if a bullet at (bx, by) hit the target this step."""
    if bx == last_bx:
        # x unchanged -> bullet is moving vertically.
        return _axis_hit(bx, target_x, by, last_by, target_y)
    if by == last_by:
        # y unchanged -> bullet is moving horizontally.
        return _axis_hit(by, target_y, bx, last_bx, target_x)
    return False


def bullet_hit_detector(player):
    """Return True if ``player``'s bullet hit the opponent on this step.

    ``player`` is ``"bot"`` to test the bot's bullet against the agent; any
    other value tests the agent's bullet against the bot.  Always returns a
    bool (the original fell off the end and returned None on some paths).
    """
    if player == "bot":
        return _bullet_hits(bot_bullet_x, bot_bullet_y,
                            last_bot_bullet_x, last_bot_bullet_y,
                            agent_x, agent_y)
    return _bullet_hits(agent_bullet_x, agent_bullet_y,
                        last_agent_bullet_x, last_agent_bullet_y,
                        bot_x, bot_y)


def mapping(maximum, number):
    """Discretize ``number`` into an integer bucket.

    Computes ``int(abs(number * maximum) / (maximum // 10))``.  ``maximum``
    is expected to be an integer >= 10; smaller values make the divisor zero
    and raise ZeroDivisionError.
    """
    return int(abs(number * maximum) / (maximum // 10))


def _bullet_left_arena(x, y):
    """Return True if a bullet centered at (x, y) touches or passed a wall."""
    return (x + bullet_radius >= arena_right
            or x - bullet_radius <= arena_left
            or y + bullet_radius >= arena_bottom
            or y - bullet_radius <= arena_top)


def action(agent_action, bot_action):
    """Advance the game by one step.

    Applies the bot's action, moves the bot's bullet, applies the agent's
    action, then moves the agent's bullet, resolving hits along the way.

    Args:
        agent_action: agent's action, 1-9 (see module docstring).
        bot_action: bot's action, 1-9.

    Returns:
        ``(reward, cont, successful, winner)`` where ``reward`` is the
        agent's reward for this step, ``cont`` is False once either player
        is dead, ``successful`` is True when the agent killed the bot, and
        ``winner`` is ``"Agent"``, ``"Bot"`` or ``""``.
    """
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, \
        agent_bullet_fire, bot_bullet_fire, \
        agent_bullet_direction_x, agent_bullet_direction_y, \
        bot_bullet_direction_x, bot_bullet_direction_y, \
        agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, \
        last_bot_bullet_x, last_bot_bullet_y

    reward = 0
    cont = True
    successful = False
    winner = ""
    reach = character_radius + border

    # ---- Bot action ------------------------------------------------------
    if 1 <= bot_action <= 4 and not bot_bullet_fire:
        bot_bullet_fire = True
        bot_bullet_direction_x, bot_bullet_direction_y = \
            FIRE_DIRECTIONS[bot_action]
        bot_bullet_x = bot_x + bot_bullet_direction_x
        bot_bullet_y = bot_y + bot_bullet_direction_y
    elif 5 <= bot_action <= 8:
        # Move, clamped so the bot stays one pixel inside the arena wall.
        if bot_action == 5:
            bot_y = max(bot_y - character_move_speed,
                        arena_top + character_radius + 1)
        elif bot_action == 6:
            bot_x = min(bot_x + character_move_speed,
                        arena_right - character_radius - 1)
        elif bot_action == 7:
            bot_y = min(bot_y + character_move_speed,
                        arena_bottom - character_radius - 1)
        else:
            bot_x = max(bot_x - character_move_speed,
                        arena_left + character_radius + 1)

    # ---- Bot bullet ------------------------------------------------------
    if bot_bullet_fire:
        last_bot_bullet_x = bot_bullet_x
        last_bot_bullet_y = bot_bullet_y
        bot_bullet_x += bot_bullet_direction_x
        bot_bullet_y += bot_bullet_direction_y

        hit = bullet_hit_detector("bot")
        if hit:
            print("Agent Got Hit!")
            agent_hp -= bullet_damage
            # NOTE(review): this penalty is overwritten below whenever the
            # agent's own action (1-8) assigns a reward - confirm intent.
            reward = -50
            if agent_hp <= 0:
                cont = False
                winner = "Bot"
        if hit or _bullet_left_arena(bot_bullet_x, bot_bullet_y):
            # Park the bullet off-board until the bot fires again.
            bot_bullet_fire = False
            bot_bullet_direction_x = bot_bullet_direction_y = 0
            bot_bullet_x = bot_bullet_y = bullet_ob
            last_bot_bullet_x = last_bot_bullet_y = bullet_ob

    # ---- Agent action ----------------------------------------------------
    if 1 <= agent_action <= 4:
        if agent_bullet_fire:
            # A bullet is already in flight: firing again is penalized.
            reward = -20
        else:
            # Firing is only allowed when the agent is not against the wall
            # it is shooting at.
            if agent_action == 1:
                clear = agent_y - reach > arena_top
            elif agent_action == 2:
                clear = agent_x + reach < arena_right
            elif agent_action == 3:
                clear = agent_y + reach < arena_bottom
            else:
                clear = agent_x - reach > arena_left

            if clear:
                agent_bullet_fire = True
                agent_bullet_direction_x, agent_bullet_direction_y = \
                    FIRE_DIRECTIONS[agent_action]
                agent_bullet_x = agent_x + agent_bullet_direction_x
                agent_bullet_y = agent_y + agent_bullet_direction_y
                last_agent_bullet_x = agent_bullet_x
                last_agent_bullet_y = agent_bullet_y
                reward = 10
            else:
                reward = -25
                agent_bullet_x = agent_bullet_y = bullet_ob
    elif 5 <= agent_action <= 8:
        # Move; bumping into a wall clamps the position and is penalized.
        if agent_action == 5:
            agent_y -= character_move_speed
            bumped = agent_y - reach <= arena_top
            if bumped:
                agent_y = arena_top + reach
        elif agent_action == 6:
            agent_x += character_move_speed
            bumped = agent_x + reach >= arena_right
            if bumped:
                agent_x = arena_right - reach
        elif agent_action == 7:
            agent_y += character_move_speed
            bumped = agent_y + reach >= arena_bottom
            if bumped:
                agent_y = arena_bottom - reach
        else:
            agent_x -= character_move_speed
            bumped = agent_x - reach <= arena_left
            if bumped:
                agent_x = arena_left + reach
        reward = -5 if bumped else 5

    # ---- Agent bullet ----------------------------------------------------
    if agent_bullet_fire:
        last_agent_bullet_x = agent_bullet_x
        last_agent_bullet_y = agent_bullet_y
        agent_bullet_x += agent_bullet_direction_x
        agent_bullet_y += agent_bullet_direction_y

        hit = bullet_hit_detector("agent")
        if hit:
            print("Bot Got Hit!")
            bot_hp -= bullet_damage
            reward = 100
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"
        if hit or _bullet_left_arena(agent_bullet_x, agent_bullet_y):
            agent_bullet_fire = False
            agent_bullet_direction_x = agent_bullet_direction_y = 0
            agent_bullet_x = agent_bullet_y = bullet_ob
            last_agent_bullet_x = last_agent_bullet_y = bullet_ob

    return reward, cont, successful, winner


def get_state():
    """Encode the current game state as a 1x8 array of integer buckets.

    Features, in order: agent x, agent y, bot x, bot y, bot bullet x,
    bot bullet y, |agent x - bot x|, |agent y - bot y|; each is divided by
    the arena size and passed through ``mapping``.
    """
    width = float(arena_x)
    height = float(arena_y)
    features = [
        agent_x / width,
        agent_y / height,
        bot_x / width,
        bot_y / height,
        bot_bullet_x / width,
        bot_bullet_y / height,
        abs(agent_x - bot_x) / width,
        abs(agent_y - bot_y) / height,
    ]
    return np.array([[mapping(complexity, f) for f in features]])


# Parameters
y = 0.75             # discount factor applied to the next state's best Q
e = 0.3              # probability of taking a random (exploratory) action
num_episodes = 10000
batch_size = 10      # replayed transitions per step
complexity = 10      # ``maximum`` argument passed to mapping()

with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    for i in range(1, num_episodes + 1):
        rAll = 0
        d = False
        c = True
        j = 0
        param_init()
        samples = []   # replay buffer, cleared at the start of every episode
        while c:
            j += 1
            # Let pygame process OS events so the window stays responsive.
            event.pump()
            screen_blit()
            current_state = get_state()
            bot_take_action()

            # Epsilon-greedy; the first five episodes are fully random.
            if np.random.rand() < e or i <= 5:
                a = random.randint(0, 8)
            else:
                # sess.run returns a length-1 array; keep a plain int so it
                # can be stored and later used as an index.
                a = int(sess.run(predict,
                                 feed_dict={input_layer: current_state})[0])

            r, c, d, winner = action(a + 1, bot_action)
            next_state = get_state()
            samples.append([current_state, a, r, next_state])

            if len(samples) > 10:
                for _ in range(batch_size):
                    (batch_current_state, action_taken, reward,
                     batch_next_state) = random.choice(samples)
                    batch_targetQ = sess.run(
                        Q, feed_dict={input_layer: batch_current_state})
                    batch_maxQ1 = np.max(sess.run(
                        Q, feed_dict={input_layer: batch_next_state}))
                    # Update the action stored with the sampled transition,
                    # not the action taken on the current step.
                    batch_targetQ[0, action_taken] = reward + y * batch_maxQ1
                    sess.run(updateModel,
                             feed_dict={input_layer: batch_current_state,
                                        next_Q: batch_targetQ})

            rAll += r
            if d:
                # Agent won: decay exploration with the episode number.
                e = 1. / ((i // 50) + 10)
                success += 1
                break
            display.update()

        jList.append(j)
        rList.append(rAll)
        print(winner)

print("Successful episodes: %d out of %d. Success Rate = %.2f%%"
      % (success, num_episodes, 100.0 * success / num_episodes))
plt.plot(rList)
plt.show()