Skip to main content
Changed code and updated explanation
Source Link
nedward
  • 414
  • 5
  • 13
from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf from pylab import savefig from tqdm import tqdm  #Screen Setup disp_x, disp_y = 15001000, 1000800 arena_x, arena_y = 8001000, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200) red = (255, 0, 0); green = (0, 255, 0) blue = (0, 0, 255); black = (0, 0, 0) green_yellow = (173, 255, 47); energy_blue = (125, 249, 255)  #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radiuscharacter_size = 3050 character_move_speed = 2025 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 50 bullet_damagebeam_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_posbeam_width = list(init_character_b_state)10 bullet_a_fire = False; bullet_b_firebeam_ob = False-100 #The Neural Network input_layer = tf.placeholder(shape=[1,8]7],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8[7,9],0,0.1)) #weight_2 = tf.Variable(tf.random_uniform([6,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer()    jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32) bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = 
agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() agent_bullet_direction_x = agent_bullet_direction_y = int() bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() bot_beam_dir = int() agent_beam_fire = bot_beam_fire = bool() agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = int() agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = int() bot_current_action = agent_current_action = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fireagent_x, \  bot_bullet_fireagent_y, agent_bullet_direction_xbot_x, agent_bullet_direction_ybot_y, bot_bullet_direction_xagent_hp, \  bot_bullet_direction_ybot_hp, agent_xagent_beam_fire, agent_ybot_beam_fire, bot_xagent_beam_x, bot_ybot_beam_x, agent_hpagent_beam_y, bot_hpbot_beam_y  agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0  agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health  agent_beam_fire = bot_beam_fire = False agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = beam_ob agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = 0  def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_xcharacter_size, agent_x, \ agent_bullet_yagent_y, bullet_radiusbot_x, bot_bullet_xbot_y, bot_bullet_ycharacter_init_health, 
character_radiusagent_hp, agent_xbot_hp, red, blue, aqua, green, black, green_yellow, energy_blue, \ agent_yagent_beam_fire, bot_xbot_beam_fire, bot_yagent_beam_x, character_init_healthagent_beam_y, agent_hpbot_beam_x, bot_hpbot_beam_y, redagent_beam_size_x, blueagent_beam_size_y, aquabot_beam_size_x, greenbot_beam_size_y, blackbeam_width disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black,if [agent_bullet_x,bot_beam_fire agent_bullet_y],== bullet_radius)True:   draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)  draw.circlerect(disp, blackgreen_yellow, (agent_xagent_beam_x, agent_y)agent_beam_y,  agent_beam_size_x, agent_beam_size_y))  bot_beam_fire = False  character_radius + border_2)  if agent_beam_fire == draw.circle(disp,True:  red, (agent_x, agent_y), character_radius)  draw.circlerect(disp, blackenergy_blue, (bot_xbot_beam_x, bot_y)bot_beam_y, bot_beam_size_x, bot_beam_size_y)) agent_beam_fire = False  draw.rect(disp, red, character_radius(agent_x, +agent_y, border_2character_size, character_size)) draw.circlerect(disp, blue, (bot_x, bot_y), character_radiuscharacter_size, character_size))   draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14))    def bot_take_action(): return random.randint(1, 9) def beam_hit_detector(player): global agent_x, agent_y, bot_x, bot_y, character_radiusagent_beam_fire, bot_actionbot_beam_fire, borderagent_beam_x, \ ifbot_beam_x, agent_xagent_beam_y, -bot_beam_y, character_radiusagent_beam_size_x, -agent_beam_size_y, border\  <= bot_x <= agent_xbot_beam_size_x, 
+bot_beam_size_y, character_radiusbot_current_action, +agent_current_action, borderbeam_width, character_size if player == "bot": if random.randint(0, 100)bot_current_action >== 51: if disp_y/2 - arena_y/2 <= agent_y <= bot_y and (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + character_size):   bot_action =return 1True else: bot_action =return 3False   else:  elif bot_current_action == 2:  bot_action = 9  elif agent_y -if character_radiusbot_x <= bot_yagent_x <= agent_ydisp_x/2 + character_radius:  arena_x/2 ifand random.randint(0, 100) >agent_y 5: < bot_beam_y + beam_width < agent_y + character_size or agent_y < bot_beam_y if< agent_xagent_y <=+ bot_xcharacter_size): bot_action =return 4True else: bot_action = 2 else: bot_action = 9 return else:False if random.randint(0,elif 100)bot_current_action >== 53:   x_dist = abs(bot_x - agent_x); y_dist =if abs(bot_y -<= agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0:  disp_y/2 + bot_actionarena_y/2 =and 6 (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + elsecharacter_size):   bot_action =return 8True else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player ==return "bot":False ifelif bot_bullet_xbot_current_action == last_bot_bullet_x4: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border:  #If the current state of the bullet is touchingdisp_x/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < 
bot_bullet_y - bullet_radius < agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y2 - bullet_radius > agent_yarena_x/2 +<= character_radiusagent_x +<= borderbot_x and agent_y - character_radius - border > bot_bullet_y + bullet_radius) \  or (bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_ybot_beam_y -+ bullet_radiusbeam_width < agent_y + character_radius + bordercharacter_size or \  agent_y - character_radius - border < bot_bullet_y + bullet_radiusbot_beam_y < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radiuscharacter_size):   return True   else:   return False else: if agent_bullet_xagent_current_action == last_agent_bullet_x1: if bot_x - character_radiusdisp_y/2 - borderarena_y/2 <<= agent_bullet_xbot_y +<= bullet_radiusagent_y <and (bot_x +< character_radiusagent_beam_x + border or \  beam_width < bot_x -+ character_radiuscharacter_size -or borderbot_x < agent_bullet_x - bullet_radiusagent_beam_x < bot_x + character_radius + bordercharacter_size):   #Ifreturn theTrue  current state of the bullet is touching/inside the agent else: ifreturn 
bot_yFalse  - character_radius - border < agent_bullet_y + bullet_radiuselif <agent_current_action bot_y== +2:  character_radius or \  if agent_x <= bot_x <= disp_x/2 + bot_yarena_x/2 -and character_radius(bot_y < agent_bullet_yagent_beam_y -+ bullet_radiusbeam_width < bot_y + character_radiuscharacter_size +or border: bot_y < agent_beam_y < bot_y + character_size):  return True   else:  #If the bullet "passed through" the character from the last turn:return False elif agent_current_action == 3:  elif (last_agent_bullet_y - bullet_radius > if agent_y <= bot_y +<= character_radiusdisp_y/2 + borderarena_y/2 and bot_y(bot_x -< character_radiusagent_beam_x -+ borderbeam_width >< agent_bullet_ybot_x + bullet_radius)character_size \ or bot_x < agent_beam_x < bot_x + character_size):  or (agent_bullet_y - bullet_radius > bot_y + character_radiusreturn +True  border and bot_y - character_radius - border > last_agent_bullet_y + bullet_radius) else:   return TrueFalse elif bot_current_action == 4:  else:  if disp_x/2 - arena_x/2 <= bot_x <= agent_x and (bot_y < agent_beam_y + returnbeam_width False < bot_y + character_size or bot_y < agent_beam_y elif< agent_bullet_ybot_y ==+ last_agent_bullet_ycharacter_size): if bot_y - character_radius -return borderTrue  < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \else:   bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius +return border:False  #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > 
bot_x + character_radius + border and bot_x - character_radius - border > last_agent_bullet_x + bullet_radius): return True else: return False  def mapping(maximum, number): return int(absnumber#int(number * maximum) / (maximum/10)) def action(agent_action, bot_action): global bot_bullet_xagent_x, bot_bullet_yagent_y, agent_bullet_xbot_x, agent_bullet_ybot_y, agent_bullet_fireagent_hp, bot_hp, agent_beam_fire, \ bot_bullet_firebot_beam_fire, agent_bullet_direction_xagent_beam_x, agent_bullet_direction_ybot_beam_x, bot_bullet_direction_xagent_beam_y, bot_beam_y, agent_beam_size_x, \ bot_bullet_direction_yagent_beam_size_y, agent_xbot_beam_size_x, agent_ybot_beam_size_y, bot_xbeam_width, bot_yagent_current_action, agent_hpbot_current_action, bot_hp,character_size  last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_yagent_current_action = agent_action; bot_current_action = bot_action reward = 0; cont = True; successful = False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == False: bot_bullet_firebot_beam_fire = True if bot_action == 1: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = disp_y/2 -bullet_speed arena_y/2 bot_beam_size_x = beam_width; bot_beam_size_y = bot_y - disp_y/2 + arena_y/2 elif bot_action == 2: bot_bullet_direction_xbot_beam_x = bullet_speed;bot_x bot_bullet_direction_y+ character_size; bot_beam_y = 0bot_y + character_size/2 - beam_width/2 bot_beam_size_x = disp_x/2 + arena_x/2 - bot_x - character_size; bot_beam_size_y = beam_width elif bot_action == 3: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = bullet_speedbot_y + character_size bot_beam_size_x = beam_width; bot_beam_size_y = disp_y/2 + arena_y/2 - bot_y - character_size elif bot_action == 4: bot_bullet_direction_xbot_beam_x = disp_x/2 -bullet_speed; bot_bullet_direction_yarena_x/2; bot_beam_y = 0bot_y + character_size/2 - 
beam_width/2 bot_bullet_x bot_beam_size_x = bot_x +- bot_bullet_direction_x;disp_x/2 bot_bullet_y+ =arena_x/2; bot_ybot_beam_size_y += bot_bullet_direction_ybeam_width elif 5 <= bot_action <= 8: if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 elif agent_y <= bot_y <= agent_y + character_radiuscharacter_size:  bot_y = agent_y + 1character_size elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1character_size: bot_x = disp_x/2 + arena_x/2 - character_radiuscharacter_size  elif agent_x <= bot_x + character_size <= agent_x + character_size: bot_x = agent_x - 1character_size elif bot_action == 7: bot_y += character_move_speed if bot_y + character_size >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radiuscharacter_size elif agent_y <= bot_y + character_size <= agent_y + character_size: bot_y = agent_y - 1character_size elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 elif agent_x <= bot_x <= agent_x + character_radiuscharacter_size:  bot_x = agent_x + 1character_size if bot_bullet_firebot_beam_fire == True: last_bot_bullet_x = bot_bullet_x; last_bot_bullet_y = bot_bullet_y bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y  if bullet_hit_detectorbeam_hit_detector("bot"): print#print "Agent Got Hit!" 
agent_hp -= bullet_damagebeam_damage reward =+= -50 bot_bullet_fire = False bot_bullet_direction_xbot_beam_size_x = 0; bot_bullet_direction_ybot_beam_size_y = 0   bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_xbot_beam_x = last_bot_bullet_ybot_beam_y = bullet_obbeam_ob if agent_hp <= 0: cont = False winner = "Bot"  elif bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or bot_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or bot_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: bot_bullet_fire = False bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob  if 1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fireagent_beam_fire = True   if agent_action == 1:   if agent_y - character_radius - border > disp_y/2 - arena_y/2:   agent_beam_x = agent_bullet_direction_xagent_x =- 0;beam_width/2; agent_bullet_direction_yagent_beam_y = disp_y/2 -bullet_speed  arena_y/2  agent_beam_size_x = beam_width; rewardagent_beam_size_y = 10 agent_y - disp_y/2 + arena_y/2  else:   reward =+= -25 agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False   elif agent_action == 2:   if agent_x + character_radius + bordercharacter_size < disp_x/2 + arena_x/2:   agent_bullet_direction_x = bullet_speed; agent_bullet_direction_yagent_beam_x = 0  agent_x + character_size; rewardagent_beam_y = 10 agent_y + character_size/2 - else:beam_width/2   rewardagent_beam_size_x = -25 disp_x/2 + arena_x/2 - agent_x - character_size; agent_beam_size_y = beam_width  agent_bullet_x = agent_bullet_y = bullet_obelse:   agent_bullet_firereward =+= False-25   elif agent_action == 3:   if agent_y + character_radius + bordercharacter_size < disp_y/2 + arena_y/2:   agent_bullet_direction_x = 0; agent_bullet_direction_yagent_beam_x = bullet_speed  agent_x + character_size/2 - beam_width/2; rewardagent_beam_y = 10  agent_y + 
else:character_size   rewardagent_beam_size_x = -25 beam_width; agent_beam_size_y = disp_y/2 + arena_y/2 - agent_y - character_size  agent_bullet_x = agent_bullet_y = bullet_obelse:   agent_bullet_firereward =+= False-25   elif agent_action == 4:   if agent_x - character_radius - border > disp_x/2 - arena_x/2:   agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_yagent_beam_x = 0  disp_x/2 - arena_x/2; rewardagent_beam_y = 10  else: agent_y reward+ =character_size/2 -25  agent_bullet_x = agent_bullet_y = bullet_obbeam_width/2   agent_bullet_fireagent_beam_size_x = False  if agent_bullet_fire == True: agent_bullet_xagent_x =- agent_xdisp_x/2 + agent_bullet_direction_x;arena_x/2; agent_bullet_yagent_beam_size_y = agent_y + agent_bullet_direction_ybeam_width   last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_yelse: else:  reward =+= -2025  elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2: agent_y = disp_y/2 - arena_y/2 + character_radius + border reward =+= -5 elseelif bot_y <= agent_y <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: rewardagent_y = 5bot_y + character_size reward += -2 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + bordercharacter_size >= disp_x/2 + arena_x/2: agent_x = disp_x/2 + arena_x/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_x <= agent_x + character_size <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size: rewardagent_x = 5bot_x - character_size reward += -2 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + bordercharacter_size >= disp_y/2 + arena_y/2: agent_y = disp_y/2 + arena_y/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_y <= agent_y + character_size <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: 
rewardagent_y = 5bot_y - character_size reward += -2 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2: agent_x = disp_x/2 - arena_x/2 + character_radius + border reward =+= -5 else: elif bot_x <= agent_x <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size:  reward = 5  if agent_bullet_fire == True:  agent_x = last_agent_bullet_xbot_x =+ agent_bullet_x;character_size  last_agent_bullet_y = agent_bullet_y  agent_bullet_x += agent_bullet_direction_x; agent_bullet_y  reward += agent_bullet_direction_y-2  if agent_beam_fire == True: if bullet_hit_detectorbeam_hit_detector("agent"): print#print "Bot Got Hit!" bot_hp -= bullet_damagebeam_damage reward = 100  agent_bullet_fire =+= False50 agent_bullet_direction_xagent_beam_size_x = 0; agent_bullet_direction_yagent_beam_size_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_xagent_beam_x = last_agent_bullet_yagent_beam_y = bullet_obbeam_ob if bot_hp <= 0: successful = True cont = False winner = "Agent"  elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob  return reward, cont, successful, winner def bot_beam_dir_detector(): global bot_current_action if bot_current_action == 1: bot_beam_dir = 2 elif bot_current_action == 2: bot_beam_dir = 4 elif bot_current_action == 3: bot_beam_dir = 3 elif bot_current_action == 4: bot_beam_dir = 1 else: bot_beam_dir = 0 return bot_beam_dir  #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10100 with tf.Session() as sess: sess.run(initialize) success = 0 for i in tqdm(range(1, num_episodes)): 
#print "Episode #", i rAll = 0; d = False; c = True; j = 0 param_init() samples = [] while c == True: j += 1  screen_blit()  current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]])   b = bot_take_action() if np.random.rand(1) < e or i <= 5: a = random.randint(0, 8) else: a, _ = sess.run([predict, Q],feed_dict={input_layer : current_state})    r, c, d, winner = action(a + 1, bot_actionb) bot_beam_dir = bot_beam_dir_detector() next_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]]) samples.append([current_state, a, r, next_state]) if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ batch_targetQ[0][a] = reward + y * batch_maxQ1 
sess.run([updateModel], feed_dict={input_layer : batch_current_state, next_Q : batch_targetQ}) rAll += r  screen_blit()  if d == True: e = 1. / ((i / 50) + 10) success += 1 break  #print agent_hp, bot_hp  display.update()  jList.append(j)  rList.append(rAll) print winner  print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList) plt.show() 

Update #3 on August 22, 2017:

I've noticed that if the agent hits the bot with a bullet on a turn, and the action the bot took on that turn was not "fire a bullet", then the wrong action would be given credit. Thus, I've turned the bullets into beams, so that the bot/agent takes damage on the same turn the beam is fired.

from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf #Screen Setup disp_x, disp_y = 1500, 1000 arena_x, arena_y = 800, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200) red = (255, 0, 0); green = (0, 255, 0) blue = (0, 0, 255); black = (0, 0, 0) #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radius = 30 character_move_speed = 20 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 50 bullet_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state) bullet_a_fire = False; bullet_b_fire = False #The Neural Network input_layer = tf.placeholder(shape=[1,8],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer()    jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32) bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() agent_bullet_direction_x = agent_bullet_direction_y = int() 
bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \  bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \  bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp  agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0  agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_x, \ agent_bullet_y, bullet_radius, bot_bullet_x, bot_bullet_y, character_radius, agent_x, \ agent_y, bot_x, bot_y, character_init_health, agent_hp, bot_hp, red, blue, aqua, green, black disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius)   draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)  draw.circle(disp, black, (agent_x, agent_y),  character_radius + border_2)  draw.circle(disp, red, (agent_x, agent_y), character_radius)  draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2) draw.circle(disp, blue, (bot_x, bot_y), character_radius) draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y 
/ 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14)) def bot_take_action(): global agent_x, agent_y, bot_x, bot_y, character_radius, bot_action, border if agent_x - character_radius - border <= bot_x <= agent_x + character_radius + border: if random.randint(0, 100) > 5: if agent_y <= bot_y:   bot_action = 1 else: bot_action = 3   else:  bot_action = 9  elif agent_y - character_radius <= bot_y <= agent_y + character_radius:  if random.randint(0, 100) > 5:  if agent_x <= bot_x: bot_action = 4 else: bot_action = 2 else: bot_action = 9  else: if random.randint(0, 100) > 5:   x_dist = abs(bot_x - agent_x); y_dist = abs(bot_y - agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0:  bot_action = 6  else:   bot_action = 8 else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player == "bot": if bot_bullet_x == last_bot_bullet_x: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border:  #If the current state of the bullet is touching/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < bot_bullet_y - bullet_radius < agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > bot_bullet_y + bullet_radius) \  or (bot_bullet_y - 
bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_y - bullet_radius < agent_y + character_radius + border or \  agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radius):   return True   else:   return False else: if agent_bullet_x == last_agent_bullet_x: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius + border or \  bot_x - character_radius - border < agent_bullet_x - bullet_radius < bot_x + character_radius + border:   #If the current state of the bullet is touching/inside the agent: if bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius or \  bot_y - character_radius < agent_bullet_y - bullet_radius < bot_y + character_radius + border:  return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border > agent_bullet_y + bullet_radius) \  or (agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border > last_agent_bullet_y + 
bullet_radius):   return True else:  return False  elif agent_bullet_y == last_agent_bullet_y: if bot_y - character_radius - border < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \   bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius + border:  #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > last_agent_bullet_x + bullet_radius): return True else: return False  def mapping(maximum, number): return int(abs(number * maximum) / (maximum/10)) def action(agent_action, bot_action): global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \ bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \ bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_y reward = 0; cont = True; successful = False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == False: bot_bullet_fire = True if bot_action == 1: bot_bullet_direction_x = 0; bot_bullet_direction_y = -bullet_speed elif bot_action == 2: bot_bullet_direction_x = bullet_speed; bot_bullet_direction_y = 0 elif bot_action == 3: bot_bullet_direction_x = 0; bot_bullet_direction_y = bullet_speed elif bot_action == 4: bot_bullet_direction_x = -bullet_speed; bot_bullet_direction_y = 0 bot_bullet_x = bot_x + bot_bullet_direction_x; bot_bullet_y = bot_y + 
bot_bullet_direction_y elif 5 <= bot_action <= 8: if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 + character_radius + 1 elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1: bot_x = disp_x/2 + arena_x/2 - character_radius - 1 elif bot_action == 7: bot_y += character_move_speed if bot_y >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radius - 1 elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 + character_radius + 1 if bot_bullet_fire == True: last_bot_bullet_x = bot_bullet_x; last_bot_bullet_y = bot_bullet_y bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y  if bullet_hit_detector("bot"): print "Agent Got Hit!" agent_hp -= bullet_damage reward = -50 bot_bullet_fire = False bot_bullet_direction_x = 0; bot_bullet_direction_y = 0   bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob if agent_hp <= 0: cont = False winner = "Bot"  elif bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or bot_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or bot_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: bot_bullet_fire = False bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob  if 1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fire = True   if agent_action == 1:   if agent_y - character_radius - border > disp_y/2 - arena_y/2:   agent_bullet_direction_x = 0; agent_bullet_direction_y = -bullet_speed  reward = 10  else:   reward = -25 agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False   elif agent_action == 2:   if agent_x + character_radius + border < 
disp_x/2 + arena_x/2:   agent_bullet_direction_x = bullet_speed; agent_bullet_direction_y = 0  reward = 10  else:   reward = -25  agent_bullet_x = agent_bullet_y = bullet_ob   agent_bullet_fire = False   elif agent_action == 3:   if agent_y + character_radius + border < disp_y/2 + arena_y/2:   agent_bullet_direction_x = 0; agent_bullet_direction_y = bullet_speed  reward = 10  else:   reward = -25  agent_bullet_x = agent_bullet_y = bullet_ob   agent_bullet_fire = False   elif agent_action == 4:   if agent_x - character_radius - border > disp_x/2 - arena_x/2:   agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_y = 0  reward = 10  else:  reward = -25  agent_bullet_x = agent_bullet_y = bullet_ob   agent_bullet_fire = False  if agent_bullet_fire == True: agent_bullet_x = agent_x + agent_bullet_direction_x; agent_bullet_y = agent_y + agent_bullet_direction_y   last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y else:  reward = -20 elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2: agent_y = disp_y/2 - arena_y/2 + character_radius + border reward = -5 else: reward = 5 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + border >= disp_x/2 + arena_x/2: agent_x = disp_x/2 + arena_x/2 - character_radius - border reward = -5 else: reward = 5 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + border >= disp_y/2 + arena_y/2: agent_y = disp_y/2 + arena_y/2 - character_radius - border reward = -5 else: reward = 5 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2: agent_x = disp_x/2 - arena_x/2 + character_radius + border reward = -5 else:  reward = 5  if agent_bullet_fire == True:  last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y  agent_bullet_x += agent_bullet_direction_x; 
agent_bullet_y += agent_bullet_direction_y if bullet_hit_detector("agent"): print "Bot Got Hit!" bot_hp -= bullet_damage reward = 100  agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob if bot_hp <= 0: successful = True cont = False winner = "Agent"  elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob  return reward, cont, successful, winner #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10 with tf.Session() as sess: sess.run(initialize) success = 0 for i in range(1, num_episodes): rAll = 0; d = False; c = True; j = 0 param_init() samples = [] while c == True: j += 1  screen_blit()  current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping(complexity, float(bot_bullet_x / float(arena_x))), mapping(complexity, float(bot_bullet_y / float(arena_y))), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y))]]) bot_take_action() if np.random.rand(1) < e or i <= 5: a = random.randint(0, 8) else: a, _ = sess.run([predict, Q],feed_dict={input_layer : current_state})    r, c, d, winner = action(a + 1, bot_action) next_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / 
float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping(complexity, float(bot_bullet_x / float(arena_x))), mapping(complexity, float(bot_bullet_y / float(arena_y))), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y))]]) samples.append([current_state, a, r, next_state]) if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ batch_targetQ[0][a] = reward + y * batch_maxQ1 sess.run([updateModel], feed_dict={input_layer : batch_current_state, next_Q : batch_targetQ}) rAll += r if d == True: e = 1. / ((i / 50) + 10) success += 1 break display.update() rList.append(rAll) print winner  print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList) plt.show() 
"""Beam-duel Q-learning experiment.

A pygame arena holds two squares: the learning agent (red) and a bot (blue)
that picks a random action every turn. Each turn both choose one of nine
actions: 1-4 fire a beam (up, right, down, left), 5-8 move (up, right, down,
left) and 9 does nothing. A beam deals its damage on the turn it is fired.
The agent's Q-values come from a single linear layer trained with a small
experience-replay batch after every step.
"""
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf
from pylab import savefig
from tqdm import tqdm

#Screen Setup
disp_x, disp_y = 1000, 800
arena_x, arena_y = 1000, 800
border = 4; border_2 = 1

#Arena edges in screen coordinates. Floor division keeps these integers on
#both Python 2 and Python 3.
arena_left = disp_x // 2 - arena_x // 2
arena_right = disp_x // 2 + arena_x // 2
arena_top = disp_y // 2 - arena_y // 2
arena_bottom = disp_y // 2 + arena_y // 2

#Color Setup
white = (255, 255, 255); aqua = (0, 200, 200)
red = (255, 0, 0); green = (0, 255, 0)
blue = (0, 0, 255); black = (0, 0, 0)
green_yellow = (173, 255, 47); energy_blue = (125, 249, 255)

#Initialize character positions (top-left corner of each square)
init_character_a_state = [arena_left + 50, arena_top + 50]
init_character_b_state = [arena_right - 50, arena_bottom - 50]

#Setup character dimensions
character_size = 50
character_move_speed = 25

#Initialize character stats
character_init_health = 100

#Initialize beam stats
beam_damage = 10
beam_width = 10
beam_ob = -100  #off-screen coordinate used while no beam is shown

#The Neural Network: 7 state features in, 9 action values out
input_layer = tf.placeholder(shape=[1,7],dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([7,9],0,0.1))
#weight_2 = tf.Variable(tf.random_uniform([6,9],0,0.1))

#The calculations, loss function and the update model
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

jList = []
rList = []

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

#CHARACTER/BEAM PARAMETERS
agent_x = agent_y = int()
bot_x = bot_y = int()
agent_hp = bot_hp = int()
bot_beam_dir = int()
agent_beam_fire = bot_beam_fire = bool()
agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = int()
agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = int()
bot_current_action = agent_current_action = int()


def param_init():
    """Reset positions, health and beam state at the start of an episode."""
    # Every name assigned below must be declared global; otherwise the
    # assignment only creates a local and the module-level value is not reset.
    global agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        agent_beam_fire, bot_beam_fire, \
        agent_beam_x, bot_beam_x, agent_beam_y, bot_beam_y, \
        agent_beam_size_x, agent_beam_size_y, bot_beam_size_x, bot_beam_size_y, \
        bot_beam_dir
    agent_x, agent_y = init_character_a_state
    bot_x, bot_y = init_character_b_state
    agent_hp = bot_hp = character_init_health

    agent_beam_fire = bot_beam_fire = False
    agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = beam_ob
    agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = 0
    # No beam has been fired yet in the new episode.
    bot_beam_dir = 0


def screen_blit():
    """Draw the arena, any beam fired this turn, both characters and health bars.

    The beam flags are cleared after drawing so each beam is shown for one
    frame only.
    """
    global agent_beam_fire, bot_beam_fire
    disp.fill(aqua)
    draw.rect(disp, black, (arena_left - border, arena_top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (arena_left, arena_top, arena_x, arena_y))

    # Each flag draws the rectangle belonging to the same shooter.
    if bot_beam_fire == True:
        draw.rect(disp, green_yellow,
                  (bot_beam_x, bot_beam_y, bot_beam_size_x, bot_beam_size_y))
        bot_beam_fire = False
    if agent_beam_fire == True:
        draw.rect(disp, energy_blue,
                  (agent_beam_x, agent_beam_y, agent_beam_size_x, agent_beam_size_y))
        agent_beam_fire = False

    draw.rect(disp, red, (agent_x, agent_y, character_size, character_size))
    draw.rect(disp, blue, (bot_x, bot_y, character_size, character_size))

    health_bar_y = arena_bottom + border + 1
    draw.rect(disp, red, (disp_x // 2 - 200, health_bar_y,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, health_bar_y,
                           float(bot_hp) / float(character_init_health) * 100, 14))


def bot_take_action():
    """Return a uniformly random bot action in 1..9."""
    return random.randint(1, 9)


def _beam_crosses(beam_start, target_start):
    """Return True when either edge of the beam's thickness lies strictly
    inside the target square's extent along the same axis."""
    target_end = target_start + character_size
    return (target_start < beam_start + beam_width < target_end
            or target_start < beam_start < target_end)


def beam_hit_detector(player):
    """Return True when the beam fired this turn by `player` hits the opponent.

    `player` is "bot" for the bot's beam; any other value selects the agent's
    beam. The direction is taken from the shooter's own current action
    (1 up, 2 right, 3 down, 4 left); any other action yields False.
    """
    if player == "bot":
        direction = bot_current_action
        shooter_x, shooter_y = bot_x, bot_y
        target_x, target_y = agent_x, agent_y
        beam_x, beam_y = bot_beam_x, bot_beam_y
    else:
        direction = agent_current_action
        shooter_x, shooter_y = agent_x, agent_y
        target_x, target_y = bot_x, bot_y
        beam_x, beam_y = agent_beam_x, agent_beam_y

    if direction == 1:
        return arena_top <= target_y <= shooter_y and _beam_crosses(beam_x, target_x)
    if direction == 2:
        return shooter_x <= target_x <= arena_right and _beam_crosses(beam_y, target_y)
    if direction == 3:
        return shooter_y <= target_y <= arena_bottom and _beam_crosses(beam_x, target_x)
    if direction == 4:
        return arena_left <= target_x <= shooter_x and _beam_crosses(beam_y, target_y)
    return False


def mapping(maximum, number):
    """Return `number` unchanged; `maximum` is currently unused."""
    return number  #int(number * maximum)


def action(agent_action, bot_action):
    """Apply one turn for both players and return the agent's outcome.

    Actions: 1-4 fire a beam up/right/down/left, 5-8 move up/right/down/left,
    9 does nothing. The bot acts first, then the agent.

    Returns a tuple (reward, cont, successful, winner):
      reward      -- sum of this turn's reward terms for the agent
      cont        -- False once either player's health reaches zero
      successful  -- True only when the agent wins this turn
      winner      -- "Agent", "Bot" or "" while the fight continues
    """
    global agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_beam_fire, \
        bot_beam_fire, agent_beam_x, bot_beam_x, agent_beam_y, bot_beam_y, \
        agent_beam_size_x, agent_beam_size_y, bot_beam_size_x, bot_beam_size_y, \
        agent_current_action, bot_current_action

    agent_current_action = agent_action; bot_current_action = bot_action
    reward = 0; cont = True; successful = False; winner = ""
    half_offset = character_size // 2 - beam_width // 2  #centres a beam on a square

    # ---- Bot turn ----
    if 1 <= bot_action <= 4:
        bot_beam_fire = True
        if bot_action == 1:
            bot_beam_x = bot_x + half_offset; bot_beam_y = arena_top
            bot_beam_size_x = beam_width; bot_beam_size_y = bot_y - arena_top
        elif bot_action == 2:
            bot_beam_x = bot_x + character_size; bot_beam_y = bot_y + half_offset
            bot_beam_size_x = arena_right - bot_x - character_size; bot_beam_size_y = beam_width
        elif bot_action == 3:
            bot_beam_x = bot_x + half_offset; bot_beam_y = bot_y + character_size
            bot_beam_size_x = beam_width; bot_beam_size_y = arena_bottom - bot_y - character_size
        elif bot_action == 4:
            bot_beam_x = arena_left; bot_beam_y = bot_y + half_offset
            bot_beam_size_x = bot_x - arena_left; bot_beam_size_y = beam_width
    elif 5 <= bot_action <= 8:
        # A move is clamped to the arena; otherwise, if it ends inside the
        # agent (checked on both axes, as for the agent's moves below), the
        # bot is pushed back to the agent's edge.
        if bot_action == 5:
            bot_y -= character_move_speed
            if bot_y <= arena_top:
                bot_y = arena_top
            elif agent_y <= bot_y <= agent_y + character_size and \
                    agent_x <= bot_x <= agent_x + character_size:
                bot_y = agent_y + character_size
        elif bot_action == 6:
            bot_x += character_move_speed
            if bot_x >= arena_right - character_size:
                bot_x = arena_right - character_size
            elif agent_x <= bot_x + character_size <= agent_x + character_size and \
                    agent_y <= bot_y <= agent_y + character_size:
                bot_x = agent_x - character_size
        elif bot_action == 7:
            bot_y += character_move_speed
            if bot_y + character_size >= arena_bottom:
                bot_y = arena_bottom - character_size
            elif agent_y <= bot_y + character_size <= agent_y + character_size and \
                    agent_x <= bot_x <= agent_x + character_size:
                bot_y = agent_y - character_size
        elif bot_action == 8:
            bot_x -= character_move_speed
            if bot_x <= arena_left:
                bot_x = arena_left
            elif agent_x <= bot_x <= agent_x + character_size and \
                    agent_y <= bot_y <= agent_y + character_size:
                bot_x = agent_x + character_size

    if bot_beam_fire == True:
        if beam_hit_detector("bot"):
            #print "Agent Got Hit!"
            agent_hp -= beam_damage
            reward += -50
            bot_beam_size_x = bot_beam_size_y = 0
            bot_beam_x = bot_beam_y = beam_ob
            if agent_hp <= 0:
                cont = False
                winner = "Bot"

    # ---- Agent turn ----
    if 1 <= agent_action <= 4:
        agent_beam_fire = True
        shot_is_valid = True
        if agent_action == 1:
            if agent_y > arena_top:
                # Centred on the agent, like every other beam.
                agent_beam_x = agent_x + half_offset; agent_beam_y = arena_top
                agent_beam_size_x = beam_width; agent_beam_size_y = agent_y - arena_top
            else:
                shot_is_valid = False
        elif agent_action == 2:
            if agent_x + character_size < arena_right:
                agent_beam_x = agent_x + character_size; agent_beam_y = agent_y + half_offset
                agent_beam_size_x = arena_right - agent_x - character_size; agent_beam_size_y = beam_width
            else:
                shot_is_valid = False
        elif agent_action == 3:
            if agent_y + character_size < arena_bottom:
                agent_beam_x = agent_x + half_offset; agent_beam_y = agent_y + character_size
                agent_beam_size_x = beam_width; agent_beam_size_y = arena_bottom - agent_y - character_size
            else:
                shot_is_valid = False
        elif agent_action == 4:
            if agent_x > arena_left:
                agent_beam_x = arena_left; agent_beam_y = agent_y + half_offset
                agent_beam_size_x = agent_x - arena_left; agent_beam_size_y = beam_width
            else:
                shot_is_valid = False
        if not shot_is_valid:
            # Firing straight into the adjacent wall is penalised. No beam
            # exists, so the rectangle left over from an earlier turn must not
            # be hit-tested or drawn.
            reward += -25
            agent_beam_fire = False
    elif 5 <= agent_action <= 8:
        if agent_action == 5:
            agent_y -= character_move_speed
            if agent_y <= arena_top:
                agent_y = arena_top
                reward += -5
            elif bot_y <= agent_y <= bot_y + character_size and \
                    bot_x <= agent_x <= bot_x + character_size:
                agent_y = bot_y + character_size
                reward += -2
        elif agent_action == 6:
            agent_x += character_move_speed
            if agent_x + character_size >= arena_right:
                agent_x = arena_right - character_size
                reward += -5
            elif bot_x <= agent_x + character_size <= bot_x + character_size and \
                    bot_y <= agent_y <= bot_y + character_size:
                agent_x = bot_x - character_size
                reward += -2
        elif agent_action == 7:
            agent_y += character_move_speed
            if agent_y + character_size >= arena_bottom:
                agent_y = arena_bottom - character_size
                reward += -5
            elif bot_y <= agent_y + character_size <= bot_y + character_size and \
                    bot_x <= agent_x <= bot_x + character_size:
                agent_y = bot_y - character_size
                reward += -2
        elif agent_action == 8:
            agent_x -= character_move_speed
            if agent_x <= arena_left:
                agent_x = arena_left
                reward += -5
            elif bot_x <= agent_x <= bot_x + character_size and \
                    bot_y <= agent_y <= bot_y + character_size:
                agent_x = bot_x + character_size
                reward += -2

    if agent_beam_fire == True:
        if beam_hit_detector("agent"):
            #print "Bot Got Hit!"
            bot_hp -= beam_damage
            reward += 50
            agent_beam_size_x = agent_beam_size_y = 0
            agent_beam_x = agent_beam_y = beam_ob
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"

    return reward, cont, successful, winner


def bot_beam_dir_detector():
    """Encode the bot's current action as the beam-direction state feature.

    Actions 1, 2, 3, 4 map to 2, 4, 3, 1 respectively; anything else maps to 0.
    """
    return {1: 2, 2: 4, 3: 3, 4: 1}.get(bot_current_action, 0)


def _build_state():
    """Return the 1x7 feature row fed to the network: both positions and
    their signed difference (each scaled by the arena size), plus the bot's
    beam-direction code."""
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        #mapping(complexity, float(agent_hp) / float(character_init_health)),
        #mapping(complexity, float(bot_hp) / float(character_init_health)),
        mapping(complexity, float(agent_x - bot_x) / float(arena_x)),
        mapping(complexity, float(agent_y - bot_y) / float(arena_y)),
        bot_beam_dir,
    ]])


#Parameters
y = 0.75            #discount factor applied to the next state's best Q-value
e = 0.3             #probability of taking a random action
num_episodes = 10000
batch_size = 10
complexity = 100

with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    for i in tqdm(range(1, num_episodes + 1)):
        #print "Episode #", i
        rAll = 0; d = False; c = True; j = 0
        param_init()
        samples = []
        while c == True:
            j += 1
            current_state = _build_state()

            b = bot_take_action()
            if np.random.rand(1) < e or i <= 5:
                a = random.randint(0, 8)
            else:
                # predict yields a length-1 array; store a plain int so the
                # replay buffer holds a usable column index.
                a = int(sess.run(predict, feed_dict={input_layer: current_state})[0])

            r, c, d, winner = action(a + 1, b)
            bot_beam_dir = bot_beam_dir_detector()
            next_state = _build_state()
            samples.append([current_state, a, r, next_state])

            if len(samples) > 10:
                for count in range(batch_size):
                    batch_current_state, action_taken, reward, batch_next_state = \
                        random.choice(samples)
                    batch_allQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                    batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                    batch_maxQ1 = np.max(batch_Q1)
                    batch_targetQ = batch_allQ
                    # Update the action stored with the sampled transition,
                    # not the action chosen on the current step.
                    batch_targetQ[0][action_taken] = reward + y * batch_maxQ1
                    sess.run([updateModel], feed_dict={input_layer: batch_current_state,
                                                       next_Q: batch_targetQ})
            rAll += r

            screen_blit()

            if d == True:
                e = 1. / ((i // 50) + 10)
                success += 1
                break

            #print agent_hp, bot_hp
            display.update()

        jList.append(j)
        rList.append(rAll)
        print(winner)

Update #3 on August 22, 2017:

I've noticed that if the agent hits the bot with a bullet on a given turn, and the action taken on that turn was not "fire a bullet", then the wrong action is given credit for the hit. To fix this, I've turned the bullets into beams, so the bot/agent takes damage on the same turn the beam is fired.

added 252 characters in body
Source Link
nedward
  • 414
  • 5
  • 13

Update #1 on August 14th, 2017:

Update #2 on August 18, 2017:

Based on the advice of @NeilSlater, I've implemented experience replay in my model. The algorithm has improved, but I'm going to keep looking for further improvements that help it converge.

Update:

Update #1 on August 14th, 2017:

Update #2 on August 18, 2017:

Based on the advice of @NeilSlater, I've implemented experience replay in my model. The algorithm has improved, but I'm going to keep looking for further improvements that help it converge.

Updated the code
Source Link
nedward
  • 414
  • 5
  • 13
from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf #Screen Setup disp_x, disp_y = 1500, 1000 arena_x, arena_y = 800, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200);   red = (255, 0, 0); green = (0, 255, 0);   blue = (0, 0, 255); black = (0, 0, 0) #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radius = 30 character_move_speed = 20 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 3050 bullet_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state) bullet_a_fire = False; bullet_b_fire = False #The Neural Network input_layer = tf.placeholder(shape=[1,8],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer() #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10  jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32)  bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() 
agent_bullet_direction_x = agent_bullet_direction_y = int() bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fire, \ bot_bullet_fire, agent_bullet_direction_x, agent_bullet_direction_y, bot_bullet_direction_x, \ bot_bullet_direction_y, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_x, \ agent_bullet_y, bullet_radius, bot_bullet_x, bot_bullet_y, character_radius, agent_x, \ agent_y, bot_x, bot_y, character_init_health, agent_hp, bot_hp, red, blue, aqua, green, black disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius) draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius) draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2) draw.circle(disp, red, (agent_x, agent_y), character_radius) draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2) draw.circle(disp, blue, (bot_x, bot_y), character_radius) 
draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14)) def bot_take_action(): global agent_x, agent_y, bot_x, bot_y, character_radius, bot_action, border if agent_x - character_radius - border <= bot_x <= agent_x + character_radius + border: if random.randint(0, 100) > 5: if agent_y <= bot_y: bot_action = 1 else: bot_action = 3 else: bot_action = 9 elif agent_y - character_radius <= bot_y <= agent_y + character_radius: if random.randint(0, 100) > 5: if agent_x <= bot_x: bot_action = 4 else: bot_action = 2 else: bot_action = 9 else: if random.randint(0, 100) > 5: x_dist = abs(bot_x - agent_x); y_dist = abs(bot_y - agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0: bot_action = 6 else: bot_action = 8 else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player == "bot": if bot_bullet_x == last_bot_bullet_x: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < bot_bullet_y - bullet_radius < agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > bot_bullet_y 
+ bullet_radius) \ or (bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_y - bullet_radius < agent_y + character_radius + border or \ agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radius): return True else: return False else: if agent_bullet_x == last_agent_bullet_x: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius + border or \ bot_x - character_radius - border < agent_bullet_x - bullet_radius < bot_x + character_radius + border: #If the current state of the bullet is touching/inside the agent: if bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius or \ bot_y - character_radius < agent_bullet_y - bullet_radius < bot_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border > agent_bullet_y + bullet_radius) \ or (agent_bullet_y - bullet_radius > bot_y + character_radius + border and bot_y - character_radius - border 
> last_agent_bullet_y + bullet_radius): return True else: return False elif agent_bullet_y == last_agent_bullet_y: if bot_y - character_radius - border < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \ bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > last_agent_bullet_x + bullet_radius): return True else: return False  def mapping(maximum, number): return int(abs(number * maximum) / (maximum/10))  def clip(value): if value > 250.0: value = 250.0 elif value < -250.0: value = -250.0 return value  #Environmentdef action(Training)agent_action, Parametersbot_action): #agent_bullet_fire = bot_bullet_fire = False #agent_bullet_dir =global [0bot_bullet_x, 0];bot_bullet_y, bot_bullet_diragent_bullet_x, =agent_bullet_y, [0agent_bullet_fire, 0]\ #The environment: def action(agent_x, agent_y, bot_xbot_bullet_fire, bot_yagent_bullet_direction_x, agent_actionagent_bullet_direction_y, bot_actionbot_bullet_direction_x, agent_hp,\  bot_hp, agent_bullet bot_bullet_direction_y, bot_bulletagent_x, agent_bullet_fireagent_y, bot_bullet_firebot_x, agent_bullet_dirbot_y, bot_bullet_dir): agent_hp, bot_hp, last_agent_bullet_x, last_agent_bullet_y, #Bulletlast_bot_bullet_x, Managementlast_bot_bullet_y reward = 0  0; cont = True  True; successful = False False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == 
False:  #If bullet's fired by bot:  bot_bullet_fire = True if bot_action == 1: bot_bullet_dirbot_bullet_direction_x = [0,0; bot_bullet_direction_y = -bullet_speed]bullet_speed elif bot_action == 2: bot_bullet_dirbot_bullet_direction_x = [bullet_speed,bullet_speed; 0]bot_bullet_direction_y = 0 elif bot_action == 3: bot_bullet_dirbot_bullet_direction_x = [0,0; bullet_speed]bot_bullet_direction_y = bullet_speed elif bot_action == 4: bot_bullet_dirbot_bullet_direction_x = [-bullet_speed, 0] elif bot_bullet_fire == True: bot_bullet[0] = bot_bullet[0] + bot_bullet_dir[0]; bot_bullet[1] = bot_bullet[1] + bot_bullet_dir[1] if bot_bullet[0] > disp_x/2 + arena_x/2 + bullet_radius or bot_bullet[0] < disp_x/2 - arena_x/2 - bullet_radius or bot_bullet[1] > disp_y/2 + arena_y/2 + bullet_radius or bot_bullet[1] < disp_y/2 - arena_y/2 - bullet_radius: bullet_speed; bot_bullet_firebot_bullet_direction_y = False0 bot_bulletbot_bullet_x = [bot_x, bot_y] if agent_x - character_radius - border <= bot_bullet[0] <= agent_x + character_radius + border and agent_y - character_radius - border < bot_bullet[1] < agent_y + character_radiusbot_x + border: agent_hp -= bullet_damage  bot_bullet_direction_x; rewardbot_bullet_y = -50 if agent_hp <= 0:  contbot_y =+ Falsebot_bullet_direction_y   winner = "Bot" ifelif 5 <= bot_action <= 8: bot_bullet = [bot_x, bot_y] if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 + character_radius + 1 elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1: bot_x = disp_x/2 + arena_x/2 - character_radius - 1 elif bot_action == 7: bot_y += character_move_speed if bot_y >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radius - 1 elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 + character_radius + 1    
if 1 <= agent_action <= 4 and agent_bullet_firebot_bullet_fire == FalseTrue: agent_bullet_firelast_bot_bullet_x = True  bot_bullet_x; last_bot_bullet_y = ifbot_bullet_y  agent_action == 1:  bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y if   agent_y - character_radius > disp_y/2 - arena_y +if borderbullet_hit_detector("bot"):   agent_bullet_dirprint ="Agent [0,Got -bullet_speed]Hit!" else: agent_hp -= bullet_damage  reward = -25 elif agent_action == 2:50 if agent_x + character_radius < disp_x/2 + arena_xbot_bullet_fire -= border:False   agent_bullet_dirbot_bullet_direction_x = [bullet_speed, 0]  0; bot_bullet_direction_y = else:0   rewardbot_bullet_x = -25  bot_bullet_y = bullet_ob; last_bot_bullet_x elif= agent_actionlast_bot_bullet_y === 3:bullet_ob if agent_y + character_radius < disp_y/2 + arena_yagent_hp -<= border0: agent_bullet_dircont = [0, bullet_speed]  else:False rewardwinner = -25"Bot" elif agent_action == 4:  bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 ifor agent_xbot_bullet_x - character_radiusbullet_radius ><= disp_x/2 - arena_x/2 +or border:\   bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 agent_bullet_diror =bot_bullet_y [-bullet_speed, 0]  bullet_radius <= disp_y/2 - elsearena_y/2:   rewardbot_bullet_fire = -25False   elif 1 <= agent_action <= 4 and agent_bullet_fire == True:  bot_bullet_direction_x = 0; rewardbot_bullet_direction_y = -200   elif agent_bullet_fire == True:  agent_bullet[0]bot_bullet_x = agent_bullet[0] + agent_bullet_dir[0]; agent_bullet[1]bot_bullet_y = agent_bullet[1] + agent_bullet_dir[1]  bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob if agent_bullet[0]1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fire = True  if agent_action == 1: if agent_y - character_radius - border > disp_xdisp_y/2 +- arena_xarena_y/2: agent_bullet_direction_x = 0; agent_bullet_direction_y = -bullet_speed reward = 10 else: reward = -25  agent_bullet_x = agent_bullet_y = 
bullet_ob agent_bullet_fire = False elif agent_action == 2: if agent_x + bullet_radiuscharacter_radius or+ agent_bullet[0]border < disp_x/2 -+ arena_x/2: agent_bullet_direction_x = bullet_speed; agent_bullet_direction_y = 0 reward = 10 else: reward = -25  bullet_radius or agent_bullet[1] > disp_y/2 + arena_y/2  agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False elif agent_action == 3: if agent_y + bullet_radiuscharacter_radius or+ agent_bullet[1]border < disp_y/2 -+ arena_y/2:  - bullet_radius agent_bullet_direction_x = 0; agent_bullet_direction_y = bullet_speed reward = 10 else: agent_bullet_fire  reward = False-25 agent_bullet agent_bullet_x = [agent_x,agent_bullet_y agent_y]= bullet_ob agent_bullet_dir  agent_bullet_fire = [0,False  0] elif agent_action == 4: if bot_x - character_radius <= agent_bullet[0] <= bot_x + character_radius and bot_y if agent_x - character_radius <- agent_bullet[1]border <> bot_ydisp_x/2 +- character_radiusarena_x/2: bot_hp  agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_y = bullet_damage0   reward = 10010 agent_bullet_fire else:  reward = False-25 agent_bullet_dir  agent_bullet_x = [0,agent_bullet_y 0]= bullet_ob if bot_hp <= 0: agent_bullet_fire = False successfulif =agent_bullet_fire == True: cont  agent_bullet_x = Falseagent_x + agent_bullet_direction_x; agent_bullet_y = agent_y + agent_bullet_direction_y winner  last_agent_bullet_x = "Agent" agent_bullet_x; last_agent_bullet_y = agent_bullet_y  if 5 <= agent_action <= 8 else: agent_bullet reward = [agent_x,-20  agent_y] elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2 + character_radius + 1: agent_y = disp_y/2 - arena_y/2 + character_radius + 1border reward = -5 else: reward = 5 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + border >= disp_x/2 + arena_x/2 - character_radius - 1: agent_x = disp_x/2 + arena_x/2 
- character_radius - 1border reward = -5 else: reward = 5 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + border >= disp_y/2 + arena_y/2 - character_radius - 1: agent_y = disp_y/2 + arena_y/2 - character_radius - 1border reward = -5 else: reward = 5 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2 + character_radius + 1: agent_x = disp_x/2 - arena_x/2 + character_radius + 1border reward = -5 else: reward = 5  if agent_bullet_fire == True: last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_y agent_bullet_x += agent_bullet_direction_x; agent_bullet_y += agent_bullet_direction_y if bullet_hit_detector("agent"): print "Bot Got Hit!" bot_hp -= bullet_damage reward = 100 agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob if bot_hp <= 0: successful = True cont = False winner = "Agent" elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob return reward, cont, successful, winner   return reward, cont, successful, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet, winner, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir  #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10 with tf.Session() as sess: sess.run(initialize) success = 0 for i in range(1, num_episodes): rAll = 0  s = 0  0; d = False  agent_x = int(list(init_character_a_state)[0]); agent_y = 
int(list(init_character_a_state)[1]) bot_x = int(list(init_character_b_state)[0]); bot_y = int(list(init_character_b_state)[1])  agent_hp = bot_hp = int(character_init_health) bot_bullet = list(init_character_b_state); agent_bullet = list(init_character_a_state) agent_bullet_fire = bot_bullet_fire = False agent_bullet_dir = [0, 0];False; bot_bullet_dirc = [0, 0]  True; j = 0   c = Trueparam_init() samples = [] while c == True:  disp.fill(aqua) draw.rect(disp, black, (disp_x/2 - arena_x/2 - border, disp_y/2 - arena_y/2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x/2 - arena_x/2, disp_y/2 - arena_y/2, arena_x, arena_y)) draw.circle(disp, black, agent_bullet, bullet_radius) draw.circle(disp, black, bot_bullet, bullet_radius) draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2) draw.circle(disp, red, (agent_x, agent_y), character_radius) draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2) draw.circle(disp, blue, (bot_x, bot_y), character_radius) draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp)/float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp)/float(character_init_health) * 100, 14))  j += 1 """ ---CURRENT STATE--- Evenything will be on a scale of 0 to "complexity". 
screen_blit(0 = 0, "complexity" = max) """ complexity = 100 current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)),   mapping(complexity, float( agent_y) / float(arena_y)),   mapping(complexity, float( bot_x) / float(arena_x)),   mapping(complexity, float( bot_y) / float(arena_y)),   mapping(complexity, float( bot_bullet[0]bot_bullet_x / float(arena_x))),   mapping(complexity, float( bot_bullet[1]bot_bullet_y / float(arena_y))),   mapping(complexity, abs( float(agent_x - bot_x)) / float(arena_x)),   mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y))]])   #current_state is the array of parameters for feeding the neural network  #print current_state a,allQ = sess.runbot_take_action([predict, Q],feed_dict={input_layer : current_state}) #bot move #a[0] = moves[0]   #moves = moves[1:] #1~4 are shooting a bullet. 5~8 are movement. 9 is doing nothing. if agent_x - character_radius <= bot_x <= agent_x + character_radius:  if agent_y <= bot_y: if randomnp.randint(0, 100) > 20: bot_action = 1 else: if random.randintrand(0, 1001) > 20:  bot_action = 3 elif agent_y - character_radius <= bot_y <= agent_y + character_radius: < e ifor agent_xi <= bot_x: if random.randint(0, 100) > 205:   bot_actiona = 4 else:  if random.randint(0, 1008) > 20: bot_action = 2 else: if random.randint(0, 100) > 20: #Find opponenta, caluculate x and y distance and go the shortest way  x_dist = abs(bot_x - agent_x); y_dist_ = abssess.run(bot_y - agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0: bot_action = 6 else: bot_action = 8 else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 [predict, Q],feed_dict={input_layer else:  bot_action = random.randint(1, 9current_state}) if np.random.rand(1) < e: a[0] = random.randint(0,8) #Action: Takes positions and actions. 
r, c, d, new_agent_x, new_agent_y, new_bot_x, new_bot_y, new_agent_hp, new_bot_hp, new_agent_bullet, new_bot_bullet, winner, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir = action(agent_x, agent_y, bot_x,a bot_y,+ int(a[0]+1)1, bot_action, agent_hp, bot_hp, agent_bullet, bot_bullet, agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir) next_state = np.array([[mapping(complexity, float(new_agent_xagent_x) / float(arena_x)),   mapping(complexity, float( new_agent_yagent_y) / float(arena_y)),   mapping(complexity, float( new_bot_xbot_x) / float(arena_x)),   mapping(complexity, float( new_bot_ybot_y) / float(arena_y)),   mapping(complexity, float( new_bot_bullet[0]bot_bullet_x / float(arena_x))),   mapping(complexity, float( new_bot_bullet[1]bot_bullet_y / float(arena_y))),   mapping(complexity, abs(float( new_agent_xagent_x - new_bot_xbot_x)) / float(disp_xarena_x)),   mapping(complexity, abs(float(new_agent_yagent_y - new_bot_ybot_y)) / float(disp_yarena_y))]]) #Q1 = sess.run(Q, feed_dict = {input_layer : next_state}) #maxQ1 = np.max(Q1) #targetQ = allQ #targetQ[0,a[0]] = r + y * maxQ1 #for index, buf in enumerate(targetQ[0]): # targetQ[0][index] = clip(targetQ[0][index]) samples.append([current_state, a, r, next_state])  #print samples #print len(samples) allQ = sess.run(Q, feed_dict={input_layer : current_state}) print allQ  if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ #print batch_targetQ batch_targetQ[0,a[0]]batch_targetQ[0][a] = reward + y * batch_maxQ1 sess.run([updateModel], feed_dict={input_layer : batch_current_state, next_Q : batch_targetQ})  #print a[0] + 1, r, targetQ[0]  rAll += r  bot_x = new_bot_x; 
bot_y = new_bot_y; agent_x = new_agent_x; agent_y = new_agent_y; agent_hp = new_agent_hp; bot_hp = new_bot_hp; agent_bullet = new_agent_bullet; bot_bullet = new_bot_bullet  if d == True: e = 1. / ((i / 50) + 10) success += 1 break display.update()  #sleep(1) jList.append(j)  rList.append(rAll) print winner  print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList)  plt.show() 
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt  # needed by the reward plot at the end of the script

# Two-player arena shooter used to train a single-layer Q-network (the "agent",
# red) against a scripted opponent (the "bot", blue).
# Uses the TensorFlow 1.x graph API (tf.placeholder / tf.Session).
# Geometry uses floor division so pixel coordinates stay integers.

#Screen Setup
disp_x, disp_y = 1500, 1000
arena_x, arena_y = 800, 800
border = 4; border_2 = 1

#Arena edges in screen coordinates (the arena is centred on the display)
arena_left = disp_x // 2 - arena_x // 2
arena_right = disp_x // 2 + arena_x // 2
arena_top = disp_y // 2 - arena_y // 2
arena_bottom = disp_y // 2 + arena_y // 2

#Color Setup
white = (255, 255, 255); aqua = (0, 200, 200)
red = (255, 0, 0); green = (0, 255, 0)
blue = (0, 0, 255); black = (0, 0, 0)

#Initialize character positions (agent top-left, bot bottom-right)
init_character_a_state = [arena_left + 50, arena_top + 50]
init_character_b_state = [arena_right - 50, arena_bottom - 50]

#Setup character dimensions
character_radius = 30
character_move_speed = 20

#Initialize character stats
character_init_health = 100

#Initialize bullet stats
bullet_speed = 30
bullet_damage = 10
bullet_radius = 7
bullet_a_pos = list(init_character_a_state); bullet_b_pos = list(init_character_b_state)
bullet_a_fire = False; bullet_b_fire = False

#The Neural Network: 8 state features -> 9 action values, one weight matrix
input_layer = tf.placeholder(shape=[1, 8], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([8, 9], 0, 0.1))

#The calculations, loss function and the update model
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

#Parameters
y = 0.75              # discount factor
e = 0.3               # exploration rate (epsilon)
num_episodes = 10000
batch_size = 10       # replayed transitions per environment step
complexity = 100      # scale handed to mapping() when encoding a state

jList = []            # steps per episode
rList = []            # total reward per episode

init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

#Bullet velocity for each firing action: 1 = up, 2 = right, 3 = down, 4 = left
_FIRE_DIRECTIONS = {
    1: (0, -bullet_speed),
    2: (bullet_speed, 0),
    3: (0, bullet_speed),
    4: (-bullet_speed, 0),
}


def mapping(maximum, number):
    """Scale the magnitude of `number` by `maximum` and bucket it in steps of maximum/10."""
    return int(abs(number * maximum) / (maximum / 10))


def clip(value):
    """Clamp `value` to the range [-250.0, 250.0]."""
    if value > 250.0:
        value = 250.0
    elif value < -250.0:
        value = -250.0
    return value


def encode_state(agent_x, agent_y, bot_x, bot_y, bot_bullet):
    """Build the 1x8 feature row fed to the network.

    Features: agent x/y, bot x/y, bot bullet x/y and the agent-bot distance on
    each axis, all scaled by the arena size. The same encoder is used for the
    current and the next state so both describe a position identically.
    """
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        mapping(complexity, float(bot_bullet[0]) / float(arena_x)),
        mapping(complexity, float(bot_bullet[1]) / float(arena_y)),
        # mapping() takes the absolute value, so the sign of the difference is irrelevant.
        mapping(complexity, float(agent_x - bot_x) / float(arena_x)),
        mapping(complexity, float(agent_y - bot_y) / float(arena_y)),
    ]])


def choose_bot_action(agent_x, agent_y, bot_x, bot_y, previous_action):
    """Scripted bot policy.

    Actions 1-4 fire a bullet, 5-8 move, 9 does nothing. When lined up with the
    agent on one axis the bot shoots towards it; otherwise it walks along the
    axis with the larger gap. One roll in roughly five is "wasted": when lined
    up the previous action is repeated, otherwise a random action is taken.
    """
    roll_passed = random.randint(0, 100) > 20

    # Same column as the agent: shoot up or down.
    if agent_x - character_radius <= bot_x <= agent_x + character_radius:
        if not roll_passed:
            return previous_action
        return 1 if agent_y <= bot_y else 3

    # Same row as the agent: shoot left or right.
    if agent_y - character_radius <= bot_y <= agent_y + character_radius:
        if not roll_passed:
            return previous_action
        return 4 if agent_x <= bot_x else 2

    if not roll_passed:
        return random.randint(1, 9)

    # Find the opponent and close the larger of the two gaps.
    x_dist = abs(bot_x - agent_x)
    y_dist = abs(bot_y - agent_y)
    if x_dist >= y_dist:
        return 6 if bot_x - agent_x <= 0 else 8
    return 7 if bot_y - agent_y <= 0 else 5


def _bullet_left_arena(bullet):
    """Return True once a bullet centre is more than bullet_radius outside the arena."""
    return (bullet[0] > arena_right + bullet_radius
            or bullet[0] < arena_left - bullet_radius
            or bullet[1] > arena_bottom + bullet_radius
            or bullet[1] < arena_top - bullet_radius)


def _move_character(x, y, move_action):
    """Apply a movement action (5 up, 6 right, 7 down, 8 left), clamped to the arena.

    Returns (x, y, blocked); `blocked` is True when the move reached the wall limit.
    """
    min_x = arena_left + character_radius + 1
    max_x = arena_right - character_radius - 1
    min_y = arena_top + character_radius + 1
    max_y = arena_bottom - character_radius - 1

    blocked = False
    if move_action == 5:
        y -= character_move_speed
        if y <= min_y:
            y = min_y
            blocked = True
    elif move_action == 6:
        x += character_move_speed
        if x >= max_x:
            x = max_x
            blocked = True
    elif move_action == 7:
        y += character_move_speed
        if y >= max_y:
            y = max_y
            blocked = True
    elif move_action == 8:
        x -= character_move_speed
        if x <= min_x:
            x = min_x
            blocked = True
    return x, y, blocked


#The environment:
def action(agent_x, agent_y, bot_x, bot_y, agent_action, bot_action, agent_hp, bot_hp,
           agent_bullet, bot_bullet, agent_bullet_fire, bot_bullet_fire,
           agent_bullet_dir, bot_bullet_dir):
    """Advance the game by one turn.

    `agent_action` / `bot_action` are in 1..9 (1-4 fire, 5-8 move, 9 idle).
    `agent_bullet` / `bot_bullet` are [x, y] lists and may be updated in place.

    Returns, in this order: reward, cont, successful, agent_x, agent_y, bot_x,
    bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet, winner,
    agent_bullet_fire, bot_bullet_fire, agent_bullet_dir, bot_bullet_dir.
    `cont` turns False when either side runs out of health; `successful` is
    True only when the agent wins; `winner` is "", "Agent" or "Bot".
    """
    reward = 0
    cont = True
    successful = False
    winner = ""

    # ---- Bot bullet ----
    if 1 <= bot_action <= 4 and not bot_bullet_fire:
        # If bullet's fired by bot:
        bot_bullet_fire = True
        bot_bullet_dir = list(_FIRE_DIRECTIONS[bot_action])
    elif bot_bullet_fire:
        bot_bullet[0] = bot_bullet[0] + bot_bullet_dir[0]
        bot_bullet[1] = bot_bullet[1] + bot_bullet_dir[1]
        if _bullet_left_arena(bot_bullet):
            bot_bullet_fire = False
            bot_bullet = [bot_x, bot_y]
        # NOTE(review): a hit does not clear bot_bullet_fire, so the same bullet
        # keeps flying and can score again on a later turn.
        if (agent_x - character_radius - border <= bot_bullet[0] <= agent_x + character_radius + border
                and agent_y - character_radius - border < bot_bullet[1] < agent_y + character_radius + border):
            agent_hp -= bullet_damage
            reward = -50
            if agent_hp <= 0:
                cont = False
                winner = "Bot"

    # ---- Bot movement ----
    if 5 <= bot_action <= 8:
        # NOTE(review): this snaps the bullet back to the bot even while it is in flight.
        bot_bullet = [bot_x, bot_y]
        bot_x, bot_y, _ = _move_character(bot_x, bot_y, bot_action)

    # ---- Agent bullet ----
    if 1 <= agent_action <= 4 and not agent_bullet_fire:
        agent_bullet_fire = True
        # NOTE(review): these limits use the full arena size rather than half of
        # it, so with the current geometry they always pass and the -25 penalty
        # is never applied. Left as-is: tightening them without also clearing
        # agent_bullet_fire would leave a bullet "fired" with no velocity.
        if agent_action == 1:
            can_fire = agent_y - character_radius > disp_y // 2 - arena_y + border
        elif agent_action == 2:
            can_fire = agent_x + character_radius < disp_x // 2 + arena_x - border
        elif agent_action == 3:
            can_fire = agent_y + character_radius < disp_y // 2 + arena_y - border
        else:
            can_fire = agent_x - character_radius > disp_x // 2 - arena_x + border
        if can_fire:
            agent_bullet_dir = list(_FIRE_DIRECTIONS[agent_action])
        else:
            reward = -25
    elif 1 <= agent_action <= 4 and agent_bullet_fire:
        # Trying to fire while a bullet is already in flight is penalised.
        reward = -20
    elif agent_bullet_fire:
        agent_bullet[0] = agent_bullet[0] + agent_bullet_dir[0]
        agent_bullet[1] = agent_bullet[1] + agent_bullet_dir[1]
        if _bullet_left_arena(agent_bullet):
            agent_bullet_fire = False
            agent_bullet = [agent_x, agent_y]
            agent_bullet_dir = [0, 0]
        if (bot_x - character_radius <= agent_bullet[0] <= bot_x + character_radius
                and bot_y - character_radius < agent_bullet[1] < bot_y + character_radius):
            bot_hp -= bullet_damage
            reward = 100
            agent_bullet_fire = False
            agent_bullet_dir = [0, 0]
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"

    # ---- Agent movement ----
    if 5 <= agent_action <= 8:
        agent_bullet = [agent_x, agent_y]
        agent_x, agent_y, blocked = _move_character(agent_x, agent_y, agent_action)
        # Walking into a wall is penalised, a free step is rewarded. This
        # overwrites any bullet reward earned earlier in the same turn.
        reward = -5 if blocked else 5

    return (reward, cont, successful, agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp,
            agent_bullet, bot_bullet, winner, agent_bullet_fire, bot_bullet_fire,
            agent_bullet_dir, bot_bullet_dir)


def draw_frame(agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, agent_bullet, bot_bullet):
    """Draw the arena, both bullets, both characters and the two health bars."""
    disp.fill(aqua)
    draw.rect(disp, black, (arena_left - border, arena_top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (arena_left, arena_top, arena_x, arena_y))
    draw.circle(disp, black, agent_bullet, bullet_radius)
    draw.circle(disp, black, bot_bullet, bullet_radius)
    draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2)
    draw.circle(disp, red, (agent_x, agent_y), character_radius)
    draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2)
    draw.circle(disp, blue, (bot_x, bot_y), character_radius)
    draw.rect(disp, red, (disp_x // 2 - 200, arena_bottom + border + 1,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, arena_bottom + border + 1,
                           float(bot_hp) / float(character_init_health) * 100, 14))


with tf.Session() as sess:
    sess.run(initialize)
    success = 0
    bot_action = 9  # "do nothing" until the scripted policy picks its first action

    for i in range(num_episodes):
        rAll = 0
        d = False
        agent_x = int(init_character_a_state[0]); agent_y = int(init_character_a_state[1])
        bot_x = int(init_character_b_state[0]); bot_y = int(init_character_b_state[1])
        agent_hp = bot_hp = int(character_init_health)
        bot_bullet = list(init_character_b_state); agent_bullet = list(init_character_a_state)
        agent_bullet_fire = bot_bullet_fire = False
        agent_bullet_dir = [0, 0]; bot_bullet_dir = [0, 0]
        j = 0
        c = True
        samples = []  # replay memory, reset at the start of every episode

        while c:
            draw_frame(agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp,
                       agent_bullet, bot_bullet)
            j += 1

            # current_state is the array of parameters fed to the neural network.
            current_state = encode_state(agent_x, agent_y, bot_x, bot_y, bot_bullet)
            a, allQ = sess.run([predict, Q], feed_dict={input_layer: current_state})
            agent_choice = int(a[0])

            # Bot move. 1~4 are shooting a bullet. 5~8 are movement. 9 is doing nothing.
            bot_action = choose_bot_action(agent_x, agent_y, bot_x, bot_y, bot_action)

            # Epsilon-greedy exploration for the agent.
            if np.random.rand(1) < e:
                agent_choice = random.randint(0, 8)

            # The network indexes actions 0..8; the environment expects 1..9.
            (r, c, d, new_agent_x, new_agent_y, new_bot_x, new_bot_y, new_agent_hp,
             new_bot_hp, new_agent_bullet, new_bot_bullet, winner, agent_bullet_fire,
             bot_bullet_fire, agent_bullet_dir, bot_bullet_dir) = action(
                agent_x, agent_y, bot_x, bot_y, agent_choice + 1, bot_action,
                agent_hp, bot_hp, agent_bullet, bot_bullet, agent_bullet_fire,
                bot_bullet_fire, agent_bullet_dir, bot_bullet_dir)

            next_state = encode_state(new_agent_x, new_agent_y, new_bot_x, new_bot_y,
                                      new_bot_bullet)

            # Store the action as a plain int so the replay step can index with it.
            samples.append([current_state, agent_choice, r, next_state])

            allQ = sess.run(Q, feed_dict={input_layer: current_state})
            print(allQ)

            # Experience replay: train on random transitions from this episode.
            if len(samples) > 10:
                for count in range(batch_size):
                    [batch_current_state, action_taken, reward, batch_next_state] = \
                        samples[random.randint(0, len(samples) - 1)]
                    batch_allQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                    batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                    batch_maxQ1 = np.max(batch_Q1)
                    batch_targetQ = batch_allQ
                    # Update the entry of the action taken in the sampled
                    # transition, not the action of the current step.
                    batch_targetQ[0, action_taken] = reward + y * batch_maxQ1
                    sess.run([updateModel], feed_dict={input_layer: batch_current_state,
                                                       next_Q: batch_targetQ})

            rAll += r
            bot_x = new_bot_x; bot_y = new_bot_y
            agent_x = new_agent_x; agent_y = new_agent_y
            agent_hp = new_agent_hp; bot_hp = new_bot_hp
            agent_bullet = new_agent_bullet; bot_bullet = new_bot_bullet

            if d:
                # Lower the exploration rate after each agent win.
                e = 1. / ((i // 50) + 10)
                success += 1
                break

            display.update()

        jList.append(j)
        rList.append(rAll)
        print(winner)

plt.plot(rList)
plt.show()
#Imports. The wildcard pygame imports are kept on purpose: the rest of the
#script calls init(), font, display and draw as bare names.
from pygame import *
from pygame.locals import *
import sys
from time import sleep
import numpy as np
import random
import tensorflow as tf

#Screen Setup
disp_x, disp_y = 1500, 1000
arena_x, arena_y = 800, 800
border = 4
border_2 = 1

#Color Setup
white = (255, 255, 255)
aqua = (0, 200, 200)
red = (255, 0, 0)
green = (0, 255, 0)
blue = (0, 0, 255)
black = (0, 0, 0)

#Initialize character positions
#Floor division keeps the pixel coordinates integers on Python 3 as well as
#Python 2 (plain "/" would produce floats on Python 3).
init_character_a_state = [disp_x // 2 - arena_x // 2 + 50,
                          disp_y // 2 - arena_y // 2 + 50]
init_character_b_state = [disp_x // 2 + arena_x // 2 - 50,
                          disp_y // 2 + arena_y // 2 - 50]

#Setup character dimentions
character_radius = 30
character_move_speed = 20

#Initialize character stats
character_init_health = 100

#initialize bullet stats
bullet_speed = 50
bullet_damage = 10
bullet_radius = 7
bullet_a_pos = list(init_character_a_state)
bullet_b_pos = list(init_character_b_state)
bullet_a_fire = False
bullet_b_fire = False

#The Neural Network
#A single weight matrix maps the 1x8 state row to 1x9 Q values.
input_layer = tf.placeholder(shape=[1, 8], dtype=tf.float32)
weight_1 = tf.Variable(tf.random_uniform([8, 9], 0, 0.1))

#The calculations, loss function and the update model
Q = tf.matmul(input_layer, weight_1)
predict = tf.argmax(Q, 1)
next_Q = tf.placeholder(shape=[1, 9], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(next_Q - Q))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001)
updateModel = trainer.minimize(loss)
initialize = tf.global_variables_initializer()

#Per-episode statistics
jList = []
rList = []

#Pygame setup
init()
font.init()
myfont = font.SysFont('Comic Sans MS', 15)
myfont2 = font.SysFont('Comic Sans MS', 150)
myfont3 = font.SysFont('Gothic', 30)
disp = display.set_mode((disp_x, disp_y), 0, 32)

#Coordinate used to park a bullet that is not in flight
bullet_ob = -100

#CHARACTER/BULLET PARAMETERS
bot_bullet_x = bot_bullet_y = bullet_ob
agent_bullet_x = agent_bullet_y = bullet_ob
last_bot_bullet_x = last_bot_bullet_y = bullet_ob
last_agent_bullet_x = last_agent_bullet_y = bullet_ob
agent_bullet_fire = bot_bullet_fire = False
agent_bullet_direction_x = agent_bullet_direction_y = 0
bot_bullet_direction_x = bot_bullet_direction_y = 0
agent_x = agent_y = 0
bot_x = bot_y = 0
agent_hp = bot_hp = 0

#Unit vectors for the fire actions 1-4; the move actions 5-8 use the same
#order shifted by four (negative y first, then positive x, positive y,
#negative x).
_DIRECTIONS = {1: (0, -1), 2: (1, 0), 3: (0, 1), 4: (-1, 0)}


def param_init():
    """Initializes parameters"""
    #The last_* names are declared global too; without that the assignments
    #below would only create locals and the stored positions would survive
    #from one episode to the next.
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, \
        agent_bullet_fire, bot_bullet_fire, \
        agent_bullet_direction_x, agent_bullet_direction_y, \
        bot_bullet_direction_x, bot_bullet_direction_y, \
        agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, \
        last_bot_bullet_x, last_bot_bullet_y
    agent_bullet_x = agent_bullet_y = bullet_ob
    bot_bullet_x = bot_bullet_y = bullet_ob
    last_agent_bullet_x = last_agent_bullet_y = bullet_ob
    last_bot_bullet_x = last_bot_bullet_y = bullet_ob
    agent_bullet_fire = bot_bullet_fire = False
    agent_bullet_direction_x = agent_bullet_direction_y = 0
    bot_bullet_direction_x = bot_bullet_direction_y = 0
    agent_x, agent_y = init_character_a_state
    bot_x, bot_y = init_character_b_state
    agent_hp = bot_hp = character_init_health


def _arena_bounds():
    """Return (left, top, right, bottom) of the arena in display pixels."""
    #Floor division keeps the coordinates integers on Python 2 and 3.
    center_x = disp_x // 2
    center_y = disp_y // 2
    half_w = arena_x // 2
    half_h = arena_y // 2
    return center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h


def screen_blit():
    """Draw the arena, both bullets, both characters and both health bars."""
    left, top, right, bottom = _arena_bounds()
    disp.fill(aqua)
    #Black frame first, then the green arena on top of it.
    draw.rect(disp, black, (left - border, top - border,
                            arena_x + border * 2, arena_y + border * 2))
    draw.rect(disp, green, (left, top, arena_x, arena_y))
    draw.circle(disp, black, [agent_bullet_x, agent_bullet_y], bullet_radius)
    draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius)
    #Each character is a black disc with a slightly smaller coloured disc on top.
    draw.circle(disp, black, (agent_x, agent_y), character_radius + border_2)
    draw.circle(disp, red, (agent_x, agent_y), character_radius)
    draw.circle(disp, black, (bot_x, bot_y), character_radius + border_2)
    draw.circle(disp, blue, (bot_x, bot_y), character_radius)
    #Health bars below the arena; width is the remaining health in percent.
    bar_y = bottom + border + 1
    draw.rect(disp, red, (disp_x // 2 - 200, bar_y,
                          float(agent_hp) / float(character_init_health) * 100, 14))
    draw.rect(disp, blue, (disp_x // 2 + 200, bar_y,
                           float(bot_hp) / float(character_init_health) * 100, 14))


def bot_take_action():
    """Pick the scripted bot's next action and store it in global bot_action.

    Actions 1-4 fire, 5-8 move and 9 does nothing (see action()).
    """
    global bot_action
    #One draw per call, exactly as before: roughly 5% of the time the bot
    #does not follow its rule.
    follows_rule = random.randint(0, 100) > 5
    reach = character_radius + border
    if agent_x - reach <= bot_x <= agent_x + reach:
        #Agent is in the bot's column: fire along y.
        if follows_rule:
            bot_action = 1 if agent_y <= bot_y else 3
        else:
            bot_action = 9
    elif agent_y - character_radius <= bot_y <= agent_y + character_radius:
        #Agent is in the bot's row: fire along x.
        if follows_rule:
            bot_action = 4 if agent_x <= bot_x else 2
        else:
            bot_action = 9
    elif follows_rule:
        #Not lined up: step along the axis with the larger gap.
        x_dist = abs(bot_x - agent_x)
        y_dist = abs(bot_y - agent_y)
        if x_dist >= y_dist:
            bot_action = 6 if bot_x - agent_x <= 0 else 8
        else:
            bot_action = 7 if bot_y - agent_y <= 0 else 5
    else:
        bot_action = random.randint(1, 9)


def _axis_hit(cross_pos, target_cross, along_pos, last_along, target_along):
    """Hit test for a bullet that moves along one axis only.

    cross_pos / target_cross are the bullet and target coordinates on the
    axis the bullet does NOT move along; along_pos / last_along /
    target_along are the current bullet, previous bullet and target
    coordinates on the axis of travel.
    """
    reach = character_radius + border
    #The bullet must overlap the target on the cross axis at all.
    overlaps_cross = (
        target_cross - reach < cross_pos + bullet_radius < target_cross + reach
        or target_cross - reach < cross_pos - bullet_radius < target_cross + reach)
    if not overlaps_cross:
        return False
    #If the current state of the bullet is touching/inside the target.
    #(The bounds are intentionally copied unchanged from the original tests.)
    if (target_along - reach < along_pos + bullet_radius < target_along + character_radius
            or target_along - character_radius < along_pos - bullet_radius < target_along + reach):
        return True
    #If the bullet "passed through" the target since the last turn.
    return bool(
        (last_along - bullet_radius > target_along + reach
         and target_along - reach > along_pos + bullet_radius)
        or (along_pos - bullet_radius > target_along + reach
            and target_along - reach > last_along + bullet_radius))


def bullet_hit_detector(player):
    """Return True when the bullet fired by player hit the opponent.

    player == "bot" tests the bot's bullet against the agent; any other
    value tests the agent's bullet against the bot. Always returns a bool.
    """
    if player == "bot":
        cur_x, cur_y = bot_bullet_x, bot_bullet_y
        prev_x, prev_y = last_bot_bullet_x, last_bot_bullet_y
        target_x, target_y = agent_x, agent_y
    else:
        cur_x, cur_y = agent_bullet_x, agent_bullet_y
        prev_x, prev_y = last_agent_bullet_x, last_agent_bullet_y
        target_x, target_y = bot_x, bot_y
    if cur_x == prev_x:
        #x unchanged: the bullet travels along y.
        return _axis_hit(cur_x, target_x, cur_y, prev_y, target_y)
    if cur_y == prev_y:
        #y unchanged: the bullet travels along x.
        return _axis_hit(cur_y, target_y, cur_x, prev_x, target_x)
    return False


def mapping(maximum, number):
    """Scale number by maximum and bucket it in steps of maximum/10.

    The bucket width is computed in floating point so that a maximum below
    10 no longer becomes an integer zero divisor on Python 2.
    """
    return int(abs(number * maximum) / (maximum / 10.0))


def _step_clamped(position, delta, low, high):
    """Move position by delta; return (new_position, touched_limit).

    Only the limit in the direction of travel is checked, matching the
    original per-direction tests.
    """
    moved = position + delta
    if delta < 0 and moved <= low:
        return low, True
    if delta > 0 and moved >= high:
        return high, True
    return moved, False


def _bullet_left_arena(x, y):
    """Return True when a bullet centred on (x, y) touches or crosses an arena edge."""
    left, top, right, bottom = _arena_bounds()
    return (x + bullet_radius >= right or x - bullet_radius <= left
            or y + bullet_radius >= bottom or y - bullet_radius <= top)


def action(agent_action, bot_action):
    """Advance the game by one step.

    agent_action and bot_action are integers 1-9: 1-4 fire a bullet,
    5-8 move the character and 9 does nothing.

    Returns (reward, cont, successful, winner): the agent's reward for the
    step, whether the episode continues, whether the agent won, and the
    winner's name ("Agent", "Bot" or "" while undecided).
    """
    global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, \
        agent_bullet_fire, bot_bullet_fire, \
        agent_bullet_direction_x, agent_bullet_direction_y, \
        bot_bullet_direction_x, bot_bullet_direction_y, \
        agent_x, agent_y, bot_x, bot_y, agent_hp, bot_hp, \
        last_agent_bullet_x, last_agent_bullet_y, \
        last_bot_bullet_x, last_bot_bullet_y

    #Accept a one-element numpy array (what sess.run returns) as well as an int.
    agent_action = int(agent_action)
    left, top, right, bottom = _arena_bounds()
    reward = 0
    cont = True
    successful = False
    winner = ""

    #Bot: start a shot or move
    if 1 <= bot_action <= 4 and not bot_bullet_fire:
        bot_bullet_fire = True
        unit_x, unit_y = _DIRECTIONS[bot_action]
        bot_bullet_direction_x = unit_x * bullet_speed
        bot_bullet_direction_y = unit_y * bullet_speed
        bot_bullet_x = bot_x + bot_bullet_direction_x
        bot_bullet_y = bot_y + bot_bullet_direction_y
    elif 5 <= bot_action <= 8:
        unit_x, unit_y = _DIRECTIONS[bot_action - 4]
        margin = character_radius + 1
        if unit_x:
            bot_x, _hit = _step_clamped(bot_x, unit_x * character_move_speed,
                                        left + margin, right - margin)
        else:
            bot_y, _hit = _step_clamped(bot_y, unit_y * character_move_speed,
                                        top + margin, bottom - margin)

    #Bot: advance the bullet in flight
    if bot_bullet_fire:
        last_bot_bullet_x = bot_bullet_x
        last_bot_bullet_y = bot_bullet_y
        bot_bullet_x += bot_bullet_direction_x
        bot_bullet_y += bot_bullet_direction_y
        if bullet_hit_detector("bot"):
            print("Agent Got Hit!")
            agent_hp -= bullet_damage
            reward = -50
            bot_bullet_fire = False
            bot_bullet_direction_x = bot_bullet_direction_y = 0
            bot_bullet_x = bot_bullet_y = bullet_ob
            last_bot_bullet_x = last_bot_bullet_y = bullet_ob
            if agent_hp <= 0:
                cont = False
                winner = "Bot"
        elif _bullet_left_arena(bot_bullet_x, bot_bullet_y):
            bot_bullet_fire = False
            bot_bullet_direction_x = bot_bullet_direction_y = 0
            bot_bullet_x = bot_bullet_y = bullet_ob
            last_bot_bullet_x = last_bot_bullet_y = bullet_ob

    #Agent: start a shot or move
    reach = character_radius + border
    if 1 <= agent_action <= 4:
        if agent_bullet_fire:
            #A bullet is already in flight.
            reward = -20
        else:
            #A shot is refused when the agent is flush against the wall it
            #is aiming at.
            if agent_action == 1:
                has_room = agent_y - reach > top
            elif agent_action == 2:
                has_room = agent_x + reach < right
            elif agent_action == 3:
                has_room = agent_y + reach < bottom
            else:
                has_room = agent_x - reach > left
            if has_room:
                unit_x, unit_y = _DIRECTIONS[agent_action]
                agent_bullet_fire = True
                agent_bullet_direction_x = unit_x * bullet_speed
                agent_bullet_direction_y = unit_y * bullet_speed
                reward = 10
                agent_bullet_x = agent_x + agent_bullet_direction_x
                agent_bullet_y = agent_y + agent_bullet_direction_y
                last_agent_bullet_x = agent_bullet_x
                last_agent_bullet_y = agent_bullet_y
            else:
                reward = -25
                agent_bullet_x = agent_bullet_y = bullet_ob
                agent_bullet_fire = False
    elif 5 <= agent_action <= 8:
        unit_x, unit_y = _DIRECTIONS[agent_action - 4]
        if unit_x:
            agent_x, hit_wall = _step_clamped(agent_x, unit_x * character_move_speed,
                                              left + reach, right - reach)
        else:
            agent_y, hit_wall = _step_clamped(agent_y, unit_y * character_move_speed,
                                              top + reach, bottom - reach)
        reward = -5 if hit_wall else 5

    #Agent: advance the bullet in flight
    if agent_bullet_fire:
        last_agent_bullet_x = agent_bullet_x
        last_agent_bullet_y = agent_bullet_y
        agent_bullet_x += agent_bullet_direction_x
        agent_bullet_y += agent_bullet_direction_y
        if bullet_hit_detector("agent"):
            print("Bot Got Hit!")
            bot_hp -= bullet_damage
            reward = 100
            agent_bullet_fire = False
            agent_bullet_direction_x = agent_bullet_direction_y = 0
            agent_bullet_x = agent_bullet_y = bullet_ob
            last_agent_bullet_x = last_agent_bullet_y = bullet_ob
            if bot_hp <= 0:
                successful = True
                cont = False
                winner = "Agent"
        elif _bullet_left_arena(agent_bullet_x, agent_bullet_y):
            agent_bullet_fire = False
            agent_bullet_direction_x = agent_bullet_direction_y = 0
            agent_bullet_x = agent_bullet_y = bullet_ob
            last_agent_bullet_x = last_agent_bullet_y = bullet_ob

    return reward, cont, successful, winner


#Parameters
y = 0.75
e = 0.3
num_episodes = 10000
batch_size = 10
complexity = 10


def _encode_state():
    """Build the 1x8 network input from the current global game state."""
    return np.array([[
        mapping(complexity, float(agent_x) / float(arena_x)),
        mapping(complexity, float(agent_y) / float(arena_y)),
        mapping(complexity, float(bot_x) / float(arena_x)),
        mapping(complexity, float(bot_y) / float(arena_y)),
        mapping(complexity, float(bot_bullet_x / float(arena_x))),
        mapping(complexity, float(bot_bullet_y / float(arena_y))),
        mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)),
        mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)),
    ]])


#Guarded so that importing this file no longer starts a training run;
#running it as a script behaves as before.
if __name__ == "__main__":
    with tf.Session() as sess:
        sess.run(initialize)
        success = 0
        #+1 so that exactly num_episodes episodes run, matching the count
        #reported in the summary below.
        for i in range(1, num_episodes + 1):
            rAll = 0
            d = False
            c = True
            j = 0  #steps taken in this episode
            param_init()
            samples = []
            while c:
                j += 1
                screen_blit()
                current_state = _encode_state()
                bot_take_action()
                #Explore with probability e, and always during the first 5 episodes.
                if np.random.rand(1) < e or i <= 5:
                    a = random.randint(0, 8)
                else:
                    a, _ = sess.run([predict, Q], feed_dict={input_layer: current_state})
                    a = int(a[0])
                r, c, d, winner = action(a + 1, bot_action)
                next_state = _encode_state()
                samples.append([current_state, a, r, next_state])

                if len(samples) > 10:
                    for _count in range(batch_size):
                        batch_current_state, action_taken, reward, batch_next_state = \
                            random.choice(samples)
                        batch_allQ = sess.run(Q, feed_dict={input_layer: batch_current_state})
                        batch_Q1 = sess.run(Q, feed_dict={input_layer: batch_next_state})
                        batch_maxQ1 = np.max(batch_Q1)
                        batch_targetQ = batch_allQ
                        #Update the Q value of the action stored in the
                        #sampled transition, not of the action just taken
                        #in the live game.
                        batch_targetQ[0][action_taken] = reward + y * batch_maxQ1
                        sess.run([updateModel],
                                 feed_dict={input_layer: batch_current_state,
                                            next_Q: batch_targetQ})
                rAll += r
                if d:
                    #Lower the exploration rate after an agent win.
                    e = 1. / ((i // 50) + 10)
                    success += 1
                    break
                display.update()
            rList.append(rAll)
            print(winner)

    #The rate is printed as a percentage; the old "%d" truncated the ratio to 0.
    print("Successful episodes: %d out of %d. Success Rate = %.2f%%"
          % (success, num_episodes, 100.0 * success / num_episodes))
    #plt was used without ever being imported; import it where it is needed.
    import matplotlib.pyplot as plt
    plt.plot(rList)
    plt.show()
Updated the code and some explanations
Source Link
nedward
  • 414
  • 5
  • 13
Loading
deleted 4 characters in body
Source Link
nedward
  • 414
  • 5
  • 13
Loading
I added my specific situation.
Source Link
nedward
  • 414
  • 5
  • 13
Loading
Source Link
nedward
  • 414
  • 5
  • 13
Loading