from pygame import * from pygame.locals import * import sys from time import sleep import numpy as np import random from time import sleep import tensorflow as tf from pylab import savefig from tqdm import tqdm #Screen Setup disp_x, disp_y = 15001000, 1000800 arena_x, arena_y = 8001000, 800 border = 4; border_2 = 1 #Color Setup white = (255, 255, 255); aqua= (0, 200, 200) red = (255, 0, 0); green = (0, 255, 0) blue = (0, 0, 255); black = (0, 0, 0) green_yellow = (173, 255, 47); energy_blue = (125, 249, 255) #Initialize character positions init_character_a_state = [disp_x/2 - arena_x/2 + 50, disp_y/2 - arena_y/2 + 50] init_character_b_state = [disp_x/2 + arena_x/2 - 50, disp_y/2 + arena_y/2 - 50] #Setup character dimentions character_radiuscharacter_size = 3050 character_move_speed = 2025 #Initialize character stats character_init_health = 100 #initialize bullet stats bullet_speed = 50 bullet_damagebeam_damage = 10 bullet_radius = 7 bullet_a_pos = list(init_character_a_state); bullet_b_posbeam_width = list(init_character_b_state)10 bullet_a_fire = False; bullet_b_firebeam_ob = False-100 #The Neural Network input_layer = tf.placeholder(shape=[1,8]7],dtype=tf.float32) weight_1 = tf.Variable(tf.random_uniform([8[7,9],0,0.1)) #weight_2 = tf.Variable(tf.random_uniform([6,9],0,0.1)) #The calculations, loss function and the update model Q = tf.matmul(input_layer, weight_1) predict = tf.argmax(Q, 1) next_Q = tf.placeholder(shape=[1,9],dtype=tf.float32) loss = tf.reduce_sum(tf.square(next_Q - Q)) trainer = tf.train.GradientDescentOptimizer(learning_rate=0.0001001) updateModel = trainer.minimize(loss) initialize = tf.global_variables_initializer() jList = [] rList = [] init() font.init() myfont = font.SysFont('Comic Sans MS', 15) myfont2 = font.SysFont('Comic Sans MS', 150) myfont3 = font.SysFont('Gothic', 30) disp = display.set_mode((disp_x, disp_y), 0, 32) bullet_ob = -100 #CHARACTER/BULLET PARAMETERS bot_bullet_x = bot_bullet_y = bullet_ob agent_bullet_x = agent_bullet_y = 
bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = bool() agent_bullet_direction_x = agent_bullet_direction_y = int() bot_bullet_direction_x = bot_bullet_direction_y = int() agent_x = agent_y = int() bot_x = bot_y = int() agent_hp = bot_hp = int() bot_beam_dir = int() agent_beam_fire = bot_beam_fire = bool() agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = int() agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = int() bot_current_action = agent_current_action = int() def param_init(): """Initializes parameters""" global bot_bullet_x, bot_bullet_y, agent_bullet_x, agent_bullet_y, agent_bullet_fireagent_x, \ bot_bullet_fireagent_y, agent_bullet_direction_xbot_x, agent_bullet_direction_ybot_y, bot_bullet_direction_xagent_hp, \ bot_bullet_direction_ybot_hp, agent_xagent_beam_fire, agent_ybot_beam_fire, bot_xagent_beam_x, bot_ybot_beam_x, agent_hpagent_beam_y, bot_hpbot_beam_y agent_bullet_x = agent_bullet_y = bullet_ob bot_bullet_x = bot_bullet_y = bullet_ob last_agent_bullet_x = last_agent_bullet_y = bullet_ob last_bot_bullet_x = last_bot_bullet_y = bullet_ob agent_bullet_fire = bot_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 agent_x = list(init_character_a_state)[0]; agent_y = list(init_character_a_state)[1] bot_x = list(init_character_b_state)[0]; bot_y = list(init_character_b_state)[1] agent_hp = bot_hp = character_init_health agent_beam_fire = bot_beam_fire = False agent_beam_x = bot_beam_x = agent_beam_y = bot_beam_y = beam_ob agent_beam_size_x = agent_beam_size_y = bot_beam_size_x = bot_beam_size_y = 0 def screen_blit(): global disp, disp_x, disp_y, arena_x, arena_y, border, border_2, agent_bullet_xcharacter_size, agent_x, \ agent_bullet_yagent_y, bullet_radiusbot_x, bot_bullet_xbot_y, bot_bullet_ycharacter_init_health, 
character_radiusagent_hp, agent_xbot_hp, red, blue, aqua, green, black, green_yellow, energy_blue, \ agent_yagent_beam_fire, bot_xbot_beam_fire, bot_yagent_beam_x, character_init_healthagent_beam_y, agent_hpbot_beam_x, bot_hpbot_beam_y, redagent_beam_size_x, blueagent_beam_size_y, aquabot_beam_size_x, greenbot_beam_size_y, blackbeam_width disp.fill(aqua) draw.rect(disp, black, (disp_x / 2 - arena_x / 2 - border, disp_y / 2 - arena_y / 2 - border, arena_x + border * 2, arena_y + border * 2)) draw.rect(disp, green, (disp_x / 2 - arena_x / 2, disp_y / 2 - arena_y / 2, arena_x, arena_y)) draw.circle(disp, black,if [agent_bullet_x,bot_beam_fire agent_bullet_y],== bullet_radius)True: draw.circle(disp, black, [bot_bullet_x, bot_bullet_y], bullet_radius) draw.circlerect(disp, blackgreen_yellow, (agent_xagent_beam_x, agent_y)agent_beam_y, agent_beam_size_x, agent_beam_size_y)) bot_beam_fire = False character_radius + border_2) if agent_beam_fire == draw.circle(disp,True: red, (agent_x, agent_y), character_radius) draw.circlerect(disp, blackenergy_blue, (bot_xbot_beam_x, bot_y)bot_beam_y, bot_beam_size_x, bot_beam_size_y)) agent_beam_fire = False draw.rect(disp, red, character_radius(agent_x, +agent_y, border_2character_size, character_size)) draw.circlerect(disp, blue, (bot_x, bot_y), character_radiuscharacter_size, character_size)) draw.rect(disp, red, (disp_x / 2 - 200, disp_y / 2 + arena_y / 2 + border + 1, float(agent_hp) / float(character_init_health) * 100, 14)) draw.rect(disp, blue, (disp_x / 2 + 200, disp_y / 2 + arena_y / 2 + border + 1, float(bot_hp) / float(character_init_health) * 100, 14)) def bot_take_action(): return random.randint(1, 9) def beam_hit_detector(player): global agent_x, agent_y, bot_x, bot_y, character_radiusagent_beam_fire, bot_actionbot_beam_fire, borderagent_beam_x, \ ifbot_beam_x, agent_xagent_beam_y, -bot_beam_y, character_radiusagent_beam_size_x, -agent_beam_size_y, border\ <= bot_x <= agent_xbot_beam_size_x, +bot_beam_size_y, 
character_radiusbot_current_action, +agent_current_action, borderbeam_width, character_size if player == "bot": if random.randint(0, 100)bot_current_action >== 51: if disp_y/2 - arena_y/2 <= agent_y <= bot_y and (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + character_size): bot_action =return 1True else: bot_action =return 3False else: elif bot_current_action == 2: bot_action = 9 elif agent_y -if character_radiusbot_x <= bot_yagent_x <= agent_ydisp_x/2 + character_radius: arena_x/2 ifand random.randint(0, 100) >agent_y 5: < bot_beam_y + beam_width < agent_y + character_size or agent_y < bot_beam_y if< agent_xagent_y <=+ bot_xcharacter_size): bot_action =return 4True else: bot_action = 2 else: bot_action = 9 return else:False if random.randint(0,elif 100)bot_current_action >== 53: x_dist = abs(bot_x - agent_x); y_dist =if abs(bot_y -<= agent_y) if x_dist >= y_dist: if bot_x - agent_x <= 0: disp_y/2 + bot_actionarena_y/2 =and 6 (agent_x < bot_beam_x + beam_width < agent_x + character_size or agent_x < bot_beam_x < agent_x + elsecharacter_size): bot_action =return 8True else: if bot_y - agent_y <= 0: bot_action = 7 else: bot_action = 5 else: bot_action = random.randint(1, 9) def bullet_hit_detector(player): global bot_bullet_x, bot_bullet_y, last_bot_bullet_x, last_bot_bullet_y, agent_x, agent_y, last_agent_bullet_x, last_agent_bullet_y, character_radius, border, bullet_radius if player ==return "bot":False ifelif bot_bullet_xbot_current_action == last_bot_bullet_x4: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius + border or \ agent_x - character_radius - border < bot_bullet_x - bullet_radius < agent_x + character_radius + border: #If the current state of the bullet is touchingdisp_x/inside the agent: if agent_y - character_radius - border < bot_bullet_y + bullet_radius < agent_y + character_radius or \ agent_y - character_radius < bot_bullet_y - bullet_radius < 
agent_y + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_y2 - bullet_radius > agent_yarena_x/2 +<= character_radiusagent_x +<= borderbot_x and agent_y - character_radius - border > bot_bullet_y + bullet_radius) \ or (bot_bullet_y - bullet_radius > agent_y + character_radius + border and agent_y - character_radius - border > last_bot_bullet_y + bullet_radius): return True else: return False elif bot_bullet_y == last_bot_bullet_y: if agent_y - character_radius - border < bot_bullet_ybot_beam_y -+ bullet_radiusbeam_width < agent_y + character_radius + bordercharacter_size or \ agent_y - character_radius - border < bot_bullet_y + bullet_radiusbot_beam_y < agent_y + character_radius + border: #If the current state of the bullet is touching/inside the agent: if agent_x - character_radius - border < bot_bullet_x + bullet_radius < agent_x + character_radius or \ agent_x - character_radius < bot_bullet_x - bullet_radius < agent_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > bot_bullet_x + bullet_radius) \ or (bot_bullet_x - bullet_radius > agent_x + character_radius + border and agent_x - character_radius - border > last_bot_bullet_x + bullet_radiuscharacter_size): return True else: return False else: if agent_bullet_xagent_current_action == last_agent_bullet_x1: if bot_x - character_radiusdisp_y/2 - borderarena_y/2 <<= agent_bullet_xbot_y +<= bullet_radiusagent_y <and (bot_x +< character_radiusagent_beam_x + border or \ beam_width < bot_x -+ character_radiuscharacter_size -or borderbot_x < agent_bullet_x - bullet_radiusagent_beam_x < bot_x + character_radius + bordercharacter_size): #Ifreturn theTrue current state of the bullet is touching/inside the agent else: ifreturn bot_yFalse - character_radius - border < 
agent_bullet_y + bullet_radiuselif <agent_current_action bot_y== +2: character_radius or \ if agent_x <= bot_x <= disp_x/2 + bot_yarena_x/2 -and character_radius(bot_y < agent_bullet_yagent_beam_y -+ bullet_radiusbeam_width < bot_y + character_radiuscharacter_size +or border: bot_y < agent_beam_y < bot_y + character_size): return True else: #If the bullet "passed through" the character from the last turn:return False elif agent_current_action == 3: elif (last_agent_bullet_y - bullet_radius > if agent_y <= bot_y +<= character_radiusdisp_y/2 + borderarena_y/2 and bot_y(bot_x -< character_radiusagent_beam_x -+ borderbeam_width >< agent_bullet_ybot_x + bullet_radius)character_size \ or bot_x < agent_beam_x < bot_x + character_size): or (agent_bullet_y - bullet_radius > bot_y + character_radiusreturn +True border and bot_y - character_radius - border > last_agent_bullet_y + bullet_radius) else: return TrueFalse elif bot_current_action == 4: else: if disp_x/2 - arena_x/2 <= bot_x <= agent_x and (bot_y < agent_beam_y + returnbeam_width False < bot_y + character_size or bot_y < agent_beam_y elif< agent_bullet_ybot_y ==+ last_agent_bullet_ycharacter_size): if bot_y - character_radius -return borderTrue < agent_bullet_y - bullet_radius < bot_y + character_radius + border or \else: bot_y - character_radius - border < agent_bullet_y + bullet_radius < bot_y + character_radius +return border:False #If the current state of the bullet is touching/inside the agent: if bot_x - character_radius - border < agent_bullet_x + bullet_radius < bot_x + character_radius or \ bot_x - character_radius < agent_bullet_x - bullet_radius < bot_x + character_radius + border: return True #If the bullet "passed through" the character from the last turn: elif (last_agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius - border > agent_bullet_x + bullet_radius) \ or (agent_bullet_x - bullet_radius > bot_x + character_radius + border and bot_x - character_radius 
- border > last_agent_bullet_x + bullet_radius): return True else: return False def mapping(maximum, number): return int(absnumber#int(number * maximum) / (maximum/10)) def action(agent_action, bot_action): global bot_bullet_xagent_x, bot_bullet_yagent_y, agent_bullet_xbot_x, agent_bullet_ybot_y, agent_bullet_fireagent_hp, bot_hp, agent_beam_fire, \ bot_bullet_firebot_beam_fire, agent_bullet_direction_xagent_beam_x, agent_bullet_direction_ybot_beam_x, bot_bullet_direction_xagent_beam_y, bot_beam_y, agent_beam_size_x, \ bot_bullet_direction_yagent_beam_size_y, agent_xbot_beam_size_x, agent_ybot_beam_size_y, bot_xbeam_width, bot_yagent_current_action, agent_hpbot_current_action, bot_hp,character_size last_agent_bullet_x, last_agent_bullet_y, last_bot_bullet_x, last_bot_bullet_yagent_current_action = agent_action; bot_current_action = bot_action reward = 0; cont = True; successful = False; winner = "" if 1 <= bot_action <= 4 and bot_bullet_fire == False: bot_bullet_firebot_beam_fire = True if bot_action == 1: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = disp_y/2 -bullet_speed arena_y/2 bot_beam_size_x = beam_width; bot_beam_size_y = bot_y - disp_y/2 + arena_y/2 elif bot_action == 2: bot_bullet_direction_xbot_beam_x = bullet_speed;bot_x bot_bullet_direction_y+ character_size; bot_beam_y = 0bot_y + character_size/2 - beam_width/2 bot_beam_size_x = disp_x/2 + arena_x/2 - bot_x - character_size; bot_beam_size_y = beam_width elif bot_action == 3: bot_bullet_direction_xbot_beam_x = 0;bot_x bot_bullet_direction_y+ character_size/2 - beam_width/2; bot_beam_y = bullet_speedbot_y + character_size bot_beam_size_x = beam_width; bot_beam_size_y = disp_y/2 + arena_y/2 - bot_y - character_size elif bot_action == 4: bot_bullet_direction_xbot_beam_x = disp_x/2 -bullet_speed; bot_bullet_direction_yarena_x/2; bot_beam_y = 0bot_y + character_size/2 - beam_width/2 bot_bullet_x bot_beam_size_x = bot_x +- 
bot_bullet_direction_x;disp_x/2 bot_bullet_y+ =arena_x/2; bot_ybot_beam_size_y += bot_bullet_direction_ybeam_width elif 5 <= bot_action <= 8: if bot_action == 5: bot_y -= character_move_speed if bot_y <= disp_y/2 - arena_y/2 + character_radius + 1: bot_y = disp_y/2 - arena_y/2 elif agent_y <= bot_y <= agent_y + character_radiuscharacter_size: bot_y = agent_y + 1character_size elif bot_action == 6: bot_x += character_move_speed if bot_x >= disp_x/2 + arena_x/2 - character_radius - 1character_size: bot_x = disp_x/2 + arena_x/2 - character_radiuscharacter_size elif agent_x <= bot_x + character_size <= agent_x + character_size: bot_x = agent_x - 1character_size elif bot_action == 7: bot_y += character_move_speed if bot_y + character_size >= disp_y/2 + arena_y/2 - character_radius - 1: bot_y = disp_y/2 + arena_y/2 - character_radiuscharacter_size elif agent_y <= bot_y + character_size <= agent_y + character_size: bot_y = agent_y - 1character_size elif bot_action == 8: bot_x -= character_move_speed if bot_x <= disp_x/2 - arena_x/2 + character_radius + 1: bot_x = disp_x/2 - arena_x/2 elif agent_x <= bot_x <= agent_x + character_radiuscharacter_size: bot_x = agent_x + 1character_size if bot_bullet_firebot_beam_fire == True: last_bot_bullet_x = bot_bullet_x; last_bot_bullet_y = bot_bullet_y bot_bullet_x += bot_bullet_direction_x; bot_bullet_y += bot_bullet_direction_y if bullet_hit_detectorbeam_hit_detector("bot"): print#print "Agent Got Hit!" 
agent_hp -= bullet_damagebeam_damage reward =+= -50 bot_bullet_fire = False bot_bullet_direction_xbot_beam_size_x = 0; bot_bullet_direction_ybot_beam_size_y = 0 bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_xbot_beam_x = last_bot_bullet_ybot_beam_y = bullet_obbeam_ob if agent_hp <= 0: cont = False winner = "Bot" elif bot_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or bot_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ bot_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or bot_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: bot_bullet_fire = False bot_bullet_direction_x = 0; bot_bullet_direction_y = 0 bot_bullet_x = bot_bullet_y = bullet_ob; last_bot_bullet_x = last_bot_bullet_y = bullet_ob if 1 <= agent_action <= 4: if agent_bullet_fire == False: agent_bullet_fireagent_beam_fire = True if agent_action == 1: if agent_y - character_radius - border > disp_y/2 - arena_y/2: agent_beam_x = agent_bullet_direction_xagent_x =- 0;beam_width/2; agent_bullet_direction_yagent_beam_y = disp_y/2 -bullet_speed arena_y/2 agent_beam_size_x = beam_width; rewardagent_beam_size_y = 10 agent_y - disp_y/2 + arena_y/2 else: reward =+= -25 agent_bullet_x = agent_bullet_y = bullet_ob agent_bullet_fire = False elif agent_action == 2: if agent_x + character_radius + bordercharacter_size < disp_x/2 + arena_x/2: agent_bullet_direction_x = bullet_speed; agent_bullet_direction_yagent_beam_x = 0 agent_x + character_size; rewardagent_beam_y = 10 agent_y + character_size/2 - else:beam_width/2 rewardagent_beam_size_x = -25 disp_x/2 + arena_x/2 - agent_x - character_size; agent_beam_size_y = beam_width agent_bullet_x = agent_bullet_y = bullet_obelse: agent_bullet_firereward =+= False-25 elif agent_action == 3: if agent_y + character_radius + bordercharacter_size < disp_y/2 + arena_y/2: agent_bullet_direction_x = 0; agent_bullet_direction_yagent_beam_x = bullet_speed agent_x + character_size/2 - beam_width/2; rewardagent_beam_y = 10 agent_y + else:character_size 
rewardagent_beam_size_x = -25 beam_width; agent_beam_size_y = disp_y/2 + arena_y/2 - agent_y - character_size agent_bullet_x = agent_bullet_y = bullet_obelse: agent_bullet_firereward =+= False-25 elif agent_action == 4: if agent_x - character_radius - border > disp_x/2 - arena_x/2: agent_bullet_direction_x = -bullet_speed; agent_bullet_direction_yagent_beam_x = 0 disp_x/2 - arena_x/2; rewardagent_beam_y = 10 else: agent_y reward+ =character_size/2 -25 agent_bullet_x = agent_bullet_y = bullet_obbeam_width/2 agent_bullet_fireagent_beam_size_x = False if agent_bullet_fire == True: agent_bullet_xagent_x =- agent_xdisp_x/2 + agent_bullet_direction_x;arena_x/2; agent_bullet_yagent_beam_size_y = agent_y + agent_bullet_direction_ybeam_width last_agent_bullet_x = agent_bullet_x; last_agent_bullet_y = agent_bullet_yelse: else: reward =+= -2025 elif 5 <= agent_action <= 8: if agent_action == 5: agent_y -= character_move_speed if agent_y - character_radius - border <= disp_y/2 - arena_y/2: agent_y = disp_y/2 - arena_y/2 + character_radius + border reward =+= -5 elseelif bot_y <= agent_y <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: rewardagent_y = 5bot_y + character_size reward += -2 elif agent_action == 6: agent_x += character_move_speed if agent_x + character_radius + bordercharacter_size >= disp_x/2 + arena_x/2: agent_x = disp_x/2 + arena_x/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_x <= agent_x + character_size <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size: rewardagent_x = 5bot_x - character_size reward += -2 elif agent_action == 7: agent_y += character_move_speed if agent_y + character_radius + bordercharacter_size >= disp_y/2 + arena_y/2: agent_y = disp_y/2 + arena_y/2 - character_radius - bordercharacter_size reward =+= -5 elseelif bot_y <= agent_y + character_size <= bot_y + character_size and bot_x <= agent_x <= bot_x + character_size: rewardagent_y = 5bot_y - character_size reward += 
-2 elif agent_action == 8: agent_x -= character_move_speed if agent_x - character_radius - border <= disp_x/2 - arena_x/2: agent_x = disp_x/2 - arena_x/2 + character_radius + border reward =+= -5 else: elif bot_x <= agent_x <= bot_x + character_size and bot_y <= agent_y <= bot_y + character_size: reward = 5 if agent_bullet_fire == True: agent_x = last_agent_bullet_xbot_x =+ agent_bullet_x;character_size last_agent_bullet_y = agent_bullet_y agent_bullet_x += agent_bullet_direction_x; agent_bullet_y reward += agent_bullet_direction_y-2 if agent_beam_fire == True: if bullet_hit_detectorbeam_hit_detector("agent"): print#print "Bot Got Hit!" bot_hp -= bullet_damagebeam_damage reward = 100 agent_bullet_fire =+= False50 agent_bullet_direction_xagent_beam_size_x = 0; agent_bullet_direction_yagent_beam_size_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_xagent_beam_x = last_agent_bullet_yagent_beam_y = bullet_obbeam_ob if bot_hp <= 0: successful = True cont = False winner = "Agent" elif agent_bullet_x + bullet_radius >= disp_x/2 + arena_x/2 or agent_bullet_x - bullet_radius <= disp_x/2 - arena_x/2 or \ agent_bullet_y + bullet_radius >= disp_y/2 + arena_y/2 or agent_bullet_y - bullet_radius <= disp_y/2 - arena_y/2: agent_bullet_fire = False agent_bullet_direction_x = 0; agent_bullet_direction_y = 0 agent_bullet_x = agent_bullet_y = bullet_ob; last_agent_bullet_x = last_agent_bullet_y = bullet_ob return reward, cont, successful, winner def bot_beam_dir_detector(): global bot_current_action if bot_current_action == 1: bot_beam_dir = 2 elif bot_current_action == 2: bot_beam_dir = 4 elif bot_current_action == 3: bot_beam_dir = 3 elif bot_current_action == 4: bot_beam_dir = 1 else: bot_beam_dir = 0 return bot_beam_dir #Parameters y = 0.75 e = 0.3 num_episodes = 10000 batch_size = 10 complexity = 10100 with tf.Session() as sess: sess.run(initialize) success = 0 for i in tqdm(range(1, num_episodes)): #print "Episode #", i rAll = 0; d = False; c = True; j = 0 
param_init() samples = [] while c == True: j += 1 screen_blit() current_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]]) b = bot_take_action() if np.random.rand(1) < e or i <= 5: a = random.randint(0, 8) else: a, _ = sess.run([predict, Q],feed_dict={input_layer : current_state}) r, c, d, winner = action(a + 1, bot_actionb) bot_beam_dir = bot_beam_dir_detector() next_state = np.array([[mapping(complexity, float(agent_x) / float(arena_x)), mapping(complexity, float(agent_y) / float(arena_y)), mapping(complexity, float(bot_x) / float(arena_x)), mapping(complexity, float(bot_y) / float(arena_y)), mapping#mapping(complexity, float(bot_bullet_xagent_hp) / float(arena_x)character_init_health)), mapping#mapping(complexity, float(bot_bullet_ybot_hp) / float(arena_y)character_init_health)), mapping(complexity, abs(float(agent_x - bot_x)) / float(arena_x)), mapping(complexity, abs(float(agent_y - bot_y)) / float(arena_y)), bot_beam_dir ]]) samples.append([current_state, a, r, next_state]) if len(samples) > 10: for count in xrange(batch_size): [batch_current_state, action_taken, reward, batch_next_state] = samples[random.randint(0, len(samples) - 1)] batch_allQ = sess.run(Q, feed_dict={input_layer : batch_current_state}) batch_Q1 = sess.run(Q, feed_dict = {input_layer : batch_next_state}) batch_maxQ1 = np.max(batch_Q1) batch_targetQ = batch_allQ batch_targetQ[0][a] = reward + y * batch_maxQ1 sess.run([updateModel], feed_dict={input_layer : batch_current_state, 
next_Q : batch_targetQ}) rAll += r screen_blit() if d == True: e = 1. / ((i / 50) + 10) success += 1 break #print agent_hp, bot_hp display.update() jList.append(j) rList.append(rAll) print winner print "Successful episodes: %d out of %d. Success Rate = %d" % (success, num_episodes, float(success)/float(num_episodes)) plt.plot(rList) plt.show() Update #3 on August 22, 2017:
I've noticed that if the agent hits the bot with a bullet on a turn and the action the bot took on that turn was not "fire a bullet", then the wrong action would be given credit. Thus, I've turned the bullets into beams, so the bot/agent takes damage on the same turn the beam is fired.