"""Tic-tac-toe with a tabular Q-learning agent.

The board (``play_area``) is a 9-element list: free cells hold the ints
1-9 and taken cells hold the marks '○' / '×'.  Each board state is
encoded base-3 into a row index of a (3**9, 9) Q-table; columns are the
nine cells.  Running this file as a script trains the agent against a
uniformly random opponent and prints periodic win rates.
"""

import csv
import math
import pprint
import random
from time import sleep

import numpy as np

# Optional dependencies kept from the original script.  They are only
# needed by disabled / commented-out features (notebook rendering, a
# spreadsheet export that never shipped), so a missing package must not
# prevent importing the training code.
try:
    from IPython.display import clear_output  # used only by the removed plot UI
except ImportError:
    clear_output = None
try:
    import openpyxl  # imported by the original script; unused in active code
except ImportError:
    openpyxl = None

# Q-learning hyper-parameters.  Module-level on purpose: q_learning()
# reads them as globals, matching the original script.
eta = 0.1     # learning rate
gamma = 0.9   # discount factor
initial_epsilon = 0.5  # starting exploration rate (decays linearly in main)


def get_player_input(play_area, first_inputter):
    """Prompt the human until a free cell number is entered, then mark it.

    Args:
        play_area: current board; ints are free cells, '○'/'×' are taken.
        first_inputter: 1 -> the human plays '○', 2 -> the human plays '×'.

    Returns:
        (updated board, chosen cell number as int)
    """
    choosable_area = [str(area) for area in play_area if type(area) is int]
    while True:
        player_input = input('Choose a number!>>>')
        if player_input in choosable_area:
            player_input = int(player_input)
            break
        else:
            print('Wrong input!\nChoose a number from' \
                  '{}'.format(choosable_area))
    if first_inputter == 1:
        play_area[play_area.index(player_input)] = '○'
    elif first_inputter == 2:
        play_area[play_area.index(player_input)] = '×'
    return play_area, player_input


def get_ai_input(play_area, first_inputter, mode=0, q_table=None, epsilon=None):
    """Choose and play one AI move, mutating and returning the board.

    Args:
        play_area: current board.
        first_inputter: selects the mark placed: 1 -> '×', 2 -> '○'.
        mode: 0 = uniform random over free cells,
              1 = epsilon-greedy from ``q_table`` (see get_ql_action).
        q_table, epsilon: only used when mode == 1.

    Returns:
        (updated board, chosen cell number as int)
    """
    choosable_area = [str(area) for area in play_area if type(area) is int]
    if mode == 0:
        ai_input = int(random.choice(choosable_area))
    elif mode == 1:
        ai_input = get_ql_action(play_area, choosable_area, q_table, epsilon)
    if first_inputter == 1:
        play_area[play_area.index(ai_input)] = '×'
    elif first_inputter == 2:
        play_area[play_area.index(ai_input)] = '○'
    return play_area, ai_input


def show_play(play_area, inputter=0, inputted=0):
    """Print the board as a 3x3 ASCII grid.

    ``inputter`` / ``inputted`` are accepted for interface compatibility
    with the original (removed) matplotlib renderer; the text output
    does not use them.
    """
    markers = ['$' + str(marker) + '$' for marker in play_area]
    marker_count = 0
    # p1: x  p2: o
    for _row in range(3):
        print('-------------')
        out = '| '
        for _col in range(3):
            # NOTE(review): '○' is rendered as 'x' and '×' as 'o'.  This
            # inversion matches the original "p1: x p2: o" note — confirm
            # it is intentional before changing it.
            if markers[marker_count] == '$○$':
                token = 'x'
            elif markers[marker_count] == '$×$':
                token = 'o'
            else:
                token = str(marker_count + 1)
            out += token + ' | '
            marker_count += 1
        print(out)
    print('-------------')
    # The original kept a large commented-out matplotlib/IPython renderer
    # here (clear_output + plt.plot of '$○$'/'$×$' markers); removed as
    # dead code.


def make_q_table():
    """Return a zeroed Q-table: 3**9 board states x 9 actions (cells)."""
    n_columns = 9
    n_rows = 3 ** 9
    return np.zeros((n_rows, n_columns))


def find_q_row(play_area):
    """Encode a board into its Q-table row index.

    Base-3 encoding per cell: free = 0, '○' = 1, '×' = 2, with cell i
    contributing coef * 3**i.
    """
    row_index = 0
    for index, cell in enumerate(play_area):
        if cell == '○':
            coef = 1
        elif cell == '×':
            coef = 2
        else:
            coef = 0
        row_index += (3 ** index) * coef
    return row_index


def judge(play_area, inputter):
    """Check whether the game is over.

    The i-th entries across the three lists form one winning line: the
    3 rows, the 3 columns, then both diagonals.  Free cells hold
    distinct ints, so only real marks can match.

    Args:
        play_area: current board.
        inputter: label of the player who just moved (callers invoke
            judge immediately after that player's move, so any completed
            line must be theirs).

    Returns:
        (winner, end_flg): winner is ``inputter`` on a win, else 'Draw';
        end_flg is 1 when the game ended (win or full board).
    """
    end_flg = 0
    winner = 'Draw'
    first_list = [0, 3, 6, 0, 1, 2, 0, 2]
    second_list = [1, 4, 7, 3, 4, 5, 4, 4]
    third_list = [2, 5, 8, 6, 7, 8, 8, 6]
    for first, second, third in zip(first_list, second_list, third_list):
        if play_area[first] == play_area[second] \
                and play_area[first] == play_area[third]:
            winner = inputter
            end_flg = 1
            break
    choosable_area = [str(area) for area in play_area if type(area) is int]
    if len(choosable_area) == 0:
        end_flg = 1  # board full -> draw (unless a win was found above)
    return winner, end_flg


def q_learning(play_area, ai_input, reward, play_area_next, q_table, end_flg):
    """Apply one Q-learning update to ``q_table`` (in place) and return it.

    Uses the module-level hyper-parameters ``eta`` (learning rate) and
    ``gamma`` (discount).  Terminal states (end_flg == 1) use the bare
    reward as the target; otherwise the target bootstraps on the best
    Q-value of the successor state.

    Args:
        play_area: state *before* the agent's move.
        ai_input: the cell (1-9) the agent chose; column is ai_input - 1.
        reward: reward observed after the transition.
        play_area_next: state the environment settled into afterwards.
        q_table: the table to update.
        end_flg: 1 when the episode ended with this transition.
    """
    row_index = find_q_row(play_area)
    row_index_next = find_q_row(play_area_next)
    column_index = ai_input - 1
    if end_flg == 1:
        q_table[row_index, column_index] = \
            q_table[row_index, column_index] + eta \
            * (reward - q_table[row_index, column_index])
    else:
        q_table[row_index, column_index] = \
            q_table[row_index, column_index] + eta \
            * (reward + gamma * np.nanmax(q_table[row_index_next, :])
               - q_table[row_index, column_index])
    return q_table


def get_ql_action(play_area, choosable_area, q_table, epsilon):
    """Epsilon-greedy action selection over the *free* cells only.

    With probability ``epsilon`` pick a random free cell; otherwise pick
    the free cell with the highest Q-value for the current state (ties
    resolve to the earliest free cell).
    """
    if np.random.rand() < epsilon:
        ai_input = int(random.choice(choosable_area))
    else:
        row_index = find_q_row(play_area)
        first_choice_flg = 1
        for choice in choosable_area:
            if first_choice_flg == 1:
                ai_input = int(choice)
                first_choice_flg = 0
            elif q_table[row_index, ai_input - 1] \
                    < q_table[row_index, int(choice) - 1]:
                ai_input = int(choice)
    return ai_input


def randomAI_vs_QLAI(first_inputter, q_table1, epsilon=0):
    """Play one full game (random AI vs QL AI), learning as it goes.

    Args:
        first_inputter: 1 -> random AI moves first, 2 -> QL AI moves first.
        q_table1: Q-table; updated in place and also returned.
        epsilon: exploration rate handed to the QL agent.

    Returns:
        (winner label, updated Q-table)
    """
    inputter1 = 'Random AI'
    inputter2 = 'QL AI'
    ql_input_list = []   # QL AI's chosen cells, for the delayed update
    play_area_list = []  # pre-move board snapshots, for the delayed update
    play_area = list(range(1, 10))
    inputter_count = first_inputter
    end_flg = 0
    ql_flg = 0
    reward = 0
    while True:
        # Snapshot the state *before* this move for the Q-update.
        play_area_tmp = play_area.copy()
        play_area_list.append(play_area_tmp)
        ql_flg = 0  # whether to run a Q-update this iteration
        if (inputter_count % 2) == 0:
            # QL AI's turn.
            play_area, ql_ai_input = get_ai_input(play_area, first_inputter,
                                                  mode=1, q_table=q_table1,
                                                  epsilon=epsilon)
            winner, end_flg = judge(play_area, inputter2)
            ql_input_list.append(ql_ai_input)
            if winner == inputter2:
                reward = 1
                ql_flg = 1  # learn immediately on a win
            play_area_before = play_area_list[-1]
            ql_ai_input_before = ql_input_list[-1]
        elif (inputter_count % 2) == 1:
            # Random AI's turn.  BUGFIX: the original passed
            # first_inputter + 1, which for first_inputter == 2 yields 3,
            # so get_ai_input placed no mark and the game never ended.
            # 3 - first_inputter gives the opposite mark for either
            # starting order (1 -> 2 -> '○', 2 -> 1 -> '×').
            play_area, random_ai_input = get_ai_input(play_area,
                                                      3 - first_inputter,
                                                      mode=0)
            winner, end_flg = judge(play_area, inputter1)
            # Learn from every random-AI reply except its opening move
            # (no QL move has happened yet in that case).
            if inputter_count != 1:
                ql_flg = 1
        if ql_flg == 1:
            ql_ai_input_before = ql_input_list[-1]
            q_table1 = q_learning(play_area_before, ql_ai_input_before,
                                  reward, play_area, q_table1, end_flg)
        if end_flg:
            break
        inputter_count += 1
    return winner, q_table1


def player_vs_QLAI(first_inputter, q_table, epsilon=0):
    """Run one interactive game between a human and the QL AI.

    Args:
        first_inputter: 1 -> the human moves first, 2 -> the QL AI does.
        q_table: Q-table; updated in place during play and returned.
        epsilon: exploration rate handed to the QL agent.

    Returns:
        (winner label, updated Q-table)
    """
    inputter1 = 'YOU'
    inputter2 = 'QL AI'
    ql_input_list = []   # QL AI's chosen cells, for the delayed update
    play_area_list = []  # pre-move board snapshots, for the delayed update
    play_area = list(range(1, 10))
    show_play(play_area)
    inputter_count = first_inputter
    end_flg = 0
    ql_flg = 0
    reward = 0
    while True:
        # Snapshot the state *before* this move for the Q-update.
        play_area_tmp = play_area.copy()
        play_area_list.append(play_area_tmp)
        ql_flg = 0  # whether to run a Q-update this iteration
        if (inputter_count % 2) == 0:
            # QL AI's turn.
            play_area, ql_ai_input = get_ai_input(play_area, first_inputter,
                                                  mode=1, q_table=q_table,
                                                  epsilon=epsilon)
            show_play(play_area, inputter2, ql_ai_input)
            winner, end_flg = judge(play_area, inputter2)
            ql_input_list.append(ql_ai_input)
            if winner == inputter2:
                reward = 1
                ql_flg = 1  # learn immediately on a win
            play_area_before = play_area_list[-1]
            ql_ai_input_before = ql_input_list[-1]
        elif (inputter_count % 2) == 1:
            # Human's turn.
            print('Your turn!')
            play_area, player_input = get_player_input(play_area,
                                                       first_inputter)
            show_play(play_area, inputter1, player_input)
            winner, end_flg = judge(play_area, inputter1)
            if winner == inputter1:
                reward = -1  # losing to the human is penalized
            # Learn from every human reply except the opening move
            # (no QL move has happened yet in that case).
            if inputter_count != 1:
                ql_flg = 1
        if ql_flg == 1:
            ql_ai_input_before = ql_input_list[-1]
            q_table = q_learning(play_area_before, ql_ai_input_before,
                                 reward, play_area, q_table, end_flg)
        if end_flg:
            break
        inputter_count += 1
    show_play(play_area)
    print('{} win!!!'.format(winner))
    sleep(1)
    return winner, q_table


def MyGraph(y, x_R, x_Q, x_D):
    """Plot win-rate curves (Random / QL / Draw) against episode count.

    matplotlib is imported lazily so that importing this module does not
    require it.  Axis labels are Japanese and use a Windows font file —
    adjust the font path on other systems (TODO confirm target platform).
    """
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
    fp = FontProperties(fname=r'C:\WINDOWS\Fonts\YuGothic.ttf', size=14)
    y = y / 10000  # episodes -> units of 10,000 games
    plt.plot(y, x_R, label="Random")
    plt.plot(y, x_Q, label="QL")
    plt.plot(y, x_D, label="Draw")
    plt.ylim(0, 100)
    plt.xlabel('学習数[万回]', fontproperties=fp)
    plt.ylabel('勝率[%]', fontproperties=fp)
    plt.legend()
    plt.show()


def main():
    """Train the QL agent against the random AI and report win rates.

    The original script ran this loop at import time; it is now guarded
    so the module can be imported without triggering a million games.
    """
    q_table1 = make_q_table()
    episode = 1000000
    d = 1000  # number of reporting buckets
    n = int(episode / (episode / d)) + 1  # == d + 1 sample points
    winner_list = []
    old_R = 0
    old_Q = 0
    old_D = 0
    x_R = np.zeros((n, 1))
    x_Q = np.zeros((n, 1))
    x_D = np.zeros((n, 1))
    y = np.zeros((n, 1))  # episode index per bucket (for MyGraph)
    j = 0
    print('')
    for i in range(episode + 1):
        # Linearly decaying exploration rate.
        epsilon = initial_epsilon * (episode - i) / episode
        # q_table1 is a numpy array mutated in place by q_learning, so
        # learning persists even though the returned table is discarded.
        winner, _ = randomAI_vs_QLAI(1, q_table1, epsilon)
        winner_list.append(winner)
        if i % (episode / d) == 0:
            if i % 100000 == 0:
                print('学習数 :{}'.format(i))
                print('QLの勝率 :{}'.format(
                    (winner_list.count('QL AI') - old_Q) / (episode / d)))
                print('')
            # Per-bucket win rates in percent, e.g.
            # ((1234 - 678) / 1000) * 100 = 55.6%
            x_R[j] = ((winner_list.count('Random AI') - old_R)
                      / (episode / d)) * 100
            x_Q[j] = ((winner_list.count('QL AI') - old_Q)
                      / (episode / d)) * 100
            x_D[j] = ((winner_list.count('Draw') - old_D)
                      / (episode / d)) * 100
            # y[j] = i
            j = j + 1
            old_R = winner_list.count('Random AI')
            old_Q = winner_list.count('QL AI')
            old_D = winner_list.count('Draw')
            # if x_Q[j] > 0.9: break
    # MyGraph(y, x_R, x_Q, x_D)

    # The original could optionally append the win-rate series to a CSV:
    # MyTitle = [str(episode), str(eta), str(gamma)]
    # with open('data.csv', 'a') as file:
    #     writer = csv.writer(file, lineterminator='\n')
    #     writer.writerow(MyTitle)
    #     writer.writerow(x_Q)
    #     writer.writerow(x_R)
    #     writer.writerow(x_D)

    # ...and finish with a human-vs-AI game:
    # winner, q_table1 = player_vs_QLAI(1, q_table1, epsilon=0)


if __name__ == '__main__':
    main()