"""Tic-tac-toe with a tabular Q-learning agent.

The board (``play_area``) is a 9-element list: free cells hold the ints
1-9 and taken cells hold the marks '○' / '×'.  Each board state is
encoded base-3 into a row index of a (3**9, 9) Q-table; columns are the
nine cells.  Running this file as a script trains the agent against a
uniformly random opponent and prints periodic win rates.
"""

import csv
import math
import pprint
import random
from time import sleep

import numpy as np

# Optional dependencies kept from the original script.  They are only
# needed by disabled / commented-out features (notebook rendering, a
# spreadsheet export that never shipped), so a missing package must not
# prevent importing the training code.
try:
    from IPython.display import clear_output  # used only by the removed plot UI
except ImportError:
    clear_output = None
try:
    import openpyxl  # imported by the original script; unused in active code
except ImportError:
    openpyxl = None

# Q-learning hyper-parameters.  Module-level on purpose: q_learning()
# reads them as globals, matching the original script.
eta = 0.1     # learning rate
gamma = 0.9   # discount factor
initial_epsilon = 0.5  # starting exploration rate (decays linearly in main)


def get_player_input(play_area, first_inputter):
    """Prompt the human until a free cell number is entered, then mark it.

    Args:
        play_area: current board; ints are free cells, '○'/'×' are taken.
        first_inputter: 1 -> the human plays '○', 2 -> the human plays '×'.

    Returns:
        (updated board, chosen cell number as int)
    """
    choosable_area = [str(area) for area in play_area if type(area) is int]
    while True:
        player_input = input('Choose a number!>>>')
        if player_input in choosable_area:
            player_input = int(player_input)
            break
        else:
            print('Wrong input!\nChoose a number from' \
                  '{}'.format(choosable_area))
    if first_inputter == 1:
        play_area[play_area.index(player_input)] = '○'
    elif first_inputter == 2:
        play_area[play_area.index(player_input)] = '×'
    return play_area, player_input


def get_ai_input(play_area, first_inputter, mode=0, q_table=None, epsilon=None):
    """Choose and play one AI move, mutating and returning the board.

    Args:
        play_area: current board.
        first_inputter: selects the mark placed: 1 -> '×', 2 -> '○'.
        mode: 0 = uniform random over free cells,
              1 = epsilon-greedy from ``q_table`` (see get_ql_action).
        q_table, epsilon: only used when mode == 1.

    Returns:
        (updated board, chosen cell number as int)
    """
    choosable_area = [str(area) for area in play_area if type(area) is int]
    if mode == 0:
        ai_input = int(random.choice(choosable_area))
    elif mode == 1:
        ai_input = get_ql_action(play_area, choosable_area, q_table, epsilon)
    if first_inputter == 1:
        play_area[play_area.index(ai_input)] = '×'
    elif first_inputter == 2:
        play_area[play_area.index(ai_input)] = '○'
    return play_area, ai_input


def show_play(play_area, inputter=0, inputted=0):
    """Print the board as a 3x3 ASCII grid.

    ``inputter`` / ``inputted`` are accepted for interface compatibility
    with the original (removed) matplotlib renderer; the text output
    does not use them.
    """
    markers = ['$' + str(marker) + '$' for marker in play_area]
    marker_count = 0
    # p1: x  p2: o
    for _row in range(3):
        print('-------------')
        out = '| '
        for _col in range(3):
            # NOTE(review): '○' is rendered as 'x' and '×' as 'o'.  This
            # inversion matches the original "p1: x p2: o" note — confirm
            # it is intentional before changing it.
            if markers[marker_count] == '$○$':
                token = 'x'
            elif markers[marker_count] == '$×$':
                token = 'o'
            else:
                token = str(marker_count + 1)
            out += token + ' | '
            marker_count += 1
        print(out)
    print('-------------')
    # The original kept a large commented-out matplotlib/IPython renderer
    # here (clear_output + plt.plot of '$○$'/'$×$' markers); removed as
    # dead code.


def make_q_table():
    """Return a zeroed Q-table: 3**9 board states x 9 actions (cells)."""
    n_columns = 9
    n_rows = 3 ** 9
    return np.zeros((n_rows, n_columns))


def find_q_row(play_area):
    """Encode a board into its Q-table row index.

    Base-3 encoding per cell: free = 0, '○' = 1, '×' = 2, with cell i
    contributing coef * 3**i.
    """
    row_index = 0
    for index, cell in enumerate(play_area):
        if cell == '○':
            coef = 1
        elif cell == '×':
            coef = 2
        else:
            coef = 0
        row_index += (3 ** index) * coef
    return row_index


def judge(play_area, inputter):
    """Check whether the game is over.

    The i-th entries across the three lists form one winning line: the
    3 rows, the 3 columns, then both diagonals.  Free cells hold
    distinct ints, so only real marks can match.

    Args:
        play_area: current board.
        inputter: label of the player who just moved (callers invoke
            judge immediately after that player's move, so any completed
            line must be theirs).

    Returns:
        (winner, end_flg): winner is ``inputter`` on a win, else 'Draw';
        end_flg is 1 when the game ended (win or full board).
    """
    end_flg = 0
    winner = 'Draw'
    first_list = [0, 3, 6, 0, 1, 2, 0, 2]
    second_list = [1, 4, 7, 3, 4, 5, 4, 4]
    third_list = [2, 5, 8, 6, 7, 8, 8, 6]
    for first, second, third in zip(first_list, second_list, third_list):
        if play_area[first] == play_area[second] \
                and play_area[first] == play_area[third]:
            winner = inputter
            end_flg = 1
            break
    choosable_area = [str(area) for area in play_area if type(area) is int]
    if len(choosable_area) == 0:
        end_flg = 1  # board full -> draw (unless a win was found above)
    return winner, end_flg


def q_learning(play_area, ai_input, reward, play_area_next, q_table, end_flg):
    """Apply one Q-learning update to ``q_table`` (in place) and return it.

    Uses the module-level hyper-parameters ``eta`` (learning rate) and
    ``gamma`` (discount).  Terminal states (end_flg == 1) use the bare
    reward as the target; otherwise the target bootstraps on the best
    Q-value of the successor state.

    Args:
        play_area: state *before* the agent's move.
        ai_input: the cell (1-9) the agent chose; column is ai_input - 1.
        reward: reward observed after the transition.
        play_area_next: state the environment settled into afterwards.
        q_table: the table to update.
        end_flg: 1 when the episode ended with this transition.
    """
    row_index = find_q_row(play_area)
    row_index_next = find_q_row(play_area_next)
    column_index = ai_input - 1
    if end_flg == 1:
        q_table[row_index, column_index] = \
            q_table[row_index, column_index] + eta \
            * (reward - q_table[row_index, column_index])
    else:
        q_table[row_index, column_index] = \
            q_table[row_index, column_index] + eta \
            * (reward + gamma * np.nanmax(q_table[row_index_next, :])
               - q_table[row_index, column_index])
    return q_table


def get_ql_action(play_area, choosable_area, q_table, epsilon):
    """Epsilon-greedy action selection over the *free* cells only.

    With probability ``epsilon`` pick a random free cell; otherwise pick
    the free cell with the highest Q-value for the current state (ties
    resolve to the earliest free cell).
    """
    if np.random.rand() < epsilon:
        ai_input = int(random.choice(choosable_area))
    else:
        row_index = find_q_row(play_area)
        first_choice_flg = 1
        for choice in choosable_area:
            if first_choice_flg == 1:
                ai_input = int(choice)
                first_choice_flg = 0
            elif q_table[row_index, ai_input - 1] \
                    < q_table[row_index, int(choice) - 1]:
                ai_input = int(choice)
    return ai_input


def randomAI_vs_QLAI(first_inputter, q_table1, epsilon=0):
    """Play one full game (random AI vs QL AI), learning as it goes.

    Args:
        first_inputter: 1 -> random AI moves first, 2 -> QL AI moves first.
        q_table1: Q-table; updated in place and also returned.
        epsilon: exploration rate handed to the QL agent.

    Returns:
        (winner label, updated Q-table)
    """
    inputter1 = 'Random AI'
    inputter2 = 'QL AI'
    ql_input_list = []   # QL AI's chosen cells, for the delayed update
    play_area_list = []  # pre-move board snapshots, for the delayed update
    play_area = list(range(1, 10))
    inputter_count = first_inputter
    end_flg = 0
    ql_flg = 0
    reward = 0
    while True:
        # Snapshot the state *before* this move for the Q-update.
        play_area_tmp = play_area.copy()
        play_area_list.append(play_area_tmp)
        ql_flg = 0  # whether to run a Q-update this iteration
        if (inputter_count % 2) == 0:
            # QL AI's turn.
            play_area, ql_ai_input = get_ai_input(play_area, first_inputter,
                                                  mode=1, q_table=q_table1,
                                                  epsilon=epsilon)
            winner, end_flg = judge(play_area, inputter2)
            ql_input_list.append(ql_ai_input)
            if winner == inputter2:
                reward = 1
                ql_flg = 1  # learn immediately on a win
            play_area_before = play_area_list[-1]
            ql_ai_input_before = ql_input_list[-1]
        elif (inputter_count % 2) == 1:
            # Random AI's turn.  BUGFIX: the original passed
            # first_inputter + 1, which for first_inputter == 2 yields 3,
            # so get_ai_input placed no mark and the game never ended.
            # 3 - first_inputter gives the opposite mark for either
            # starting order (1 -> 2 -> '○', 2 -> 1 -> '×').
            play_area, random_ai_input = get_ai_input(play_area,
                                                      3 - first_inputter,
                                                      mode=0)
            winner, end_flg = judge(play_area, inputter1)
            # Learn from every random-AI reply except its opening move
            # (no QL move has happened yet in that case).
            if inputter_count != 1:
                ql_flg = 1
        if ql_flg == 1:
            ql_ai_input_before = ql_input_list[-1]
            q_table1 = q_learning(play_area_before, ql_ai_input_before,
                                  reward, play_area, q_table1, end_flg)
        if end_flg:
            break
        inputter_count += 1
    return winner, q_table1


def player_vs_QLAI(first_inputter, q_table, epsilon=0):
    """Run one interactive game between a human and the QL AI.

    Args:
        first_inputter: 1 -> the human moves first, 2 -> the QL AI does.
        q_table: Q-table; updated in place during play and returned.
        epsilon: exploration rate handed to the QL agent.

    Returns:
        (winner label, updated Q-table)
    """
    inputter1 = 'YOU'
    inputter2 = 'QL AI'
    ql_input_list = []   # QL AI's chosen cells, for the delayed update
    play_area_list = []  # pre-move board snapshots, for the delayed update
    play_area = list(range(1, 10))
    show_play(play_area)
    inputter_count = first_inputter
    end_flg = 0
    ql_flg = 0
    reward = 0
    while True:
        # Snapshot the state *before* this move for the Q-update.
        play_area_tmp = play_area.copy()
        play_area_list.append(play_area_tmp)
        ql_flg = 0  # whether to run a Q-update this iteration
        if (inputter_count % 2) == 0:
            # QL AI's turn.
            play_area, ql_ai_input = get_ai_input(play_area, first_inputter,
                                                  mode=1, q_table=q_table,
                                                  epsilon=epsilon)
            show_play(play_area, inputter2, ql_ai_input)
            winner, end_flg = judge(play_area, inputter2)
            ql_input_list.append(ql_ai_input)
            if winner == inputter2:
                reward = 1
                ql_flg = 1  # learn immediately on a win
            play_area_before = play_area_list[-1]
            ql_ai_input_before = ql_input_list[-1]
        elif (inputter_count % 2) == 1:
            # Human's turn.
            print('Your turn!')
            play_area, player_input = get_player_input(play_area,
                                                       first_inputter)
            show_play(play_area, inputter1, player_input)
            winner, end_flg = judge(play_area, inputter1)
            if winner == inputter1:
                reward = -1  # losing to the human is penalized
            # Learn from every human reply except the opening move
            # (no QL move has happened yet in that case).
            if inputter_count != 1:
                ql_flg = 1
        if ql_flg == 1:
            ql_ai_input_before = ql_input_list[-1]
            q_table = q_learning(play_area_before, ql_ai_input_before,
                                 reward, play_area, q_table, end_flg)
        if end_flg:
            break
        inputter_count += 1
    show_play(play_area)
    print('{} win!!!'.format(winner))
    sleep(1)
    return winner, q_table


def MyGraph(y, x_R, x_Q, x_D):
    """Plot win-rate curves (Random / QL / Draw) against episode count.

    matplotlib is imported lazily so that importing this module does not
    require it.  Axis labels are Japanese and use a Windows font file —
    adjust the font path on other systems (TODO confirm target platform).
    """
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
    fp = FontProperties(fname=r'C:\WINDOWS\Fonts\YuGothic.ttf', size=14)
    y = y / 10000  # episodes -> units of 10,000 games
    plt.plot(y, x_R, label="Random")
    plt.plot(y, x_Q, label="QL")
    plt.plot(y, x_D, label="Draw")
    plt.ylim(0, 100)
    plt.xlabel('学習数[万回]', fontproperties=fp)
    plt.ylabel('勝率[%]', fontproperties=fp)
    plt.legend()
    plt.show()


def main():
    """Train the QL agent against the random AI and report win rates.

    The original script ran this loop at import time; it is now guarded
    so the module can be imported without triggering a million games.
    """
    q_table1 = make_q_table()
    episode = 1000000
    d = 1000  # number of reporting buckets
    n = int(episode / (episode / d)) + 1  # == d + 1 sample points
    winner_list = []
    old_R = 0
    old_Q = 0
    old_D = 0
    x_R = np.zeros((n, 1))
    x_Q = np.zeros((n, 1))
    x_D = np.zeros((n, 1))
    y = np.zeros((n, 1))  # episode index per bucket (for MyGraph)
    j = 0
    print('')
    for i in range(episode + 1):
        # Linearly decaying exploration rate.
        epsilon = initial_epsilon * (episode - i) / episode
        # q_table1 is a numpy array mutated in place by q_learning, so
        # learning persists even though the returned table is discarded.
        winner, _ = randomAI_vs_QLAI(1, q_table1, epsilon)
        winner_list.append(winner)
        if i % (episode / d) == 0:
            if i % 100000 == 0:
                print('学習数 :{}'.format(i))
                print('QLの勝率 :{}'.format(
                    (winner_list.count('QL AI') - old_Q) / (episode / d)))
                print('')
            # Per-bucket win rates in percent, e.g.
            # ((1234 - 678) / 1000) * 100 = 55.6%
            x_R[j] = ((winner_list.count('Random AI') - old_R)
                      / (episode / d)) * 100
            x_Q[j] = ((winner_list.count('QL AI') - old_Q)
                      / (episode / d)) * 100
            x_D[j] = ((winner_list.count('Draw') - old_D)
                      / (episode / d)) * 100
            # y[j] = i
            j = j + 1
            old_R = winner_list.count('Random AI')
            old_Q = winner_list.count('QL AI')
            old_D = winner_list.count('Draw')
            # if x_Q[j] > 0.9: break
    # MyGraph(y, x_R, x_Q, x_D)

    # The original could optionally append the win-rate series to a CSV:
    # MyTitle = [str(episode), str(eta), str(gamma)]
    # with open('data.csv', 'a') as file:
    #     writer = csv.writer(file, lineterminator='\n')
    #     writer.writerow(MyTitle)
    #     writer.writerow(x_Q)
    #     writer.writerow(x_R)
    #     writer.writerow(x_D)

    # ...and finish with a human-vs-AI game:
    # winner, q_table1 = player_vs_QLAI(1, q_table1, epsilon=0)


if __name__ == '__main__':
    main()