"""Evaluate trained LSTM tic-tac-toe models against scripted opponents.

The module defines the board, a game organizer that plays many games and
logs the win counts to CSV, two scripted players (random and rule-based)
and an LSTM player that restores a TensorFlow checkpoint.
"""

import os
import random
from collections import OrderedDict

import numpy as np
import pandas as pd
import tensorflow as tf

import test_train

config = tf.ConfigProto(
    device_count={"GPU": 0},  # use zero GPUs
    log_device_placement=True,
)

# tictactoe
EMPTY = 0  # empty cell
PLAYER_X = 1  # cross
PLAYER_O = 2  # nought
MARKS = {PLAYER_X: "X", PLAYER_O: "O", EMPTY: " "}
DRAW = 3

# Directory for model files.
# NOTE(review): MODEL_DIR is created here but the checkpoint paths built in
# the driver below are relative to the working directory - confirm intent.
MODEL_DIR = os.path.join(os.path.dirname(__file__), 'models')
os.makedirs(MODEL_DIR, exist_ok=True)

# Directory for the win-count (log_nwon) CSV files.
LOG_NWON_DIR = os.path.join(os.path.dirname(__file__), 'log_nwon')
os.makedirs(LOG_NWON_DIR, exist_ok=True)


class TTTBoard:
    """A 3x3 board stored as a flat list of nine cells (row-major)."""

    # Zero-based index triples that form a winning line.
    _WIN_LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self, board=None):
        """Create an empty board, or wrap ``board`` (the list is not copied)."""
        self.board = [EMPTY] * 9 if board is None else board
        self.winner = None

    def get_possible_pos(self):
        """Return the indices of all empty cells in ascending order."""
        return [i for i, cell in enumerate(self.board) if cell == EMPTY]

    def print_board(self):
        """Print the board as three rows separated by horizontal rules."""
        row = ' {} | {} | {} '
        hr = '\n-----------\n'
        marks = [MARKS[cell] for cell in self.board]
        print((row + hr + row + hr + row).format(*marks))

    def check_winner(self):
        """Set and return ``self.winner`` if a line is complete, else None."""
        for a, b, c in self._WIN_LINES:
            if self.board[a] == self.board[b] == self.board[c] != EMPTY:
                self.winner = self.board[a]
                return self.winner
        return None

    # Original (misspelled) name, kept so existing callers keep working.
    cheak_winner = check_winner

    def check_draw(self):
        """Mark the game as DRAW when the board is full and nobody has won."""
        if not self.get_possible_pos() and self.winner is None:
            self.winner = DRAW
            return DRAW
        return None

    def move(self, pos, player):
        """Place ``player`` at ``pos`` if that cell is empty, then update
        ``self.winner``. An occupied cell is left untouched."""
        if self.board[pos] == EMPTY:
            self.board[pos] = player
        self.check_winner()
        self.check_draw()

    def clone(self):
        """Return a new board holding a copy of the cells."""
        return TTTBoard(self.board.copy())


class TTT_GameOrganizer:
    """Play ``nplay`` games between two players and tally the results."""

    act_turn = 0
    winner = None

    def __init__(self, px, po, nplay=10000, showBoard=False,
                 showResult=False, stat=10000, model_name=None):
        """
        Args:
            px: player object for X; must expose ``name``, ``myturn``, ``act``.
            po: player object for O; same interface as ``px``.
            nplay: number of games to play.
            showBoard: print the board after every move.
            showResult: print the outcome of every game.
            stat: print the running tally every ``stat`` games.
            model_name: CSV file name (relative to LOG_NWON_DIR) for the
                final tally; when None no CSV is written.
        """
        self.player_x = px
        self.player_o = po
        self.nwon = {px.myturn: 0, po.myturn: 0, DRAW: 0}
        self.nplay = nplay
        self.players = (self.player_x, self.player_o)
        self.board = None
        self.disp = showBoard
        self.showResult = showResult
        self.player_turn = self.players[random.randrange(2)]
        self.nplayed = 0
        self.stat = stat
        self.model_name = model_name

    def progress(self):
        """Run all remaining games; player X always moves first."""
        while self.nplayed < self.nplay:
            self.board = TTTBoard()
            self.player_turn = self.players[0]
            while self.board.winner is None:
                if self.disp:
                    print("Turn is " + self.player_turn.name)
                act = self.player_turn.act(self.board)
                self.board.move(act, self.player_turn.myturn)
                if self.disp:
                    self.board.print_board()

                if self.board.winner is None:
                    self.switch_player()
                    continue

                if self.board.winner == DRAW:
                    if self.showResult:
                        print("DrawGame")
                elif self.board.winner == self.player_turn.myturn:
                    out = "Winner : " + self.player_turn.name
                    if self.showResult:
                        print(out)
                else:
                    print("Invalid Move!")
                self.nwon[self.board.winner] += 1

            self.nplayed += 1
            finished = self.nplayed == self.nplay
            if self.nplayed % self.stat == 0 or finished:
                print(self.player_x.name + ":" + str(self.nwon[self.player_x.myturn]) + ","
                      + self.player_o.name + ":" + str(self.nwon[self.player_o.myturn])
                      + ",DRAW:" + str(self.nwon[DRAW]))
            if finished:
                self._write_log()

    def _write_log(self):
        """Append the final tally to the CSV named by ``self.model_name``."""
        # Without a file name there is nowhere to log; the original code
        # raised TypeError here when model_name was left at its default.
        if self.model_name is None:
            return
        # Callers pass names with a leading '/', which os.path.join would
        # treat as an absolute path, so strip leading separators first.
        path = os.path.join(LOG_NWON_DIR, self.model_name.lstrip('/\\'))
        row = OrderedDict([
            (self.player_x.name, self.nwon[self.player_x.myturn]),
            (self.player_o.name, self.nwon[self.player_o.myturn]),
            ('DRAW', self.nwon[DRAW]),
        ])
        df = pd.DataFrame([row])
        # Write the header only when creating the file; later runs append.
        already_there = os.path.isfile(path)
        df.to_csv(path, header=not already_there, index=False,
                  mode='a' if already_there else 'w')

    def switch_player(self):
        """Hand the turn to the other player."""
        if self.player_turn == self.player_x:
            self.player_turn = self.player_o
        else:
            self.player_turn = self.player_x


class PlayerRandom:
    """Player that picks uniformly among the empty cells."""

    def __init__(self, turn):
        self.name = "Random"
        self.myturn = turn

    def act(self, board):
        """Return a randomly chosen empty cell index."""
        acts = board.get_possible_pos()
        return acts[random.randrange(len(acts))]


class CPU:
    """Rule-based player that tries to play the best move."""

    def __init__(self, turn, name="CPU"):
        self.name = name
        self.myturn = turn
        self.yourturn = PLAYER_O if self.myturn == PLAYER_X else PLAYER_X

    def act(self, board):
        """Return the cell index chosen by the rules below, in priority order."""
        acts = board.get_possible_pos()
        cells = board.board

        # Moving first: the opening move is random.
        if len(acts) == 9:
            return acts[random.randrange(len(acts))]

        # Take a winning move if there is one.
        for act in acts:
            tempboard = board.clone()
            tempboard.move(act, self.myturn)
            if tempboard.winner == self.myturn:
                return act

        # Block a move that would let the opponent win next turn.
        for act in acts:
            tempboard = board.clone()
            tempboard.move(act, self.yourturn)
            if tempboard.winner == self.yourturn:
                return act

        # Take the centre if it is free.
        if 4 in acts:
            return 4

        # Moving second, second own move:
        #
        #   x| |        | |x
        #   -----      -----
        #    |o|        |o|
        #   -----      -----
        #    | |x      x| |
        #
        # In these positions an edge must be played or the game is lost.
        if len(acts) == 6:
            for a, mid, c in ((0, 4, 8), (2, 4, 6)):
                if cells[a] == cells[c] != cells[mid] and cells[a] != EMPTY:
                    edges = [act for act in acts if act % 2 == 1]
                    return edges[random.randrange(len(edges))]

        # Moving first, third own move:
        #
        #   o|1|1
        #   -----
        #    | |o
        #   -----
        #    | |
        #
        # In this position (and its rotations / reflections) one of the
        # cells marked 1 must be played or the game is lost. If one of them
        # is already taken there is nothing to consider.
        if len(acts) == 5:
            cond2 = ((0, 5), (2, 7), (3, 8), (1, 6), (0, 7), (2, 3), (1, 8), (5, 6))
            pos = ((1, 2), (5, 8), (6, 7), (0, 3), (3, 6), (0, 1), (2, 5), (7, 8))
            for (c0, c1), answer in zip(cond2, pos):
                threat = cells[c0] == cells[c1] != EMPTY
                both_free = cells[answer[0]] == cells[answer[1]] == EMPTY
                if threat and both_free:
                    return answer[random.randrange(2)]

        # Prefer a corner when one is free.
        corners = [act for act in acts if act % 2 == 0 and act != 4]
        if corners:
            return corners[random.randrange(len(corners))]

        # None of the rules applied.
        return acts[random.randrange(len(acts))]


class LSTM(test_train.LSTM):
    """Player backed by a trained LSTM restored from a checkpoint.

    Relies on ``max_len``, ``n_in`` and ``inference`` provided by
    ``test_train.LSTM``.
    """

    def __init__(self, name='LSTM', turn=None, model_name=None):
        """Build the inference graph and restore weights from ``model_name``."""
        super().__init__()
        self.name = name
        self.myturn = turn
        self.model_name = model_name
        self.past_data = []  # boards seen so far in the current game

        tf.reset_default_graph()

        # Model definition.
        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.max_len, self.n_in])
        self.y = self.inference(self.x)
        self.predictions = self.softmax_function(y=self.y)
        self.masked_predictions = self.masked_softmax(y=self.y, x=self.x)

        # Session; call close_session() once validation is finished.
        self.sess = tf.Session(config=config)

        # Restore the saved model parameters.
        saver = tf.train.Saver()
        saver.restore(self.sess, self.model_name)

    def softmax_function(self, y):
        """Return the softmax of ``y`` over its last axis."""
        return tf.nn.softmax(y, axis=-1)

    def masked_softmax(self, y, x):
        """Softmax over ``y`` with every cell that is non-zero in ``x``
        pushed to -inf, so occupied cells are excluded from the candidates."""
        _padding = tf.fill(tf.shape(x), 0.)
        mask = tf.equal(x, _padding)
        padding = tf.fill(tf.shape(y), -np.inf)
        masked_y = tf.where(mask, y, padding)
        return tf.nn.softmax(masked_y, axis=-1)

    def act(self, board):
        """Return the model's move for ``board``.

        The unmasked prediction is used when it is legal; otherwise the
        masked prediction is tried.
        NOTE(review): returns None if neither is legal, which the caller's
        ``board.move`` cannot index with - confirm whether that can occur.
        """
        acts = board.get_possible_pos()
        if len(acts) >= 8:
            # Eight or nine free cells means a new game: reset the history.
            self.past_data.clear()
        self.past_data.append(board.board[:])

        batch = self.preprocessing()
        feed_dict = {self.x: batch}

        predictions = self.predictions.eval(session=self.sess,
                                            feed_dict=feed_dict)
        act = self.processing_of_output(predictions)
        if act in acts:
            return act

        # The predicted cell is not playable: drop occupied cells and retry.
        masked_predictions = self.masked_predictions.eval(session=self.sess,
                                                          feed_dict=feed_dict)
        act = self.processing_of_output(masked_predictions)
        if act in acts:
            return act
        return None

    def preprocessing(self):
        """Return the game history zero-padded to ``max_len`` steps, shaped
        like the input placeholder."""
        padding = [0] * 9
        input_data = self.past_data[:]
        input_data.extend(padding for _ in range(len(input_data), self.max_len))
        # The batch is fed to self.x, which is declared with n_in features,
        # so reshape with n_in rather than n_out.
        return np.array(input_data).reshape(1, self.max_len, self.n_in)

    def processing_of_output(self, predictions):
        """Return the argmax of the prediction at the latest time step
        (many-to-many output reduced to many-to-one)."""
        subscript = len(self.past_data) - 1
        output = predictions[0][subscript]
        return np.argmax(output, axis=-1)

    def close_session(self):
        """Close the TensorFlow session."""
        self.sess.close()


def _validate_models(make_opponent, log_prefix, nplay, n_validation, n_model):
    """Play ``nplay`` games per checkpoint 1..n_model against the opponent
    built by ``make_opponent`` and log each tally to its own CSV."""
    for _ in range(n_validation):
        for model_number in range(1, n_model + 1):
            n_data = 512 * model_number
            model_name = ('{0}/LSTM_model_ndata_{1}_77824_nhidden_256'
                          '_batch_512_epochs_100.ckpt').format(model_number, n_data)
            _model_name = ('/{0}_model_nplay_{1}_ndata_{2}_77824_nhidden_256'
                           '_batch_512_epochs_100.csv').format(log_prefix, nplay, n_data)

            # validation
            p1 = make_opponent(PLAYER_X)
            p2 = LSTM(turn=PLAYER_O, model_name=model_name)
            try:
                game = TTT_GameOrganizer(p1, p2, nplay=nplay,
                                         model_name=_model_name)
                game.progress()
            finally:
                # Release the session even if a game raised.
                p2.close_session()


def main():
    """Validate every checkpoint against the random and rule-based players."""
    nplay = 10000
    n_validation = 1
    n_model = 152

    # To evaluate a single checkpoint instead, build its path as in
    # _validate_models, create PlayerRandom(PLAYER_X) (or CPU(PLAYER_X)) and
    # LSTM(turn=PLAYER_O, model_name=...), then run
    # TTT_GameOrganizer(p1, p2, nplay=nplay, model_name='/test.csv').progress()
    # followed by p2.close_session().

    _validate_models(PlayerRandom, '01Random_vs_LSTM', nplay, n_validation, n_model)
    _validate_models(CPU, 'CPU_vs_LSTM', nplay, n_validation, n_model)


if __name__ == '__main__':
    main()