import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.utils import shuffle

config = tf.ConfigProto()
config.gpu_options.allow_growth = True

max_len = 4

''' Model checkpoint directory '''
MODEL_DIR = os.path.join(os.path.dirname(__file__), 'model')
if not os.path.exists(MODEL_DIR):
    os.mkdir(MODEL_DIR)

''' Training-log directory '''
LOG_DIR = os.path.join(os.path.dirname(__file__), 'log')
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)

''' TensorBoard log directory '''
TBLOG_DIR = os.path.join(os.path.dirname(__file__), 'tblog')
if not os.path.exists(TBLOG_DIR):
    os.mkdir(TBLOG_DIR)


class Prepare_data:
    def __init__(self, csv_file=None, fillna=-1, batch_size=1):
        self.df = pd.read_csv(csv_file, header=None)
        self.row, self.col = self.df.shape
        self.fillna = fillna
        # Pad every game record to 9 columns (one per possible move) so all
        # rows have a fixed width; missing moves become `fillna`. Integer
        # column labels are used so that the `.loc[i][j]` lookups below keep
        # working when the CSV has fewer than 9 columns (the original added
        # string labels, which integer lookups would miss).
        for i in range(self.col, 9):
            self.df[i] = np.nan
        self.df = self.df.fillna(self.fillna).astype(np.int8)
        self.col = len(self.df.columns)
        self.max_len = max_len
        self.batch_size = batch_size

    def get_rand_move(self):
        """Convert each recorded move sequence into (board state, next move)
        pairs, both encoded over the 9 cells."""
        board, play_response = [], []
        temp_b, temp_r = [], []
        tempboard_b = [0] * 9
        tempboard_r = [0] * 9
        for i in range(self.row):
            for j in range(self.col - 1):
                if j % 2 == 0 and self.df.loc[i][j] != self.fillna:
                    tempboard_b[self.df.loc[i][j]] = 1   # first player's mark
                elif j % 2 == 1 and self.df.loc[i][j] != self.fillna:
                    tempboard_b[self.df.loc[i][j]] = 2   # second player's mark
                else:
                    tempboard_b[:] = [0] * 9             # past the end of this game
                if j % 2 == 1:
                    temp_b.append(list(tempboard_b))
                    a = self.df.loc[i][j + 1]
                    if a != self.fillna:
                        tempboard_r[a] = 1               # one-hot target: the reply move
                    temp_r.append(list(tempboard_r))
                    tempboard_r[:] = [0] * 9
            board.append(temp_b.copy())
            play_response.append(temp_r.copy())
            temp_b.clear()
            temp_r.clear()
            tempboard_b[:] = [0] * 9
        return board, play_response

    def reduce_data(self, X, Y):
        """Shuffle and trim the data so its length is a multiple of batch_size."""
        surplus = len(X) % self.batch_size
        length = len(X) - surplus
        x_, y_ = shuffle(X, Y)
        return x_[0:length], y_[0:length]

    def splitting_datasets(self, data, label, seq):
        """Return a shuffled subset covering the first `seq` batches."""
        X, Y = shuffle(data, label)
        X_train = X[0:self.batch_size * seq]
        Y_train = Y[0:self.batch_size * seq]
        return X_train, Y_train


class LSTM:
    def __init__(self, epochs=1, max_len=max_len, file_name=None):
        self.n_in = 9       # board cells
        self.n_hidden = 256
        self.n_out = 9      # one logit per cell
        self.epochs = epochs
        self.max_len = max_len
        self.file_name = file_name

    def inference(self, x):
        def weight_variable(scope_name, shape):
            with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
                initial = tf.truncated_normal(shape, stddev=0.01)
                w = tf.get_variable("weight", initializer=initial)
            return w

        def bias_variable(scope_name, shape):
            with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
                initial = tf.truncated_normal(shape, stddev=0.01)
                b = tf.get_variable("bias", initializer=initial)
            return b

        with tf.name_scope('inference'):
            batch_size_T = tf.shape(x)[0]
            cell = tf.contrib.rnn.LSTMCell(num_units=self.n_hidden, use_peepholes=False,
                                           activation=None, reuse=tf.AUTO_REUSE, name='LSTMCell')
            initial_state = cell.zero_state(batch_size_T, tf.float32)
            seq_len = self.length(x)
            output, final_state = tf.nn.dynamic_rnn(cell=cell, inputs=x, sequence_length=seq_len,
                                                    initial_state=initial_state, time_major=False)
            V = weight_variable("output_V", [self.n_hidden, self.n_out])
            c = bias_variable("output_c", [self.n_out])
            # Drop padded timesteps, apply the dense output layer, then
            # scatter the logits back into the padded shape.
            reshaped_output, mask = self.reshape_1(output, seq_len)
            reshaped_output = tf.reshape(reshaped_output, [-1, self.n_hidden])
            reshaped_output = tf.matmul(reshaped_output, V) + c
            y = self.reshape_2(reshaped_output, output, mask)
        return y
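
    # Commentary (added): length() infers the true sequence length from the
    # data itself. tf.sign(tf.reduce_max(tf.abs(x), -1)) is 1 for any timestep
    # with a non-zero board state and 0 for an all-zero padded step, so summing
    # over the time axis gives the number of real steps. For a sample padded to
    # max_len=4 with two real board states, used = [1, 1, 0, 0] and length = 2.
    # reshape_1() uses that length to mask out padded steps before the dense
    # layer, and reshape_2() scatters the logits back into the padded
    # [batch, max_len, n_out] shape that cost() and accuracy() expect.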
    def cost(self, y, t):
        with tf.name_scope('cost'):
            #cost = tf.reduce_mean(-tf.reduce_sum(t * tf.log(y + 1e-10)))  # plain cross-entropy
            cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.argmax(t, -1), logits=y))
            tf.summary.scalar('cost', cost)
        return cost

    def training(self, cost):
        with tf.name_scope('training'):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)
            train_step = optimizer.minimize(cost)
        return train_step

    def accuracy(self, y, t):
        with tf.name_scope('accuracy'):
            correct = tf.equal(tf.argmax(t, axis=-1), tf.argmax(y, axis=-1))
            return tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

    def length(self, sequence):
        # A timestep counts as "used" if any of its features is non-zero.
        used = tf.sign(tf.reduce_max(tf.abs(sequence), axis=-1))
        length = tf.reduce_sum(used, axis=-1)
        return tf.cast(length, tf.int32)

    def reshape_1(self, output, length):
        # Flatten the RNN output, keeping only the unpadded timesteps.
        mask = tf.sequence_mask(length, self.max_len)
        reshaped_output = tf.boolean_mask(output, mask)
        return reshaped_output, mask

    def reshape_2(self, reshaped_output, output, mask):
        # Scatter the per-timestep logits back into [batch, max_len, n_out].
        idx = tf.to_int32(tf.where(mask))
        shape = tf.concat([tf.shape(output)[:-1], tf.shape(reshaped_output)[-1:]], 0)
        prediction = tf.scatter_nd(idx, reshaped_output, shape)
        return prediction

    def masked_softmax(self, y, x):
        # Softmax over empty cells only: logits for occupied cells get -inf.
        _padding = tf.fill(tf.shape(x), 0.)
        mask = tf.equal(x, _padding)
        padding = tf.fill(tf.shape(y), -np.inf)
        masked_y = tf.where(mask, y, padding)
        masked_prediction = tf.nn.softmax(masked_y, axis=-1)
        return masked_prediction

    def train(self, data=None, label=None, n_model=1, batch_size=1, n_data=None, divided_data=None):
        tf.reset_default_graph()

        # Model setup
        x = tf.placeholder(dtype=tf.float32, shape=[None, self.max_len, self.n_in])
        target = tf.placeholder(dtype=tf.float32, shape=[None, self.max_len, self.n_out])
        y = self.inference(x)
        #masked_prediction = self.masked_softmax(y=y, x=x)
        cost = self.cost(y=y, t=target)
        training = self.training(cost)
        accuracy = self.accuracy(y, target)

        # Data setup
        n_batches = int(len(data))  # total number of samples; batching is done by stride below

        # Training log
        logs = []

        # Model training
        with tf.Session(config=config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            summaries = tf.summary.merge_all()
            file_writer = tf.summary.FileWriter(TBLOG_DIR, sess.graph)
            saver = tf.train.Saver(max_to_keep=None, pad_step_number=True)

            for epoch in range(self.epochs):
                X_, Y_ = shuffle(data, label)
                train_cost = 0
                train_acc = 0
                count = 0
                for i in range(0, n_batches, batch_size):
                    # List slicing clamps at the end, so the last (possibly
                    # short) batch is handled automatically.
                    input_batch = X_[i:i + batch_size]
                    output_batch = Y_[i:i + batch_size]
                    feed_dict = {x: input_batch, target: output_batch}
                    sess.run(training, feed_dict=feed_dict)
                    train_cost += cost.eval(session=sess, feed_dict=feed_dict)
                    train_acc += accuracy.eval(session=sess, feed_dict=feed_dict)
                    count += 1
                train_cost /= count
                train_acc /= count
                #if (epoch+1) % 10 == 0 or epoch == 0:
                print("epoch: {0}, cost: {1:.3f}, accuracy: {2:3.1f}".format(
                    epoch + 1, train_cost, train_acc * 100))

                # logging
                log = {'epoch': epoch + 1, 'train_cost': train_cost, 'train_accuracy': train_acc}
                logs.append(log)

                # Save summaries for TensorBoard at the end of every epoch
                summary = sess.run(summaries, feed_dict=feed_dict)
                file_writer.add_summary(summary, epoch)

            #print("Training finished")

            # save_model (the per-model subdirectory must exist before saving)
            model_subdir = os.path.join(MODEL_DIR, str(n_model))
            if not os.path.exists(model_subdir):
                os.mkdir(model_subdir)
            model_path = saver.save(sess, MODEL_DIR + '/{0}/{6}_ndata_{1}_{2}_nhidden_{3}_batch_{4}_epochs_{5}.ckpt'.format(
                n_model, divided_data, n_data, self.n_hidden, batch_size, self.epochs, self.file_name))
            print("Model saved to:", model_path)

        # save_logs
        df = pd.DataFrame(logs)
        df.to_csv(LOG_DIR + '/{5}_acccost_train_ndata_{0}_{1}_nhidden_{2}_batch_{3}_epochs_{4}.csv'.format(
            divided_data, n_data, self.n_hidden, batch_size, self.epochs, self.file_name),
            index=False, mode='w')
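
# A minimal sketch (added, not part of the original script) of how a checkpoint
# written by LSTM.train() could be restored for inference. The graph has to be
# rebuilt with the same shapes before restoring; `ckpt_path` is whichever .ckpt
# path saver.save() printed at training time (hypothetical here), and `boards`
# is a [batch, max_len, 9] array encoded like the training data. masked_softmax()
# restricts the predicted distribution to cells that are still empty.
def restore_and_predict(ckpt_path, boards):
    tf.reset_default_graph()
    model = LSTM(max_len=max_len)
    x = tf.placeholder(dtype=tf.float32, shape=[None, max_len, 9])
    y = model.inference(x)
    prediction = model.masked_softmax(y=y, x=x)
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        saver.restore(sess, ckpt_path)
        return sess.run(prediction, feed_dict={x: boards})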

if __name__ == '__main__':
    # data making
    file_name = "dataset_s.csv"
    #file_name = "dataset_sd.csv"
    batch_size = 1
    data = Prepare_data(csv_file=file_name, batch_size=batch_size)
    X, Y = data.get_rand_move()
    x, y = data.reduce_data(X, Y)
    length = len(x) // batch_size

    # train
    model = LSTM(epochs=100, file_name=file_name)
    model.train(data=X, label=Y, n_model=1, batch_size=batch_size,
                n_data=len(X), divided_data=len(X))

    '''
    for i in range(length):
        x_train, y_train = data.splitting_datasets(data=x, label=y, seq=i+1)
        if not len(x_train) == 0:
            model.train(data=x_train, label=y_train, n_model=i+1, batch_size=batch_size,
                        n_data=len(x), divided_data=len(x_train))
    '''
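
# Usage note (added): running this script trains on `dataset_s.csv` from the
# current working directory and writes checkpoints to ./model, per-epoch CSV
# logs to ./log, and TensorBoard summaries to ./tblog next to this file.
# Training curves can then be inspected with:
#     tensorboard --logdir tblog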