Minimax Tic Tac Toe 游戏没有选择最佳着法

问题描述 投票:0回答:0

我正在尝试构建我的第一个井字游戏。我已经模拟了两个玩 MiniMax 策略的玩家。关键是玩家似乎选择了正确最大化自己效用的动作,但他们不会阻止对手连续 3 次。 我无法解释这种行为,因为我构建的代码使得每个玩家都将对手的成功视为自己的负面效用。我会很感激任何建议。

我的主要:


game = Tgame()
first_player = Minimax(game=game)
second_player = Minimax(game=game)

state = game.initial_state
moves = game.play(first_player, second_player)

我的搜索策略:

class Minimax:

    def __init__(self, game):
        self.game = game

#s,a are the state and the action used to reach it

    def max_value(self, state):
        if self.game.terminal_test(state):
            return self.game.player_utility(state)
        values = [self.min_value(s) for s, a in self.game.successors(state)]
        return max(values)

    def min_value(self, state):
        if self.game.terminal_test(state):
            return self.game.player_utility(state)
        values = [self.max_value(s) for s, a in self.game.successors(state)]
        return min(values)


    def next_move(self, state):
        moves = self.game.actions(state)
        return max(moves, key=lambda move: self.min_value(self.game.result(state, move)))

我的游戏:

import copy

class Game:
    def __init__(self, initial_state, player):
        self.initial_state = initial_state
        self.player = player

    def actions(self, state):
        return []

    def result(self, state, action):
        return action

    def successors(self, state):
        possible_actions = self.actions(state)
        return [(self.result(state, a), a) for a in possible_actions]

    def terminal_test(self, state):
        return False

    def utility(self, state):
        return 0


    def player_utility(self, state):
        if self.player == 'MAX':
            # for MAX player
            return self.utility(state)
        elif self.player == 'MIN':
            # for MIN player
            return -self.utility(state)
        else:
            raise ValueError


    def player_symbol(self):
        if self.player == 'MAX':
            return "x"
        elif self.player == 'MIN':
            return "o"
        else:
            raise ValueError


    def next_player(self):
        """
        Return the next player to move
        @return: MAX or MIN
        """
        if self.player == 'MAX':
            return 'MIN'
        else:
            return 'MAX'

    def play(self, player_one, player_two):

        state = self.initial_state
        players = [player_one, player_two]
        moves = []
        while True:
            for player in players:
                if self.terminal_test(state):
                    self.display(state)
                    print('----- GAME OVER -----\n\n')
                    return moves
                self.display(state)
                move = player.next_move(state)
                state = self.result(state, move)
                self.display_move(state, move)
                moves.append((move, self.player))
                self.player = self.next_player()
                print('_____________________')



    def display(self, state):
        print('_____________________')
        print(self.player)

    def display_move(self, state, move):
        print(self.player, f'--{move}--> ', state)
        self.printBoard(state)

    def printBoard(self,state):
        for row in state:
            print('\n')
            for col in row:
                print("[",col,"] ",end="")



class Tgame(Game):
    def __init__(self, initial_state=None, player='MAX'):
        super(DummyGame, self).__init__(initial_state, player)
        self.initial_state = [["","",""],["","",""],["","",""]]
        self.player = player

    def actions(self, state):
        actions = []
        for i in range(0,3):
            for j in range(0,3):
                newstate = copy.deepcopy(state)
                if newstate[i][j] == "":
                    newstate[i][j] = self.player_symbol()
                    actions.append(newstate)
        return actions


    def terminal_test(self, state):

        #check for alignments
        if self.utility(state) in [-1, 1]:
            return True

        #check for fully complete board
        for row in state:
            for col in row:
                if col == "":
                    return False

        return True

    def utility(self, state):
        #check goal on rows
        for row in state:
            utility = all(row[j] == "x" for j in range(0, 3))
            if utility:
                return 1

        #check goal on columns
        for col in range(0, 3):
            utility=all(row[col] == "x" for row in state)
            if utility:
                return 1

        #check goal on diagonals
        if state[0][0] == state[1][1] == state[2][2] == "x" or state[0][2] == state[1][1] == state[2][0] == "x":
            return 1

        #check goal on rows
        for row in state:
            utility=all(row[j] == "o" for j in range(0, 3))
            if utility:
                return -1

        #check goal on columns
        for col in range(0, 3):
            utility=all(row[col] == "o" for row in state)
            if utility:
                return -1

        #check goal on diagonalals
        if state[0][0] == state[1][1] == state[2][2] == "o" or state[0][2] == state[1][1] == state[2][0] == "o":
            return -1
        return 0

search tic-tac-toe minimax agent-based-modeling game-theory
© www.soinside.com 2019 - 2024. All rights reserved.