我正在尝试构建我的第一个井字游戏。我已经模拟了两个玩 MiniMax 策略的玩家。关键是玩家似乎选择了正确最大化自己效用的动作,但他们不会阻止对手连续 3 次。 我无法解释这种行为,因为我构建的代码使得每个玩家都将对手的成功视为自己的负面效用。我会很感激任何建议。
我的主要:
game = Tgame()
first_player = Minimax(game=game)
second_player = Minimax(game=game)
state = game.initial_state
moves = game.play(first_player, second_player)
我的搜索策略:
class Minimax:
def __init__(self, game):
self.game = game
#s,a are the state and the action used to reach it
def max_value(self, state):
if self.game.terminal_test(state):
return self.game.player_utility(state)
values = [self.min_value(s) for s, a in self.game.successors(state)]
return max(values)
def min_value(self, state):
if self.game.terminal_test(state):
return self.game.player_utility(state)
values = [self.max_value(s) for s, a in self.game.successors(state)]
return min(values)
def next_move(self, state):
moves = self.game.actions(state)
return max(moves, key=lambda move: self.min_value(self.game.result(state, move)))
我的游戏:
import copy
class Game:
def __init__(self, initial_state, player):
self.initial_state = initial_state
self.player = player
def actions(self, state):
return []
def result(self, state, action):
return action
def successors(self, state):
possible_actions = self.actions(state)
return [(self.result(state, a), a) for a in possible_actions]
def terminal_test(self, state):
return False
def utility(self, state):
return 0
def player_utility(self, state):
if self.player == 'MAX':
# for MAX player
return self.utility(state)
elif self.player == 'MIN':
# for MIN player
return -self.utility(state)
else:
raise ValueError
def player_symbol(self):
if self.player == 'MAX':
return "x"
elif self.player == 'MIN':
return "o"
else:
raise ValueError
def next_player(self):
"""
Return the next player to move
@return: MAX or MIN
"""
if self.player == 'MAX':
return 'MIN'
else:
return 'MAX'
def play(self, player_one, player_two):
state = self.initial_state
players = [player_one, player_two]
moves = []
while True:
for player in players:
if self.terminal_test(state):
self.display(state)
print('----- GAME OVER -----\n\n')
return moves
self.display(state)
move = player.next_move(state)
state = self.result(state, move)
self.display_move(state, move)
moves.append((move, self.player))
self.player = self.next_player()
print('_____________________')
def display(self, state):
print('_____________________')
print(self.player)
def display_move(self, state, move):
print(self.player, f'--{move}--> ', state)
self.printBoard(state)
def printBoard(self,state):
for row in state:
print('\n')
for col in row:
print("[",col,"] ",end="")
class Tgame(Game):
def __init__(self, initial_state=None, player='MAX'):
super(DummyGame, self).__init__(initial_state, player)
self.initial_state = [["","",""],["","",""],["","",""]]
self.player = player
def actions(self, state):
actions = []
for i in range(0,3):
for j in range(0,3):
newstate = copy.deepcopy(state)
if newstate[i][j] == "":
newstate[i][j] = self.player_symbol()
actions.append(newstate)
return actions
def terminal_test(self, state):
#check for alignments
if self.utility(state) in [-1, 1]:
return True
#check for fully complete board
for row in state:
for col in row:
if col == "":
return False
return True
def utility(self, state):
#check goal on rows
for row in state:
utility = all(row[j] == "x" for j in range(0, 3))
if utility:
return 1
#check goal on columns
for col in range(0, 3):
utility=all(row[col] == "x" for row in state)
if utility:
return 1
#check goal on diagonals
if state[0][0] == state[1][1] == state[2][2] == "x" or state[0][2] == state[1][1] == state[2][0] == "x":
return 1
#check goal on rows
for row in state:
utility=all(row[j] == "o" for j in range(0, 3))
if utility:
return -1
#check goal on columns
for col in range(0, 3):
utility=all(row[col] == "o" for row in state)
if utility:
return -1
#check goal on diagonalals
if state[0][0] == state[1][1] == state[2][2] == "o" or state[0][2] == state[1][1] == state[2][0] == "o":
return -1
return 0