我正在为 Sebastian Lague 的“Tiny Chess Bots”竞赛创建一个国际象棋机器人。 它使用带有上置信界限的蒙特卡洛树搜索,问题是它的表现非常糟糕 - 以至于我怀疑代码是否正确。
我唯一的代码在这个 C# Mybot.cs 文件中,MCTS 实现位于底部。
public static class Rand{
public static Random random = new ();
}
public class Node{
public int visits = 2;
public double score = 1;
public Move move;
public Node parent;
public List<Node> children = new List<Node>{};
//public Node LastBestChild = null!;
public Node(Move m, Node p){
move = m; //move is from parent to node
parent = p;
}
public void ExpandNode(Board b){
Move[] moves = b.GetLegalMoves();
for(int l=0; l<moves.Length; l++){
children.Add(new Node(moves[l], this));
}
}
public void Update(double result){
visits += 1;
//result: 0 for loss and 1 for win and 0.5 for draw
score += result;
}
public bool IsLeaf(){
return (children.Count == 0);
}
public bool HasParent(){
return (parent != null);
}
public double UCBValue(){
return ((score/visits)+ 0.4*Math.Sqrt(Math.Log(parent.visits)/visits));
}
public Node MaxUCB(){
double maxVal = children[0].UCBValue();
Node maxNode = children[0];
foreach(Node n in children){
double temp = n.UCBValue();
//Console.WriteLine(n.move.ToString() + ": UCB = " + temp.ToString());
if(temp > maxVal || (temp == maxVal && Rand.random.Next(0,1) == 0)){
maxVal = temp;
maxNode = n;
}
}
return maxNode;
}
public Node MaxVisits(){
double maxVal = children[0].visits;
Node maxNode = children[0];
foreach(Node n in children){
Console.WriteLine(n.move.ToString() + ": visits = " + n.visits.ToString() + ": score = " + n.score.ToString());
double temp = n.visits;
if(temp > maxVal || (temp == maxVal && Rand.random.Next(0,1) == 0)){
maxVal = temp;
maxNode = n;
}
}
Console.WriteLine("###########################################################################");
return maxNode;
}
};
public class MyBot : IChessBot
{
public Move Think(Board board, Timer timer)
{
Move[] moves = board.GetLegalMoves();
Node root = new Node(Move.NullMove, null!);
for(int i = 0; i < 20000; i++){
Board board1 = Board.CreateBoardFromFEN(board.GetFenString());
Node currentNode = root;
//Select
while (!currentNode.IsLeaf()){
currentNode = currentNode.MaxUCB();
//Update Board when moving from Node to Node
board1.MakeMove(currentNode.move);
}
//Expansion
if (board1.GetLegalMoves().Length > 0){
currentNode.ExpandNode(board1);
currentNode = currentNode.children[Rand.random.Next(0, currentNode.children.Count)];
//Update Board when moving from Node to Node
board1.MakeMove(currentNode.move);
}
//Rollouts
Move[] legalMoves = board1.GetLegalMoves();
int numCurrentMoves = legalMoves.Length;
bool colour = board.IsWhiteToMove;
while (!board1.IsInCheckmate() && !board1.IsDraw()){
Move moveToMake = legalMoves[Rand.random.Next(0, numCurrentMoves)];
//Update board
board1.MakeMove(moveToMake);
legalMoves = board1.GetLegalMoves();
numCurrentMoves = legalMoves.Length;
colour = !colour;
}
//Get results
double result =0;
if(board1.IsDraw()){
result = 0.5;
} else{
if (board1.IsWhiteToMove == colour){
result = 1;
}
}
//Back propagation
while (currentNode.HasParent()){
currentNode.Update(result);
currentNode = currentNode.parent;
result = 1-result;
}
currentNode.Update(result);
}
//Select Best Move
Move bestMove = root.MaxVisits().move;
return bestMove;
}
}
我有理由怀疑我的代码不正确,因为即使将迭代次数增加到令人发指的高数量(400,000 1,000,000+),引擎仍然会犯错误,比如在第 4 步上挂皇后。
要尝试解决此问题,我有: