国际象棋机器人未达到预期水平 - 蒙特卡洛树搜索

问题描述 投票:0回答:0

我正在为 Sebastian Lague 的“Tiny Chess Bots”竞赛创建一个国际象棋机器人。 它使用带有上置信界限的蒙特卡洛树搜索,问题是它的表现非常糟糕 - 以至于我怀疑代码是否正确。

我唯一的代码在这个 C# Mybot.cs 文件中,MCTS 实现位于底部。

public static class Rand{
    public static Random random = new ();
}

public class Node{
    public int visits = 2;
    public double score = 1;
    public Move move;
    public Node parent;
    public List<Node> children = new List<Node>{};
    //public Node LastBestChild = null!;

    public Node(Move m, Node p){
        move = m; //move is from parent to node
        parent = p;
    }

    public void ExpandNode(Board b){
        Move[] moves = b.GetLegalMoves();
        for(int l=0; l<moves.Length; l++){
            children.Add(new Node(moves[l], this));
        }
    }

    public void Update(double result){
        visits += 1;
        //result: 0 for loss and 1 for win and 0.5 for draw
        score +=  result;

        }

    public bool IsLeaf(){
        return (children.Count == 0);
    }

    public bool HasParent(){
        return (parent != null);
    }

    public double UCBValue(){
        return ((score/visits)+ 0.4*Math.Sqrt(Math.Log(parent.visits)/visits));
    }

    public Node MaxUCB(){
        double maxVal = children[0].UCBValue();
        Node maxNode = children[0];
        foreach(Node n in children){
            double temp = n.UCBValue();
            //Console.WriteLine(n.move.ToString() + ": UCB = " + temp.ToString());
            if(temp > maxVal || (temp == maxVal && Rand.random.Next(0,1) == 0)){
                maxVal = temp;
                maxNode = n;
            }
        }
        return maxNode;
    }

    public Node MaxVisits(){
        double maxVal = children[0].visits;
        Node maxNode = children[0];
        foreach(Node n in children){
            Console.WriteLine(n.move.ToString() + ": visits = " + n.visits.ToString() + ": score = " + n.score.ToString());
            double temp = n.visits;
            if(temp > maxVal || (temp == maxVal && Rand.random.Next(0,1) == 0)){
                maxVal = temp;
                maxNode = n;
            }
        }
        Console.WriteLine("###########################################################################");
        return maxNode;
    }

};



public class MyBot : IChessBot
{
    public Move Think(Board board, Timer timer)
    {
        Move[] moves = board.GetLegalMoves();
        Node root = new Node(Move.NullMove, null!);

        for(int i = 0; i < 20000; i++){
            Board board1 = Board.CreateBoardFromFEN(board.GetFenString());
            Node currentNode = root;
            
            //Select
            while (!currentNode.IsLeaf()){
                currentNode = currentNode.MaxUCB();
                
                //Update Board when moving from Node to Node
                board1.MakeMove(currentNode.move);
            }

            //Expansion
            if (board1.GetLegalMoves().Length > 0){
                currentNode.ExpandNode(board1);
                currentNode = currentNode.children[Rand.random.Next(0, currentNode.children.Count)];
                
                //Update Board when moving from Node to Node
                board1.MakeMove(currentNode.move);
            }


            //Rollouts
            Move[] legalMoves = board1.GetLegalMoves();
            int numCurrentMoves = legalMoves.Length;
            bool colour = board.IsWhiteToMove;

            while (!board1.IsInCheckmate() && !board1.IsDraw()){ 
                Move moveToMake = legalMoves[Rand.random.Next(0, numCurrentMoves)];
            
                //Update board
                board1.MakeMove(moveToMake);
            
                legalMoves = board1.GetLegalMoves();
                numCurrentMoves = legalMoves.Length;

                colour  = !colour;
            }

            //Get results
            double result =0;
            if(board1.IsDraw()){
                result = 0.5;
            } else{
                if (board1.IsWhiteToMove == colour){
                    result = 1;
                }
            }

            //Back propagation
            while (currentNode.HasParent()){
                currentNode.Update(result);
                currentNode = currentNode.parent;
                result = 1-result;
            }
            currentNode.Update(result);


        }

        //Select Best Move
        Move bestMove = root.MaxVisits().move;
        return bestMove;
    }
}

我有理由怀疑我的代码不正确,因为即使将迭代次数增加到令人发指的高数量(400,000 1,000,000+),引擎仍然会犯错误,比如在第 4 步上挂皇后。

要尝试解决此问题,我有:

  1. 倒置的布尔标志 - 也许我只是多次否定一个标志,并且最大化而不是最小化我的损失。
  2. 记录了顶级节点的分数和访问量 - 没有显示太多。
  3. 调整了迭代大小和探索常数(在 UCBValue 函数中)。也没有太大改变。
c# artificial-intelligence reinforcement-learning chess monte-carlo-tree-search
© www.soinside.com 2019 - 2024. All rights reserved.