我不熟悉使用 Unity 和 ML-Agents。我已经为我的模型创建了一个环境并开始在那里进行训练。然而,我的模型在脚本中的每时每刻都在跳跃,即使在没有必要的时候也是如此。我已经给出了负面反馈,以防止它不必要地跳跃,但它一直在跳跃。我该如何解决这个问题?
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
public class MovetoGoal : Agent
{
[SerializeField] private Transform targetTransform;
[SerializeField] private Transform restart;
[SerializeField] private Rigidbody2D rb;
public float JumpPower = 0.1f;
private bool isGrounded = true;
private float previousDistanceToPlayer;
private float totalReward;
public override void OnEpisodeBegin()
{
transform.position = restart.transform.position;
}
public override void CollectObservations(VectorSensor sensor)
{
sensor.AddObservation(transform.localPosition);
sensor.AddObservation(targetTransform.localPosition);
sensor.AddObservation(rb.velocity);
}
public override void OnActionReceived(ActionBuffers actions)
{
float moveX = actions.ContinuousActions[0];
int jump = actions.DiscreteActions[0];
rb.velocity = new Vector2(moveX * 8f, rb.velocity.y);
if (jump != 0 && isGrounded)
{
rb.velocity = new Vector2(rb.velocity.x, JumpPower);
isGrounded = false;
SetReward(-100f);
totalReward = totalReward - 1f;
}
float distanceToPlayer = Vector2.Distance(transform.position, targetTransform.position);
// If the agent gets too far away from the player, give a negative reward
if (distanceToPlayer >= previousDistanceToPlayer)
{
SetReward(-0.1f);
totalReward = totalReward - 0.1f;
}
// If the agent gets closer to the player, give a positive reward
if (distanceToPlayer < previousDistanceToPlayer)
{
SetReward(0.1f);
totalReward = totalReward + 0.1f;
}
// Update the previous distance to the player for the next time step
previousDistanceToPlayer = distanceToPlayer;
}
public override void Heuristic(in ActionBuffers actionsOut)
{
ActionSegment<float> continousActions = actionsOut.ContinuousActions;
continousActions[0] = Input.GetAxisRaw("Horizontal");
}
private void OnTriggerEnter2D(Collider2D other)
{
if (other.gameObject.tag == "Goal")
{
//Debug.Log(totalReward);
SetReward(+1f);
totalReward = totalReward + 1f;
EndEpisode();
}
if (other.gameObject.tag == "Wall")
{
//Debug.Log(totalReward);
SetReward(-1f);
totalReward = totalReward - 1f;
EndEpisode();
}
}
void OnCollisionEnter2D(Collision2D col)
{
if (col.gameObject.tag == "Ground")
{
isGrounded = true;
}
}
}
这是模型脚本。我可以做任何改进吗?