Search Unity

Bug My Agent does stupid things in a simple environment

Discussion in 'ML-Agents' started by Ryato03, Feb 10, 2024.

  1. Ryato03

    Ryato03

    Joined:
    Jan 30, 2024
    Posts:
    2
    Hi! I've recently started studying ML-Agents and many of its applications.
    I recently created a simple new project in which a cube must move around a room in order to collect an object that moves to a new position each time it is collected.
    I implemented movement along one direction, rotation and jumping.
    When I start training, the cube immediately starts jumping and never stops. It doesn't just jump on the spot, but it NEVER moves while staying close to the ground.

    This is the code:

    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4. using Unity.MLAgents;
    5. using Unity.MLAgents.Sensors;
    6. using Unity.MLAgents.Actuators;
    7.  
    /// <summary>
    /// ML-Agents agent for a cube ("Pino") that drives forward/backward, turns around the
    /// Y axis and jumps in order to touch a target. The target is moved to a new random
    /// position every time it is touched.
    /// </summary>
    /// <remarks>
    /// Actions: three discrete branches — [0] move, [1] rotate, [2] jump. The meaning of
    /// each value is given by the constants at the top of the class, which are shared by
    /// <see cref="OnActionReceived"/> and <see cref="Heuristic"/>.
    /// Observations: 8 floats (see <see cref="CollectObservations"/>). The Behavior
    /// Parameters component must declare a vector observation Space Size of 8; with a
    /// smaller size ML-Agents is expected to truncate the extra values and log a warning
    /// (TODO confirm against the installed ML-Agents version).
    /// </remarks>
    public class PinoScript : Agent
    {
        // Values of discrete branch 0 (movement).
        private const int MoveForward = 0;
        private const int MoveBackward = 1;

        // Values of discrete branch 1 (rotation).
        private const int TorquePositiveY = 0;
        private const int TorqueNegativeY = 1;

        // Value 2 means "do nothing" on both the movement and the rotation branch.
        private const int ActionIdle = 2;

        // Values of discrete branch 2 (jump).
        private const int JumpNo = 0;
        private const int JumpYes = 1;

        // Reward shaping values.
        // NOTE(review): the jump penalty has the same magnitude as the target reward, and
        // ML-Agents guidance suggests keeping reward magnitudes around 1.0 or below for
        // stable learning — consider rescaling. Values are left unchanged here.
        private const float JumpPenalty = -5f;
        private const float StepPenalty = -0.0005f;
        private const float FallPenalty = -0.5f;
        private const float TargetReward = 5f;
        private const float WallPenalty = -1f;

        // Below this world-space height the agent counts as having fallen out of the room.
        private const float FallHeight = -1.3f;

        // Tags assigned to the scene objects this agent reacts to.
        private const string TargetTag = "PinoTarget";
        private const string WallTag = "Muri";
        private const string FloorTag = "Pavimento";

        // Cached Rigidbody of this agent.
        private Rigidbody PinoBody;

        [Header("Movement")]
        public float MovementForce;
        public float RotationForce;
        public float MaxSpeedRotation;

        [Header("Jump")]
        [SerializeField] private float JumpForce;

        public Transform Target;

        // True while the agent is considered to be standing on the floor; cleared when a
        // jump is triggered and set again when a collision with the floor begins.
        private bool OnGround = true;

        internal Vector3 startPos;
        internal Vector3 startRot;

        /// <summary>
        /// One-time setup: caches the Rigidbody and remembers the spawn pose.
        /// </summary>
        /// <remarks>
        /// Done in Initialize() rather than Start() because ML-Agents may invoke
        /// <see cref="OnEpisodeBegin"/> before Unity calls Start() (for example on agents
        /// enabled while the Academy is already stepping), and ResetPino needs PinoBody.
        /// </remarks>
        public override void Initialize()
        {
            PinoBody = GetComponent<Rigidbody>();
            startPos = transform.position;
            startRot = transform.eulerAngles;
        }

        /// <summary>
        /// Starts a new episode: puts the agent back at its spawn pose and relocates the target.
        /// </summary>
        public override void OnEpisodeBegin()
        {
            ResetPino();
            MoveTarget();
        }

        /// <summary>
        /// Supplies the policy with 8 values: where the target is, how the agent is moving,
        /// and whether it may currently jump.
        /// </summary>
        /// <remarks>
        /// Without observations the policy cannot relate its actions to the state of the
        /// scene, so it can only learn a fixed, state-independent action distribution.
        /// </remarks>
        public override void CollectObservations(VectorSensor sensor)
        {
            // Offset to the target expressed in the agent's own frame (3 values).
            Vector3 toTarget = Target.position - transform.position;
            sensor.AddObservation(transform.InverseTransformDirection(toTarget));

            // Linear velocity expressed in the agent's own frame (3 values).
            sensor.AddObservation(transform.InverseTransformDirection(PinoBody.velocity));

            // Turning speed around the vertical axis (1 value).
            sensor.AddObservation(PinoBody.angularVelocity.y);

            // Whether a jump request would currently be honoured (1 value).
            sensor.AddObservation(OnGround);
        }

        /// <summary>
        /// Applies the chosen move / rotate / jump actions and hands out the per-step,
        /// jump and fall penalties. Ends the episode when the agent falls below FallHeight.
        /// </summary>
        public override void OnActionReceived(ActionBuffers actionBuffers)
        {
            var actions = actionBuffers.DiscreteActions;
            int movementAction = actions[0];
            int rotationAction = actions[1];
            int jumpAction = actions[2];

            switch (movementAction)
            {
                case MoveForward:
                    PinoBody.AddForce(transform.forward * MovementForce);
                    break;
                case MoveBackward:
                    PinoBody.AddForce(-transform.forward * MovementForce);
                    break;
                    // ActionIdle: no force applied.
            }

            switch (rotationAction)
            {
                case TorquePositiveY:
                    PinoBody.AddTorque(Vector3.up * RotationForce);
                    break;
                case TorqueNegativeY:
                    PinoBody.AddTorque(-Vector3.up * RotationForce);
                    break;
                    // ActionIdle: no torque applied.
            }

            // Cap the spin rate so repeated torque cannot accumulate without bound.
            if (PinoBody.angularVelocity.magnitude > MaxSpeedRotation)
            {
                PinoBody.angularVelocity = PinoBody.angularVelocity.normalized * MaxSpeedRotation;
            }

            // A jump is only honoured while grounded, which prevents mid-air jumps.
            if (jumpAction == JumpYes && OnGround)
            {
                PinoBody.AddForce(Vector3.up * JumpForce, ForceMode.Impulse);
                OnGround = false;

                AddReward(JumpPenalty);
                Debug.Log("Penalità Salto: " + JumpPenalty);
            }

            // Small time penalty on every step.
            AddReward(StepPenalty);

            if (transform.position.y < FallHeight)
            {
                AddReward(FallPenalty);
                Debug.Log("Penalità Caduta: " + FallPenalty);
                EndEpisode();
            }
        }

        /// <summary>
        /// Rewards the agent for touching the target and moves the target elsewhere.
        /// The episode is not ended here.
        /// </summary>
        public void OnTriggerEnter(Collider trigger)
        {
            if (trigger.gameObject.CompareTag(TargetTag))
            {
                AddReward(TargetReward);
                Debug.Log("Ricompensa Target: " + TargetReward);
                MoveTarget();
            }
        }

        /// <summary>
        /// Penalises hitting a wall and marks the agent as grounded when it lands on the floor.
        /// </summary>
        public void OnCollisionEnter(Collision collision)
        {
            if (collision.gameObject.CompareTag(WallTag))
            {
                AddReward(WallPenalty);
                Debug.Log("Penalità Muri assegnata: " + WallPenalty);
            }

            if (collision.gameObject.CompareTag(FloorTag))
            {
                OnGround = true;
            }
        }

        /// <summary>
        /// Restores the spawn position and rotation, stops all motion and restores the
        /// grounded flag to its initial value.
        /// </summary>
        void ResetPino()
        {
            transform.position = startPos;
            transform.eulerAngles = startRot;

            PinoBody.velocity = Vector3.zero;
            PinoBody.angularVelocity = Vector3.zero;

            // Same value the field starts with; otherwise an episode that ended mid-jump
            // could begin the next one with a stale "airborne" state.
            OnGround = true;
        }

        /// <summary>
        /// Moves the target to a random point with x in [-9, 9], z in [-4, 4] and y = 1.1.
        /// </summary>
        void MoveTarget()
        {
            Target.position = new Vector3(Random.value * 18 - 9f,
                                          1.1f,
                                          Random.value * 8 - 4);
        }

        /// <summary>
        /// Keyboard control for manual testing: W/S select the two movement actions,
        /// A/D select the two rotation actions, Space requests a jump.
        /// </summary>
        public override void Heuristic(in ActionBuffers actionsOut)
        {
            var discreteActionsOut = actionsOut.DiscreteActions;

            // Start from a clean buffer so no action carries over from the previous step.
            discreteActionsOut.Clear();

            // Branch 0 — movement (0: forward, 1: backward, 2: idle).
            discreteActionsOut[0] = Input.GetKey(KeyCode.W) ? MoveForward
                                  : Input.GetKey(KeyCode.S) ? MoveBackward
                                  : ActionIdle;

            // Branch 1 — rotation (0: torque around +Y, 1: torque around -Y, 2: idle).
            discreteActionsOut[1] = Input.GetKey(KeyCode.A) ? TorquePositiveY
                                  : Input.GetKey(KeyCode.D) ? TorqueNegativeY
                                  : ActionIdle;

            // Branch 2 — jump (0: do not jump, 1: jump).
            discreteActionsOut[2] = Input.GetKey(KeyCode.Space) ? JumpYes : JumpNo;
        }
    }
    Can someone explain why this happens? It's fine for it to jump, but I would like it to jump only when it makes sense.
     
  2. smallg2023

    smallg2023

    Joined:
    Sep 2, 2018
    Posts:
    144
    The code looks like it should work OK as far as only jumping when on the ground, etc., so do you mean it jumps a lot?
    This is normal: at the start of training the agent just takes random actions, so unless you punish it for excessive jumping (or reward it for staying on the ground, etc.) it's going to jump a lot. I see you are already trying to punish it for jumping; that should help reduce how much it jumps after a bit of training.

    Besides the methods above, another option is to use action masks and a timer, so that the discrete jump action is only enabled after the agent has been on the ground for a while. Or, if you really want to control the logic, give it set areas in which it is allowed to jump, etc.
     
  3. Ryato03

    Ryato03

    Joined:
    Jan 30, 2024
    Posts:
    2
    I tried letting him train for almost 30 minutes, but he kept jumping. He never moved without jumping.
    I wouldn't know what limits to set for him. For now the environment is very simple and has no obstacles; I wanted to train him without obstacles for the time being and then add them later, once he has learned to move decently.
    So limiting the areas where he can jump wouldn't be of much use, and perhaps the timer wouldn't be either, because if there are areas with obstacles one right after another he would not be able to jump continuously.