
Question: My agents don't seem to learn

Discussion in 'ML-Agents' started by erictr, Sep 7, 2020.

  1. erictr

    erictr

    Joined:
    Nov 18, 2016
    Posts:
    7
    Hello, I am trying to create an agent that can move across a series of randomly generated platforms. I equipped the agent with multiple Ray Perception Sensors and tagged the 3 types of platforms (platform, respawn and goal). I used the Pyramids example YAML file for training. However, after multiple attempts the agents don't seem to learn how to reach the goal. In my last attempt I reached 4,860,000 steps without good results. I hope someone can help me, thanks.



    Code (CSharp):
    using System;
    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;

    public class PlayerAgent : Agent
    {
        private Vector3 startPosition;
        private Rigidbody rb;
        public float moveSpeed = 2;
        public float rotationSpeed = 180;
        public float jumpSpeed = 5;
        public float fallingForce = 1f;
        public float collisionForce = 15f;

        public GameObject destroyParticle;
        private float previousDistance;
        private float distance;

        public PlataformGenerator myplat;

        public override void Initialize()
        {
            rb = GetComponent<Rigidbody>();
            startPosition = transform.position;
        }

        // Called at the beginning of an Agent's episode, including at the beginning of the simulation.
        public override void OnEpisodeBegin()
        {
            transform.position = new Vector3(startPosition.x + UnityEngine.Random.Range(-7, 7), startPosition.y, startPosition.z + UnityEngine.Random.Range(-7, 7));
            transform.rotation = Quaternion.identity;
            rb.velocity = Vector3.zero;
            rb.angularVelocity = Vector3.zero;
            previousDistance = transform.position.z;
            myplat.Generate();
        }

        // Called every time the Agent receives an action to take. Receives the action chosen by the Agent.
        // It is also common to assign a reward in this method.
        public override void OnActionReceived(float[] vectorAction)
        {
            // Rotation branch: 1 = turn left, 2 = turn right.
            if (vectorAction[1] == 1)
            {
                Vector3 m_EulerAngleVelocity = new Vector3(0, -rotationSpeed, 0);
                Quaternion deltaRotation = Quaternion.Euler(m_EulerAngleVelocity * Time.fixedDeltaTime);
                rb.MoveRotation(rb.rotation * deltaRotation);
            }
            else if (vectorAction[1] == 2)
            {
                Vector3 m_EulerAngleVelocity = new Vector3(0, rotationSpeed, 0);
                Quaternion deltaRotation = Quaternion.Euler(m_EulerAngleVelocity * Time.fixedDeltaTime);
                rb.MoveRotation(rb.rotation * deltaRotation);
            }

            // Movement branch: 1 = move forward.
            if (vectorAction[0] == 1)
            {
                rb.AddForce(transform.forward * moveSpeed, ForceMode.VelocityChange);
            }

            // Extra downward force while airborne so the agent falls faster.
            if (!IsGrounded())
            {
                rb.AddForce(Vector3.down * fallingForce, ForceMode.Acceleration);
            }

            // Jump branch, with a small penalty to discourage constant jumping.
            if (vectorAction[2] == 1 && IsGrounded())
            {
                rb.velocity = new Vector3(rb.velocity.x, jumpSpeed, rb.velocity.z);
                AddReward(-0.01f);
            }

            // Reward progress along the z axis.
            distance = transform.position.z;
            if (distance > previousDistance)
            {
                previousDistance = distance;
                AddReward(0.1f);
            }

            // Small existential penalty every step.
            AddReward(-0.01f);
        }

        private bool IsGrounded()
        {
            // Check the raycast result before reading the hit; otherwise hit.collider can be null.
            if (Physics.Raycast(transform.position, Vector3.down, out RaycastHit hit))
            {
                return hit.distance < 0.55f && !hit.collider.CompareTag("Respawn");
            }
            return false;
        }

        // Called every step that the Agent requests a decision. This is one possible way of
        // collecting the Agent's observations of the environment; see Generating Observations for more options.
        public override void CollectObservations(VectorSensor sensor)
        {

        }

        // Method to generate the actions of the Agent. The Heuristic() method writes to the array of
        // floats provided to it as an argument.
        public override void Heuristic(float[] actionsOut)
        {
            Array.Clear(actionsOut, 0, actionsOut.Length);

            if (Input.GetKey(KeyCode.W))
            {
                actionsOut[0] = 1f;
            }
            if (Input.GetKey(KeyCode.S))
            {
                actionsOut[0] = 2f;
            }
            if (Input.GetKey(KeyCode.A))
            {
                actionsOut[1] = 1f;
            }
            if (Input.GetKey(KeyCode.D))
            {
                actionsOut[1] = 2f;
            }
            actionsOut[2] = Input.GetKey(KeyCode.Space) ? 1f : 0f;
        }

        private void OnCollisionEnter(Collision collision)
        {
            if (collision.gameObject.CompareTag("Finish"))
            {
                AddReward(1f);
                EndEpisode();
            }

            if (collision.gameObject.CompareTag("Respawn"))
            {
                Instantiate(destroyParticle, transform.position, transform.rotation);
                AddReward(-1f);
                EndEpisode();
            }

            if (collision.gameObject.CompareTag("Player"))
            {
                Vector3 position = collision.gameObject.transform.position - transform.position;
                rb.AddForce(-position.normalized * collisionForce, ForceMode.VelocityChange);
            }
        }
    }
     
    Last edited: Sep 7, 2020
  3. Luke-Houlihan

    Luke-Houlihan

    Joined:
    Jun 26, 2007
    Posts:
    303
    @erictr It doesn't look like you're giving the agent any information except the ray sensors, and that may not tell it everything it needs to know. You might try adding some additional vector observations, like relative velocity and maybe the forward direction in local Euler angles.

    When in doubt I tend to add all the observations I think the agent might find useful just to get the agent training, then remove possibly unnecessary observations to make training faster. Adding more observations makes training slower, but not having the observations the agent needs makes training impossible.
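
    As a rough, untested sketch of what I mean (it reuses the rb field and IsGrounded() method from your script, and the scaling constants are just guesses; you'd also need to set the Vector Observation Space Size in the Behavior Parameters to match):

    Code (CSharp):
    public override void CollectObservations(VectorSensor sensor)
    {
        // Velocity in the agent's local frame, so forward speed looks the same
        // no matter which way the agent is facing. Roughly scaled into [-1, 1].
        Vector3 localVelocity = transform.InverseTransformDirection(rb.velocity);
        sensor.AddObservation(localVelocity / 10f);

        // Which way the agent is facing (yaw only), normalized to [-1, 1].
        sensor.AddObservation(transform.localEulerAngles.y / 180f - 1f);

        // Whether the agent is on the ground, which should help it learn when jumping works.
        sensor.AddObservation(IsGrounded());
    }

    That's 5 floats in total (the Vector3 counts as 3, the bool as 1), so the Space Size would be 5 here. The exact scaling doesn't matter much as long as the values stay in a small range.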
     
  4. Luke-Houlihan

    Luke-Houlihan

    Joined:
    Jun 26, 2007
    Posts:
    303
    Also, if you're using PPO and randomizing a complex environment, it may take more than 5 million steps to get acceptable behavior. One of my more complex agents needs 30 million steps or more just to explore the full reward space.