
Question: Agents' Random Behavior

Discussion in 'ML-Agents' started by AdiRidA, Sep 13, 2020.

  1. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    Hello, I started learning how to use ML Agents a couple of days ago, and I have set up a simple reinforcement learning environment for an AI driver.

    The environment has a straight road. The agent starts at the edge of the road facing the other edge, and there's a checkpoint about 10 units from the agent.
    The checkpoints are empty game objects visualized with gizmos. The checkpoints are also parented under an empty game object with a component named "Race Track", which handles the behavior of the checkpoints.
    If more than one checkpoint object is parented under this Race Track object, they form a track.
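
    For context, here is a minimal sketch of roughly what the Race Track component exposes. This is a simplified illustration only, reduced to the members the agent script below actually calls (FirstWaypointIndex, GetWaypoint, GetWaypointPosition, GetNextIndex); the assumed implementations may differ from my actual ones.
    Code (CSharp):
    // Simplified, illustrative sketch - not the full implementation.
    using System.Collections.Generic;
    using UnityEngine;

    public class RaceTrack : MonoBehaviour
    {
        // Checkpoint transforms, collected from the child objects.
        readonly List<Transform> waypoints = new List<Transform>();

        public int FirstWaypointIndex => 0;

        void Awake()
        {
            foreach (Transform child in transform)
                waypoints.Add(child);
        }

        public Transform GetWaypoint(int index) => waypoints[index];

        public Vector3 GetWaypointPosition(int index) => waypoints[index].position;

        // Assumed to wrap around, so the waypoint after the last one is index 0.
        public int GetNextIndex(int index) => (index + 1) % waypoints.Count;

        // Draw gizmos so the empty checkpoint objects are visible in the Scene view.
        void OnDrawGizmos()
        {
            Gizmos.color = Color.yellow;
            foreach (Transform child in transform)
                Gizmos.DrawWireSphere(child.position, 0.5f);
        }
    }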

    In the current environment, I use a single checkpoint. The agent is supposed to drive towards the checkpoint.
    For the most part it does, but I'm not sure whether that's because the checkpoint is so close that it can't miss it, or because it has actually learned the behavior needed to reach the checkpoint.

    Over an hour into the training, when watching the agent move, it sometimes drives straight forward towards the checkpoint, and sometimes it turns to the left or right, randomly misses it, and falls out of the environment.



    I'm not sure why it does that, because as far as I understand, with reinforcement learning the agent is supposed to figure out which actions give it the most reward, and it should eventually figure out how to consistently get that reward. That is how I set up my environment: reaching the checkpoint gives the agent the highest reward, while steering off and falling gives the largest penalty.

    These are the result graphs on my TensorBoard:
    [Attached image: board.png]

    This is the agent class, DriverAgent:
    Code (CSharp):
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;

    public class DriverAgent : Agent
    {
        [SerializeField] RaceTrack raceTrack;
        [SerializeField] float waypointDistanceThreshold = 1.42f;

        VehicleMovement vehicleMovement;
        Rigidbody rigidBody;

        Vector3 initialAgentPosition;
        Quaternion initialAgentRotation;
        int currentWaypointIndex = 0;
        Vector3 dirToTarget;

        public override void Initialize()
        {
            vehicleMovement = GetComponent<VehicleMovement>();
            rigidBody = GetComponent<Rigidbody>();

            initialAgentPosition = transform.localPosition;
            initialAgentRotation = transform.localRotation;

            currentWaypointIndex = raceTrack.FirstWaypointIndex;
        }

        public override void OnEpisodeBegin()
        {
            // Reset agent velocity
            rigidBody.angularVelocity = Vector3.zero;
            rigidBody.velocity = Vector3.zero;

            // Reset the agent to the initial starting position and rotation
            transform.localPosition = initialAgentPosition;
            transform.localRotation = initialAgentRotation;

            // Reset current waypoint index
            currentWaypointIndex = raceTrack.FirstWaypointIndex;
        }

        public override void CollectObservations(VectorSensor sensor)
        {
            // Agent
            sensor.AddObservation(vehicleMovement.Speed);
            sensor.AddObservation(transform.position.normalized);
            sensor.AddObservation(transform.InverseTransformVector(rigidBody.velocity.normalized));
            sensor.AddObservation(transform.forward);
            sensor.AddObservation(transform.right);

            // Waypoints
            if (raceTrack != null)
            {
                sensor.AddObservation(raceTrack.GetWaypointPosition(currentWaypointIndex));
                dirToTarget = (raceTrack.GetWaypointPosition(currentWaypointIndex) - transform.position).normalized;
                sensor.AddObservation(transform.InverseTransformDirection(dirToTarget));
            }
        }

        private void OnCollisionEnter(Collision other)
        {
            if (other.gameObject.CompareTag("Wall") || other.gameObject.CompareTag("Obstacle") || other.gameObject.CompareTag("Vehicle"))
            {
                AddReward(-0.01f);
                Debug.Log("Collided with " + other.gameObject.tag);
                EndEpisode();
            }
        }

        public void MoveAgent(float[] vectorAction)
        {
            float acceleration = 0;
            float steering = 0;
            float braking = 0;

            acceleration = vectorAction[0];
            steering = vectorAction[1];
            braking = vectorAction[2];

            vehicleMovement.Accelerate(acceleration);
            vehicleMovement.Steer(steering);
            vehicleMovement.Brake(braking);

            // Increase reward when moving toward waypoint
            float velocityAlignment = Vector3.Dot(dirToTarget, rigidBody.velocity);
            AddReward(0.001f * velocityAlignment);
        }

        public override void OnActionReceived(float[] vectorAction)
        {
            MoveAgent(vectorAction);

            // Distance from agent to current target waypoint
            float distanceToTarget = Vector3.Distance(transform.position, raceTrack.GetWaypointPosition(currentWaypointIndex));

            // If reached LAST target waypoint
            if (distanceToTarget < waypointDistanceThreshold && raceTrack.GetNextIndex(currentWaypointIndex) == 0)
            {
                SetReward(1.0f);
                Debug.Log("Reached latest waypoint (" + raceTrack.GetWaypoint(currentWaypointIndex).name + ") - Episode ended");
                EndEpisode();
            }
            // If reached target waypoint
            else if (distanceToTarget < waypointDistanceThreshold)
            {
                SetReward(0.5f);
                Debug.Log("Reached waypoint: " + raceTrack.GetWaypoint(currentWaypointIndex).name);
                currentWaypointIndex++;
            }

            // If fell down
            if (transform.localPosition.y < 0)
            {
                SetReward(-0.25f);
                Debug.Log("Fell down");
                EndEpisode();
            }
        }

        public override void Heuristic(float[] actionsOut)
        {
            actionsOut[0] = Input.GetAxis("Vertical");
            actionsOut[1] = Input.GetAxis("Horizontal");

            actionsOut[2] = 0;
            if (Input.GetKey(KeyCode.Space)) actionsOut[2] = 1;
        }
    }


    I really appreciate any help.
    Thanks!

    EDIT: It seems like the rewards have stabilized and are more consistent, but the agents still steer away from the checkpoint.
    [Attached image: IMG_20200913_163717.jpg]

    EDIT 2: I moved the checkpoint and increased the distance between it and the agent, and it seems like the agents handle it. However, there was a big drop in rewards, which recovered right after resuming the training.
    [Attached image: 1.png]

    EDIT 3: It seems like the "random" drops in reward still happen, and I'm not sure why.
    There was a drop right after resuming training, just after the 5 million step mark.
    Then there was another drop during training, after the 5.5 million mark.
    [Attached image: upload_2020-9-13_18-5-5.png]
     
    Last edited: Sep 14, 2020
  2. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    I'm not sure, but could the rewards be confusing the agent away from its goal? This is the current reward setup; it is also shown in the code in the main post.


    Add -0.01f to the reward when colliding with a wall, obstacle, or vehicle.

    Increase the reward each step when moving toward the waypoint, by
    0.001f * Vector3.Dot(dirToTarget, rigidBody.velocity).

    Set the reward to 1 when the last waypoint is reached.

    Set the reward to 0.5 when any waypoint other than the last is reached.

    Set the reward to -0.25 when the agent falls down.
     
    Last edited: Sep 14, 2020
  3. andrewcoh_unity

    Unity Technologies

    Joined:
    Sep 5, 2019
    Posts:
    162
    It's possible this line is causing an issue:

    Increase rewards when moving toward waypoint: 0.001f * Vector3.Dot(dirToTarget, rigidBody.velocity)

    When the agent moves past the waypoint, it is no longer moving toward the waypoint, so that dot product becomes negative and the agent keeps receiving negative reward. It may actually be more valuable to end an episode and take the -0.25 than to remain alive and accumulate the negative reward. Try removing that line and leaving everything else the same.
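
    As a small illustration (hypothetical numbers, not taken from your project), the shaping term flips sign as soon as the agent overshoots the waypoint while still driving forward:
    Code (CSharp):
    using UnityEngine;

    // Hypothetical numbers only: an agent driving forward along +Z at 10 units/s,
    // with the waypoint at z = 10. Shows how 0.001f * Dot(dirToTarget, velocity)
    // changes sign once the agent has driven past the waypoint.
    public static class ShapingRewardSignDemo
    {
        public static void Run()
        {
            Vector3 velocity = new Vector3(0f, 0f, 10f);
            Vector3 waypoint = new Vector3(0f, 0f, 10f);

            // Agent still before the waypoint (at z = 0): dirToTarget points forward.
            Vector3 dirBefore = (waypoint - new Vector3(0f, 0f, 0f)).normalized;
            Debug.Log(0.001f * Vector3.Dot(dirBefore, velocity)); // +0.01 per step

            // Agent has overshot (at z = 20): dirToTarget now points backward.
            Vector3 dirAfter = (waypoint - new Vector3(0f, 0f, 20f)).normalized;
            Debug.Log(0.001f * Vector3.Dot(dirAfter, velocity)); // -0.01 per step
        }
    }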
     
  4. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    Thank you for your reply :)

    I will try removing the line.

    Is what you described still the case even though the waypoint index is updated when an action is received from the agent?

    When the agent reaches a waypoint, the index either goes up by 1 or gets reset to 0, depending on whether it is the last waypoint or not.

    If the agent has passed a waypoint that is not the last one (the next waypoint index is not 0), 1 is added to the current index value, so if the index is 1, it becomes 2.

    If it's the last waypoint (the next waypoint index is 0), the episode ends and currentWaypointIndex is reset to 0.

    This is what I check in this block of code in the OnActionReceived method:

    Code (CSharp):
    // If reached LAST target waypoint
    if (distanceToTarget < waypointDistanceThreshold && raceTrack.GetNextIndex(currentWaypointIndex) == 0)
    {
        SetReward(1.0f);
        Debug.Log("Reached latest waypoint (" + raceTrack.GetWaypoint(currentWaypointIndex).name + ") - Episode ended");
        EndEpisode();
    }
    // If reached target waypoint
    else if (distanceToTarget < waypointDistanceThreshold)
    {
        SetReward(0.5f);
        Debug.Log("Reached waypoint: " + raceTrack.GetWaypoint(currentWaypointIndex).name);
        currentWaypointIndex++;
    }
    Edit: Actually, I think I understand your theory.
    Please correct me if I'm wrong.
    When just driving towards the waypoint, as fast and as directly as possible, the agents earn more reward.

    When they reach the waypoint, the reward is just set (SetReward) to 1, which is probably less than what they would earn by simply continuing to drive towards the waypoint.
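
    As a rough back-of-the-envelope check (the speed and step count below are made-up numbers, not measurements from my project), the per-step shaping reward can add up to more than the one-time arrival reward over a long enough episode:
    Code (CSharp):
    using UnityEngine;

    // Back-of-the-envelope illustration with made-up numbers.
    public static class RewardBudgetSketch
    {
        public static void Run()
        {
            // Assume the agent keeps moving toward the waypoint at ~10 units/s,
            // so 0.001f * Vector3.Dot(dirToTarget, velocity) is roughly 0.01 per step.
            float perStepShaping = 0.001f * 10f;

            // If the agent spends many decision steps "moving toward" the waypoint
            // without ever triggering the arrival distance threshold...
            int stepsWithoutArriving = 150;
            float accumulatedShaping = perStepShaping * stepsWithoutArriving; // 1.5

            // ...the accumulated shaping reward exceeds the one-time arrival reward.
            float arrivalReward = 1.0f; // SetReward(1.0f) when the waypoint is reached
            Debug.Log(accumulatedShaping > arrivalReward); // true with these numbers
        }
    }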
     
    Last edited: Sep 15, 2020
  5. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    Hello!
    I tried your suggestion and commented out that specific line in the code, and the simulation has been running for about 7 hours now.
    While the graphs seem somewhat better, the agent itself still sometimes steers off its target.
    At the start it steers to the left, then to the right, and sometimes it doesn't steer away at all.

    This is what it looked like when the agent was steering left:

    Board: [Attached image: 123L BOARD.png]
    Console: [Attached image: upload_2020-9-15_13-25-54.png]

    This is what it looked like when the agent was steering right:

    Board: [Attached image: 123R BOARD.png]
    Console: [Attached image: upload_2020-9-15_13-25-54.png]
     


    Last edited: Sep 15, 2020
  6. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    UPDATE 2:
    I replaced the waypoint system with a cube, like in the Roller Ball tutorial project in the ML-Agents GitHub repository.
    It seems like the agents are tracking the target a lot better now.

    They now do what they couldn't do after 6 hours with the waypoint system.
    [Attached image: 123.png]

    This is the new script using only a cube:
    Code (CSharp):
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;

    public class RollerAgent : Agent
    {
        [SerializeField] Transform target;
        [SerializeField] float waypointDistanceThreshold = 1.42f;

        VehicleMovement vehicleMovement;
        Rigidbody rigidBody;

        Vector3 initialAgentPosition;
        Quaternion initialAgentRotation;

        public override void Initialize()
        {
            vehicleMovement = GetComponent<VehicleMovement>();
            rigidBody = GetComponent<Rigidbody>();

            initialAgentPosition = transform.localPosition;
            initialAgentRotation = transform.localRotation;
        }

        public override void OnEpisodeBegin()
        {
            // If the Agent fell, zero its momentum
            rigidBody.angularVelocity = Vector3.zero;
            rigidBody.velocity = Vector3.zero;
            transform.localPosition = initialAgentPosition;
            transform.localRotation = initialAgentRotation;

            // Move the target to a new spot
            target.localPosition = new Vector3(Random.Range(-3f, 8f), 0.5f, Random.Range(60f, 70f));
        }

        public override void CollectObservations(VectorSensor sensor)
        {
            // Target and Agent positions
            sensor.AddObservation(target.localPosition);
            sensor.AddObservation(transform.localPosition);

            // Agent velocity
            sensor.AddObservation(rigidBody.velocity.x);
            sensor.AddObservation(rigidBody.velocity.z);
        }

        private void OnCollisionEnter(Collision other)
        {
            if (other.gameObject.CompareTag("Wall") || other.gameObject.CompareTag("Obstacle") || other.gameObject.CompareTag("Vehicle"))
            {
                AddReward(-0.01f);
                Debug.Log("Collided with " + other.gameObject.tag);
                EndEpisode();
            }
        }

        public void MoveAgent(float[] vectorAction)
        {
            float acceleration = 0;
            float steering = 0;
            float braking = 0;

            acceleration = vectorAction[0];
            steering = vectorAction[1];
            braking = vectorAction[2];

            vehicleMovement.Accelerate(acceleration);
            vehicleMovement.Steer(steering);
            vehicleMovement.Brake(braking);
        }

        public override void OnActionReceived(float[] vectorAction)
        {
            MoveAgent(vectorAction);

            // Rewards
            float distanceToTarget = Vector3.Distance(transform.localPosition, target.localPosition);

            // Reached target
            if (distanceToTarget < waypointDistanceThreshold)
            {
                SetReward(1.0f);
                EndEpisode();
            }

            // Fell off platform
            if (transform.localPosition.y < 0)
            {
                EndEpisode();
            }
        }

        public override void Heuristic(float[] actionsOut)
        {
            actionsOut[0] = Input.GetAxis("Vertical");
            actionsOut[1] = Input.GetAxis("Horizontal");

            actionsOut[2] = 0;
            if (Input.GetKey(KeyCode.Space)) actionsOut[2] = 1;
        }
    }
     
    Last edited: Sep 15, 2020
  7. andrewcoh_unity

    Unity Technologies

    Joined:
    Sep 5, 2019
    Posts:
    162
    Ah, ok. Maybe there was something buggy with the waypoints.
     
  8. AdiRidA

    Joined:
    Dec 12, 2019
    Posts:
    8
    I don't know if there is something buggy with the waypoints, because I tried implementing the waypoint system in the Roller Ball project in place of the cube target.

    After editing the project, I ran a training session with the original Roller Ball project for about 45 minutes, then did the same with the new Roller Ball project with the waypoints, and they gave pretty much the same results.

    The orange line is from the project without the waypoints, and the blue line is from the project with the waypoints.