Search Unity

  1. Unity 6 Preview is now available. To find out what's new, have a look at our Unity 6 Preview blog post.
    Dismiss Notice
  2. Unity is excited to announce that we will be collaborating with TheXPlace for a summer game jam from June 13 - June 19. Learn more.
    Dismiss Notice
  3. Dismiss Notice

Question ML-Agent getting worse

Discussion in 'ML-Agents' started by unity_16E0CB5E78EC6603BF04, Apr 15, 2024.

  1. unity_16E0CB5E78EC6603BF04

    unity_16E0CB5E78EC6603BF04

    Joined:
    May 12, 2022
    Posts:
    1
    [Full code included at end] The ML-Agent in Unity gets worse the longer it trains. In my game the player grabs a key and brings it to a door and starts again. The position of the key and door is randomized. Here is a video of me playing the game:

    The observations for the agent are; its position, the key's position, the door's position, and if it has the key. I have included the code segment below:
    Code (CSharp):
    1. public override void CollectObservations(VectorSensor sensor)
    2.     {
    3.         sensor.AddObservation(transform.localPosition);
    4.         sensor.AddObservation(key.transform.localPosition);
    5.         sensor.AddObservation(hasKey);
    6.         sensor.AddObservation(door.transform.localPosition);
    7.     }
    The rewards for the agent are; -1 at start, 0 when key is obtained, 1 when door opened. There is also a -.0000001 for each action and -.0001 for hitting a wall. I have included the code segments below:
    Code (CSharp):
    1. void Update()
    2.     {
    3.         if (transform.position.x > upperXLimit)
    4.         {
    5.             gameObject.transform.Translate(-1, 0, 0, Space.World);
    6.             AddReward(-.0001f);
    7.         }
    8.         //Same for other edges
    9. }
    10.  
    11.     public override void OnEpisodeBegin()
    12.     {
    13.         SetReward(-1f);
    14.     }
    15.  
    16.     public override void OnActionReceived(ActionBuffers actions)
    17.     {
    18.         AddReward(-.0000001f);
    19.         //.......
    20.     }
    21.  
    22. void OnTriggerEnter2D(Collider2D col)
    23.     {
    24.         if (col.gameObject == key)
    25.         {
    26.             SetReward(0);
    27.         }
    28.         if (col.gameObject == door)
    29.         {
    30.                 SetReward(+1f);
    31.         }
    32.     }
    The actions the agent can take are; idle, move up, move down, move right, move left. I have included the code segment below:
    Code (CSharp):
    1. public override void OnActionReceived(ActionBuffers actions)
    2.     {
    3.         AddReward(-.0000001f);
    4.         switch (direction)
    5.         {
    6.             case 0: // idle
    7.                 moveTo = Vector2.zero;
    8.                 break;
    9.             case 1: // left
    10.                 moveTo = new Vector2(-1, 0);
    11.                 break;
    12.             case 2: // right
    13.                 moveTo = new Vector2(1, 0);
    14.                 break;
    15.             case 3: // up
    16.                 moveTo = new Vector2(0, 1);
    17.                 break;
    18.             case 4: // down
    19.                 moveTo = new Vector2(0, -1);
    20.                 break;
    21.         }
    22.     }
    I have included the graphs from TensorFlow after training the agent for 2,000,000 steps. As you can see the cumulative reward goes down, while episode time goes up:

    Here is my config file for training the agent:
    Code (CSharp):
    1. behaviors:
    2.   My Behavior:
    3.     trainer_type: ppo
    4.     hyperparameters:
    5.       batch_size: 128
    6.       buffer_size: 2048
    7.       learning_rate: 0.00005
    8.       beta: 0.01
    9.       epsilon: 0.2
    10.       lambd: 0.95
    11.       num_epoch: 3
    12.       learning_rate_schedule: linear
    13.     network_settings:
    14.       normalize: false
    15.       hidden_units: 256
    16.       num_layers: 2
    17.       vis_encode_type: simple
    18.     reward_signals:
    19.       extrinsic:
    20.         gamma: 0.8
    21.         strength: 1.0
    22.     keep_checkpoints: 5
    23.     max_steps: 2000000
    24.     time_horizon: 64
    25.     summary_freq: 60000
    26.  
    Lastly I have included my complete code:
    Game.cs. Runs and resets game.
    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4.  
    5. public class Game : MonoBehaviour
    6. {
    7.     public float upperXLimit;
    8.     public float lowerXLimit;
    9.     public float upperYLimit;
    10.     public float lowerYLimit;
    11.     public GameObject Key;
    12.     public GameObject Door;
    13.     public GameObject CurrentKey;
    14.     public GameObject CurrentDoor;
    15.     public AIPlayer AIPlayerScript;
    16.     public player PlayerScript;
    17.  
    18.     // Start is called before the first frame update
    19.     void Start()
    20.     {
    21.         resetBoard();
    22.     }
    23.  
    24.     public void resetBoard()
    25.     {
    26.         if (CurrentKey != null)
    27.         {
    28.             Destroy(CurrentKey);
    29.         }
    30.         if (CurrentDoor != null)
    31.         {
    32.             Destroy(CurrentDoor);
    33.         }
    34.         CurrentKey = Instantiate(Key, new Vector3(Mathf.Round(Random.Range(lowerXLimit, upperXLimit)), Mathf.Round(Random.Range(lowerYLimit, upperYLimit)), 0), Quaternion.identity);
    35.         CurrentDoor = Instantiate(Door, new Vector3(Mathf.Round(Random.Range(lowerXLimit, upperXLimit)), Mathf.Round(Random.Range(lowerYLimit, upperYLimit)), 0), Quaternion.identity);
    36.         AIPlayerScript.key = CurrentKey;
    37.         AIPlayerScript.door = CurrentDoor;
    38.         PlayerScript.key = CurrentKey;
    39.         PlayerScript.door = CurrentDoor;
    40.     }
    41. }
    42.  
    AIPlayer.cs. Runs the mlagent.
    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4. using Unity.MLAgents;
    5. using Unity.MLAgents.Sensors;
    6. using Unity.MLAgents.Actuators;
    7. using static UnityEngine.RuleTile.TilingRuleOutput;
    8. using static AIPlayer;
    9. using UnityEditor.Tilemaps;
    10.  
    11. public class AIPlayer : Agent
    12. {
    13.     Collider2D col;
    14.     public double upperXLimit;
    15.     public double lowerXLimit;
    16.     public double upperYLimit;
    17.     public double lowerYLimit;
    18.     public bool hasKey = false;
    19.     private Vector2 moveTo = Vector2.zero;
    20.     private int direction;
    21.     public GameObject key;
    22.     public GameObject door;
    23.     private Game game;
    24.  
    25.     public enum MoveToDirection
    26.     {
    27.         Idle,
    28.         Left,
    29.         Right,
    30.         Up,
    31.         Down
    32.     }
    33.  
    34.     void Start()
    35.     {
    36.         game = GameObject.FindGameObjectWithTag("Game").GetComponent<Game>();
    37.     }
    38.  
    39.     // Update is called once per frame
    40.     void Update()
    41.     {
    42.         transform.Translate(moveTo, Space.World);
    43.         if (transform.position.x > upperXLimit)
    44.         {
    45.             gameObject.transform.Translate(-1, 0, 0, Space.World);
    46.             AddReward(-.0001f);
    47.             //Debug.Log("Wall");
    48.         }
    49.         if (transform.position.x < lowerXLimit)
    50.         {
    51.             gameObject.transform.Translate(1, 0, 0, Space.World);
    52.             AddReward(-.0001f);
    53.             //Debug.Log("Wall");
    54.         }
    55.         if (transform.position.y > upperYLimit)
    56.         {
    57.             gameObject.transform.Translate(0, -1, 0, Space.World);
    58.             AddReward(-.0001f);
    59.             //Debug.Log("Wall");
    60.         }
    61.         if (transform.position.y < lowerYLimit)
    62.         {
    63.             gameObject.transform.Translate(0, 1, 0, Space.World);
    64.             AddReward(-.0001f);
    65.             //Debug.Log("Wall");
    66.         }
    67.     }
    68.     public override void CollectObservations(VectorSensor sensor)
    69.     {
    70.         sensor.AddObservation(transform.localPosition);
    71.         sensor.AddObservation(key.transform.localPosition);
    72.         sensor.AddObservation(hasKey);
    73.         sensor.AddObservation(door.transform.localPosition);
    74.         //Debug.Log("Self: " + transform.localPosition + " | Key: " + key.transform.localPosition + " | hasKey: " + hasKey + " | Door: " + door.transform.localPosition);
    75.     }
    76.     public override void OnEpisodeBegin()
    77.     {
    78.         SetReward(-1f);
    79.     }
    80.  
    81.     private MoveToDirection moveToDirection = MoveToDirection.Idle;
    82.     public override void OnActionReceived(ActionBuffers actions)
    83.     {
    84.         AddReward(-.0000001f);
    85.         direction = actions.DiscreteActions[0];
    86.         //Debug.Log("Direction: " + direction);
    87.         switch (direction)
    88.         {
    89.             case 0: // idle
    90.                 moveTo = Vector2.zero;
    91.                 moveToDirection = MoveToDirection.Idle;
    92.  
    93.                 break;
    94.             case 1: // left
    95.                 moveTo = new Vector2(-1, 0);
    96.                 moveToDirection = MoveToDirection.Left;
    97.                 //interactor.localRotation = Quaternion.Euler(0, 0, 90);
    98.                 //animator.SetFloat("LastHorizontal", moveTo.x);
    99.                 break;
    100.             case 2: // right
    101.                 moveTo = new Vector2(1, 0);
    102.                 moveToDirection = MoveToDirection.Right;
    103.                 //interactor.localRotation = Quaternion.Euler(0, 0, -90);
    104.                 //animator.SetFloat("LastHorizontal", moveTo.x);
    105.                 break;
    106.             case 3: // up
    107.                 moveTo = new Vector2(0, 1);
    108.                 moveToDirection = MoveToDirection.Up;
    109.                 //interactor.localRotation = Quaternion.Euler(0, 0, 0);
    110.                 //animator.SetFloat("LastVertical", moveTo.y);
    111.                 break;
    112.             case 4: // down
    113.                 moveTo = new Vector2(0, -1);
    114.                 moveToDirection = MoveToDirection.Down;
    115.                 //interactor.localRotation = Quaternion.Euler(0, 0, 180);
    116.                 //animator.SetFloat("LastVertical", moveTo.y);
    117.                 break;
    118.         }
    119.     }
    120.     void OnTriggerEnter2D(Collider2D col)
    121.     {
    122.         if (col.gameObject == key)
    123.         {
    124.             hasKey = true;
    125.             col.gameObject.SetActive(false);
    126.             SetReward(0);
    127.         }
    128.         if (col.gameObject == door)
    129.         {
    130.             if (hasKey == true)
    131.             {
    132.                 SetReward(+1f);
    133.                 hasKey = false;
    134.                 EndEpisode();
    135.                 game.resetBoard();
    136.             }
    137.  
    138.         }
    139.     }
    140. }
    141.