Search Unity

Question: In a multi-agent environment, agents don't get rewards and observations, or end the episode, at the right time

Discussion in 'ML-Agents' started by daishiqin1996, Nov 17, 2022.

  1. daishiqin1996

    daishiqin1996

    Joined:
    May 18, 2022
    Posts:
    2
    Hi

    I ran into a problem: I can't control the execution order of multiple agents. This means I can't make an agent add its reward and observation at the right time, so the information being added has not been updated yet. I also can't end the episode for all agents at the same time, right after the game should be over. So I'm looking for some kind of "lock" to make sure one function can only run after another one is done.

    However, ML-Agents is controlled by the Academy, so I can't just write a script that runs the functions in order. I tried to use
    Academy.Instance.AutomaticSteppingEnabled = false
    but it makes the environment slow. I also tried disabling it only when a character dies, but that doesn't work. I also tried to use a lock, but that doesn't help either.

    Below is my whole debugging process; the problem is not solved yet.

    I made a "Hide and Seek" game with ML-Agents. The setting is very simple. There are 2 teams: hiders and seekers. When a hider collides with one or more seekers, it is deactivated. After all of the hiders have been caught, all the agents should end their episode.
    Code (CSharp):
    1. using System;
    2. using System.Collections.Generic;
    3. using Unity.MLAgents;
    4. using Unity.MLAgents.Actuators;
    5. using Unity.MLAgents.Sensors;
    6. using UnityEngine;
    7. using UnityEngine.AI;
    8. using UnityEngine.InputSystem;
    9.  
    /// <summary>
    ///     Base class for game agents. The same script is used for both teams; the
    ///     GameObject tag ("Hider" or "Seeker") selects the team-specific behaviour.
    /// </summary>
    public class GameAgent : Agent
    {
        //Tags that identify which team a GameObject belongs to
        private const string HiderTag = "Hider";
        private const string SeekerTag = "Seeker";

        //Layer a caught hider is moved to
        private const string CaughtLayerName = "Ignore Raycast";

        //Reward values used by this agent
        private const float CatchReward = 1f;
        private const float CaughtPenalty = -1f;
        private const float SeekerStepPenalty = -0.1f;
        private const float HiderSurvivalReward = 0.1f;

        //Set input for players
        public InputAction moveInput;
        public InputAction dirInput;

        //Half of the value returned by TerrainAndRockSetting.CalculateMapSize()
        [HideInInspector] public float mapSize;

        //Player's parameter
        public float moveSpeed = 0.5f;
        public float rotateSpeed = 200f;

        //If in training or inference mode
        public bool trainingMode;

        //If true, meaning the agent is still activated
        public bool alive;

        //If true, deactivate the hider the next time observations are collected
        private bool hiderDestroyFlag;

        //Player's destinationPosition and rotation on the last step
        private Vector3 lastPosition;
        private Quaternion lastRotation;

        //Player spawner as the parent of all players
        private PlayerSpawner playerSpawner;

        public List<bool> detected;

        //Steps to freeze seekers, so hiders have preparation time
        private int stepLeftToFreeze;

        //Components looked up once instead of on every step
        private Rigidbody cachedRigidbody;
        private Collider cachedCollider;

        /// <summary>
        ///     Rigidbody of this agent, fetched on first use and then reused.
        /// </summary>
        private Rigidbody AgentRigidbody
        {
            get
            {
                //Unity overloads == for destroyed/missing objects, so use == rather than "is null"
                if (cachedRigidbody == null)
                {
                    cachedRigidbody = GetComponent<Rigidbody>();
                }

                return cachedRigidbody;
            }
        }

        /// <summary>
        ///     Collider of this agent, fetched on first use and then reused.
        /// </summary>
        private Collider AgentCollider
        {
            get
            {
                if (cachedCollider == null)
                {
                    cachedCollider = GetComponent<Collider>();
                }

                return cachedCollider;
            }
        }

        /// <summary>
        ///     Disable inputs when agent is destroyed.
        /// </summary>
        private void OnDestroy()
        {
            moveInput?.Disable();
            dirInput?.Disable();
        }

        /// <summary>
        ///     Handles hider/seeker contact. A hider touched by a seeker is only flagged here
        ///     (and its camera blacked out); the actual deactivation and penalty are applied in
        ///     <see cref="CollectObservations"/>. A seeker touching a hider is rewarded immediately.
        /// </summary>
        /// <param name="collision">Collision data supplied by the physics engine.</param>
        public void OnCollisionEnter(Collision collision)
        {
            var other = collision.gameObject;

            if (other.CompareTag(SeekerTag) && gameObject.CompareTag(HiderTag))
            {
                //Mark the hider as caught; it is processed on the next observation
                hiderDestroyFlag = true;

                //Turn its camera to black when a hider is caught
                BlackOutCamera();
            }

            //Todo: Add the reward at the same time as hider getting caught
            if (other.CompareTag(HiderTag) && gameObject.CompareTag(SeekerTag))
            {
                //Add reward when catch a hider
                AddReward(CatchReward);
            }
        }

        /// <summary>
        ///     Initialize ML-agent.
        /// </summary>
        public override void Initialize()
        {
            //Enable inputs
            moveInput.Enable();
            dirInput.Enable();

            playerSpawner = FindObjectOfType<PlayerSpawner>();

            //Get map size
            var terrainAndRockSetting = FindObjectOfType<TerrainAndRockSetting>();
            mapSize = terrainAndRockSetting.CalculateMapSize() / 2;

            //Set the MaxStep as 5000 in training mode, 0 (inf) in inference mode
            MaxStep = trainingMode ? 5000 : 0;
        }

        /// <summary>
        ///     Heuristic control: copies the current values of <see cref="moveInput"/> and
        ///     <see cref="dirInput"/> into discrete action branches 0 and 1.
        ///     (Original note: W: go forward, S: go backward, A: turn left, D: turn right;
        ///     the bindings themselves are configured outside this class.)
        /// </summary>
        /// <param name="actionsOut">Action buffer to fill.</param>
        public override void Heuristic(in ActionBuffers actionsOut)
        {
            var discreteActionsOut = actionsOut.DiscreteActions;
            discreteActionsOut[0] = (int)moveInput.ReadValue<float>();
            discreteActionsOut[1] = (int)dirInput.ReadValue<float>();
        }

        /// <summary>
        ///     Initialize player when episode begins
        /// </summary>
        public override void OnEpisodeBegin()
        {
            stepLeftToFreeze = playerSpawner.numStepToFreeze;
            alive = true;

            //Drop any capture that was flagged but not yet processed, so it cannot
            //deactivate the freshly respawned hider in the new episode
            hiderDestroyFlag = false;

            SetPresence(true);
            PlayerSpawner.ResetCamera(gameObject.transform);
            playerSpawner.RelocatePlayer(gameObject.transform);

            var body = AgentRigidbody;
            body.velocity = Vector3.zero;
            body.angularVelocity = Vector3.zero;
        }

        /// <summary>
        ///     Collect observations: whether this agent is alive and the number of active hiders.
        ///     Also applies the pending capture of a hider and the per-observation rewards.
        /// </summary>
        /// <param name="sensor">Vector sensor the observations are written to.</param>
        public override void CollectObservations(VectorSensor sensor)
        {
            var isHider = gameObject.CompareTag(HiderTag);

            //Deactivate hiders when caught
            if (isHider && hiderDestroyFlag)
            {
                AddReward(CaughtPenalty);
                hiderDestroyFlag = false;
                alive = false;
                SetPresence(false);
            }

            sensor.AddObservation(alive);
            sensor.AddObservation(PlayerSpawner.CountActiveNumHider(transform.parent.gameObject));

            //Seekers are penalised on every observation
            if (gameObject.CompareTag(SeekerTag))
            {
                AddReward(SeekerStepPenalty);
            }

            //Add reward for surviving each step
            if (isHider && alive)
            {
                AddReward(HiderSurvivalReward);
            }
        }

        /// <summary>
        ///     Update agent's status when action is received.
        /// </summary>
        /// <param name="actionBuffers">Actions chosen for this step.</param>
        public override void OnActionReceived(ActionBuffers actionBuffers)
        {
            //Clearing an empty list is a no-op, so only the null guard is needed
            detected?.Clear();

            //Seekers ignore actions until the freeze counter has run out
            if (gameObject.CompareTag(SeekerTag) && stepLeftToFreeze > 0)
            {
                stepLeftToFreeze--;
                return;
            }

            if (alive)
            {
                MoveAgent(actionBuffers.DiscreteActions);
            }
        }

        /// <summary>
        ///     Move agent by control. Branch 0 scales the forward direction, branch 1 scales
        ///     the rotation around the up axis.
        /// </summary>
        /// <param name="act">Discrete actions for this step.</param>
        public virtual void MoveAgent(ActionSegment<int> act)
        {
            //Direction is taken before the rotation below is applied
            var dirToGo = transform.forward * act[0];
            var rotateDir = Vector3.up * act[1];

            transform.Rotate(rotateDir, Time.deltaTime * rotateSpeed);
            AgentRigidbody.velocity = dirToGo * moveSpeed;

            if (act[0] != 0)
            {
                GetComponent<PlaceObjectsToSurface>().StartPlacing(moveSpeed * dirToGo, true, false);
            }
        }

        /// <summary>
        ///     Shows or hides this agent: toggles the first two children and the collider, and
        ///     moves the GameObject to the layer named after its tag (shown) or to
        ///     "Ignore Raycast" (hidden).
        /// </summary>
        /// <param name="present">True to show the agent, false to hide it.</param>
        private void SetPresence(bool present)
        {
            transform.GetChild(0).gameObject.SetActive(present);
            transform.GetChild(1).gameObject.SetActive(present);
            gameObject.layer = LayerMask.NameToLayer(present ? gameObject.tag : CaughtLayerName);
            AgentCollider.enabled = present;
        }

        /// <summary>
        ///     Makes the camera under "Eye/Camera" render a solid black image.
        ///     Logs a warning instead of throwing when the camera cannot be found.
        /// </summary>
        private void BlackOutCamera()
        {
            var eye = transform.Find("Eye");
            var cameraTransform = eye != null ? eye.Find("Camera") : null;
            var eyeCamera = cameraTransform != null ? cameraTransform.GetComponent<Camera>() : null;

            if (eyeCamera == null)
            {
                Debug.LogWarning("GameAgent: no Camera found under Eye/Camera on " + name);
                return;
            }

            eyeCamera.clearFlags = CameraClearFlags.SolidColor;
            eyeCamera.backgroundColor = Color.black;
            eyeCamera.cullingMask = 0;
        }
    }
    I first tried to call
    EndEpisode()
    in each agent's own script. However, when the agents count the remaining hiders, some of them do the count after other agents have already ended their episode and begun a new one. So I created an
    AgentManager.cs
    to end episodes for all agents.

    Code (CSharp):
    /// <summary>
    ///     Ends the episode of every agent under the player spawner once no active hider is left.
    /// </summary>
    public class AgentManager : MonoBehaviour
    {
        public bool ifEndEpisode;
        private PlayerSpawner playerSpawner;

        //GameAgent components of the spawner's children, looked up once in Awake
        private GameAgent[] agents;

        /// <summary>
        ///     Finds the spawner and caches the GameAgent component of each of its children.
        /// </summary>
        private void Awake()
        {
            playerSpawner = FindObjectOfType<PlayerSpawner>();

            var root = playerSpawner.playerSpawner.transform;
            var childCount = root.childCount;
            agents = new GameAgent[childCount];
            for (var i = 0; i < childCount; i++)
            {
                //May be null when a child carries no GameAgent; such entries are skipped in Update
                agents[i] = root.GetChild(i).GetComponent<GameAgent>();
            }
        }

        /// <summary>
        ///     Ends every agent's episode while the spawner reports zero active hiders.
        /// </summary>
        // Update is called once per frame
        // NOTE(review): if the Academy steps the agents during FixedUpdate, several agent steps
        // may elapse between two Update calls — confirm whether FixedUpdate is the better hook.
        void Update()
        {
            if (PlayerSpawner.CountActiveNumHider(playerSpawner.playerSpawner) != 0)
            {
                return;
            }

            foreach (var agent in agents)
            {
                if (agent != null)
                {
                    agent.EndEpisode();
                }
            }
        }
    }
    It doesn't have the counting issue, but there are always redundant observations and actions after the hiders have died, meaning the episode ends with a delay.

    I think I have to stop the autostep by setting
    Academy.Instance.AutomaticSteppingEnabled = false
    , then use
    EnvironmentStep()
    manually. However, the game then becomes very slow, and I don't understand why.

    Then I tried to disable auto stepping only when a hider is caught, because I think the order in ML-Agents is: request action, process action, make observation and add rewards. (Please let me know if I'm wrong.)
    Code (CSharp):
    1. using System;
    2. using System.Collections;
    3. using System.Collections.Generic;
    4. using Unity.MLAgents;
    5. using Unity.VisualScripting.Dependencies.NCalc;
    6. using UnityEngine;
    7.  
    /// <summary>
    ///     Tracks which hiders are alive, hands out the capture rewards, and ends the episode
    ///     for every agent once no hider is flagged alive.
    /// </summary>
    public class AgentManager : MonoBehaviour
    {
        private PlayerSpawner playerSpawner;
        private List<GameAgent> hiders;
        private List<GameAgent> seekers;

        //aliveFlag[i] mirrors hiders[i].alive as seen at the end of the previous FixedUpdate
        public bool[] aliveFlag;

        /// <summary>
        ///     Sorts the spawner's children into seekers (tag "Seeker") and hiders (everything
        ///     else) and marks every hider as alive.
        /// </summary>
        private void Awake()
        {
            playerSpawner = FindObjectOfType<PlayerSpawner>();
            seekers = new List<GameAgent>();
            hiders = new List<GameAgent>();

            var root = playerSpawner.playerSpawner.transform;
            var total = root.childCount;
            for (var idx = 0; idx < total; idx++)
            {
                var child = root.GetChild(idx);
                var team = child.CompareTag("Seeker") ? seekers : hiders;
                team.Add(child.GetComponent<GameAgent>());
            }

            aliveFlag = new bool[hiders.Count];
            ResetAliveFlag();
        }

        /// <summary>
        ///     Sets the alive flag of every hider back to true.
        /// </summary>
        private void ResetAliveFlag()
        {
            var slot = 0;
            while (slot < hiders.Count)
            {
                aliveFlag[slot] = true;
                slot++;
            }
        }

        /// <summary>
        ///     Detects hiders that died since the last call, rewards/penalises the agents,
        ///     ends all episodes when no hider is left, and re-enables automatic stepping.
        /// </summary>
        void FixedUpdate()
        {
            var hiderIndex = 0;
            foreach (var hider in hiders)
            {
                //True only on the first call after the hider switched from alive to dead
                var justCaught = aliveFlag[hiderIndex] && !hider.alive;
                if (justCaught)
                {
                    foreach (var seeker in seekers)
                    {
                        seeker.SetReward(1f);
                        //Skip adding time-consuming reward if a hider is caught
                        seeker.skipReward = true;
                    }

                    hider.SetReward(-1f);
                }

                aliveFlag[hiderIndex] = hider.alive;
                hiderIndex++;
            }

            if (CountActiveNumHider() == 0)
            {
                //Hiders are ended before seekers
                foreach (var hider in hiders)
                {
                    hider.EndEpisode();
                }

                foreach (var seeker in seekers)
                {
                    seeker.EndEpisode();
                }

                ResetAliveFlag();
            }

            Academy.Instance.AutomaticSteppingEnabled = true;
        }

        /// <summary>
        ///     Counts the hiders whose alive flag is currently set.
        /// </summary>
        /// <returns>Number of true entries in <see cref="aliveFlag"/>; 0 for an empty array.</returns>
        public int CountActiveNumHider()
        {
            var remaining = 0;
            for (var slot = 0; slot < aliveFlag.Length; slot++)
            {
                if (aliveFlag[slot])
                {
                    remaining++;
                }
            }

            return remaining;
        }
    }
    91.  
    I also changed the agent's script, by setting alive = false in
    OnCollisionEnter
    and adding
    Academy.Instance.AutomaticSteppingEnabled = false
    when it's not alive.

    Code (CSharp):
    /// <summary>
    ///     When this hider is touched by a seeker: blacks out its camera, marks it as dead,
    ///     and hides/disables it immediately.
    /// </summary>
    /// <param name="collision">Collision data supplied by the physics engine.</param>
    public void OnCollisionEnter(Collision collision)
    {
        //Only a seeker touching a hider is of interest here
        if (!collision.gameObject.CompareTag("Seeker") || !gameObject.CompareTag("Hider"))
        {
            return;
        }

        //Turn its camera to black when a hider is caught
        var eyeCamera = transform.Find("Camera").GetComponent<Camera>();
        eyeCamera.clearFlags = CameraClearFlags.SolidColor;
        eyeCamera.backgroundColor = Color.black;
        eyeCamera.cullingMask = 0;

        alive = false;
        skipReward = true;

        //Hide the visual parts and take the agent out of physics and raycasts
        var self = gameObject;
        self.transform.Find("Body").gameObject.SetActive(false);
        self.transform.Find("Eye").gameObject.SetActive(false);
        self.GetComponent<Collider>().enabled = false;
        self.layer = LayerMask.NameToLayer("Ignore Raycast");
    }
    18.  
    /// <summary>
    ///     Writes the alive state and the active hider count to the sensor, applies the
    ///     per-observation rewards, and turns automatic stepping off while this agent is dead.
    /// </summary>
    /// <param name="sensor">Vector sensor the observations are written to.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        if (!alive)
        {
            Academy.Instance.AutomaticSteppingEnabled = false;
        }

        sensor.AddObservation(alive);
        sensor.AddObservation(agentManager.CountActiveNumHider());

        //Add reward for surviving each step
        if (gameObject.CompareTag("Hider") && alive)
        {
            AddReward(0.1f);
        }

        if (gameObject.CompareTag("Seeker"))
        {
            if (skipReward)
            {
                //Reset skip to false if just skip
                //Todo: might be problematic when seekers catch hiders in both consecutive observations
                skipReward = false;
            }
            else
            {
                //Only add time-consuming reward when no hiders is caught by all seekers
                AddReward(-0.1f);
            }
        }

        step++;
    }
    40.  
    This time, when a hider gets the penalty for getting caught, the next state still shows it as alive. So the environment still keeps stepping after I disabled auto stepping.

    Then I tried to use a threading lock.
    Code (CSharp):
    /// <summary>
    ///     When this hider is touched by a seeker: under <c>balanceLock</c>, blacks out its
    ///     camera, marks it as dead, and hides/disables it.
    /// </summary>
    /// <param name="collision">Collision data supplied by the physics engine.</param>
    public void OnCollisionEnter(Collision collision)
    {
        //Only a seeker touching a hider is of interest here
        if (!collision.gameObject.CompareTag("Seeker") || !gameObject.CompareTag("Hider"))
        {
            return;
        }

        // NOTE(review): Unity presumably invokes this callback and CollectObservations on the
        // same (main) thread, in which case this lock is never contended — confirm.
        lock (balanceLock)
        {
            //Turn its camera to black when a hider is caught
            var eyeCamera = transform.Find("Camera").GetComponent<Camera>();
            eyeCamera.clearFlags = CameraClearFlags.SolidColor;
            eyeCamera.backgroundColor = Color.black;
            eyeCamera.cullingMask = 0;

            alive = false;
            skipReward = true;

            //Hide the visual parts and take the agent out of physics and raycasts
            var self = gameObject;
            self.transform.Find("Body").gameObject.SetActive(false);
            self.transform.Find("Eye").gameObject.SetActive(false);
            self.GetComponent<Collider>().enabled = false;
            self.layer = LayerMask.NameToLayer("Ignore Raycast");
        }
    }
    /// <summary>
    ///     Under <c>balanceLock</c>: writes the alive state and the active hider count to the
    ///     sensor, applies the per-observation rewards, and turns automatic stepping off while
    ///     this agent is dead.
    /// </summary>
    /// <param name="sensor">Vector sensor the observations are written to.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): Unity presumably invokes this method and OnCollisionEnter on the
        // same (main) thread, in which case this lock is never contended — confirm.
        lock (balanceLock)
        {
            if (!alive)
            {
                Academy.Instance.AutomaticSteppingEnabled = false;
            }

            sensor.AddObservation(alive);
            sensor.AddObservation(agentManager.CountActiveNumHider());

            //Add reward for surviving each step
            if (gameObject.CompareTag("Hider") && alive)
            {
                AddReward(0.1f);
            }

            if (gameObject.CompareTag("Seeker"))
            {
                if (skipReward)
                {
                    //Reset skip to false if just skip
                    //Todo: might be problematic when seekers catch hiders in both consecutive observations
                    skipReward = false;
                }
                else
                {
                    //Only add time-consuming reward when no hiders is caught by all seekers
                    AddReward(-0.1f);
                }
            }

            step++;
        }
    }
    However, it still doesn't work.

    Does anyone have any idea? Any help would be much appreciated.
     
  2. fraa1197

    fraa1197

    Joined:
    Mar 24, 2021
    Posts:
    3
    Multiple agents are working for you? For me only one ever moves. Is there a certain setting you have to turn on?
     
  3. daishiqin1996

    daishiqin1996

    Joined:
    May 18, 2022
    Posts:
    2
    I'm not sure what happened in your case. For me, I just created a script for each agent, and then it worked.
     
  4. fraa1197

    fraa1197

    Joined:
    Mar 24, 2021
    Posts:
    3
    I guess I made a mistake.
    ¯\_(ツ)_/¯