
Question: Agents are making repetitive useless decisions

Discussion in 'ML-Agents' started by CenteredMindset, Nov 25, 2023.

  1. CenteredMindset
    Joined: Nov 24, 2023
    Posts: 1

    I'm recreating Pacman and trying to train a Pacman agent to move through the board, collect all the dots/pellets, and avoid the ghosts.

    I have split my training into two parts: initially Pacman learns to navigate the board and collect all the pellets with no ghosts, and afterwards the ghosts will be introduced so the agent can adapt to that environment.
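
    One way to switch between the two phases later (just a sketch with placeholder names; nothing like this is wired up yet) would be to read a trainer-supplied environment parameter when an episode begins:

    Code (CSharp):
    // Sketch only: "ghost_count" and gameManager.SetActiveGhosts are placeholder
    // names. The trainer config would supply the parameter, so ghosts can be
    // enabled for phase two without editing the scene.
    float ghostCount = Academy.Instance.EnvironmentParameters.GetWithDefault("ghost_count", 0f);
    gameManager.SetActiveGhosts(Mathf.RoundToInt(ghostCount));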

    However, I'm trying to figure out why my agent simply isn't learning. Its observations are the x and y of the nearest pellet, the direction to that pellet, and the distance to it; ray observations to the front and sides of Pacman that detect whether walls are present; and the x and y coordinates of the Pacman agent itself. I have tried both with and without normalizing these values and I always get the same result.

    The agent does very well in the first 30-60 seconds and collects about 80% of the pellets, but once only a few pellets are left it usually gets stuck in a loop of repetitive, useless moves around one area of the map, circling for about a minute before it collects the last few. It reaches this level almost immediately when training starts and doesn't improve at all with more training. I can't figure out how to make the agent better at navigating the board to reach the remaining pellets. All of this is for the first part only, with no ghosts yet.
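
    For reference, if I've counted correctly, the observation layout above works out to 12 values per step (each wall-ray bool is added as a single value): 2 for Pacman's position + 2 for its heading + 3 wall-ray flags + 2 for the nearest pellet's position + 2 for the direction to it + 1 for the distance = 12, which is what the vector observation Space Size on the Behavior Parameters component has to match.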

    I have given my agent 3 discrete actions: do nothing, turn left, and turn right (relative to Pacman's current direction).
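
    For manual testing, a Heuristic override along the following lines can drive the same three actions from the keyboard (just a sketch, not part of the script below; the key bindings are arbitrary):

    Code (CSharp):
    // Sketch only: maps arrow keys onto the agent's three discrete actions
    // (0 = do nothing, 1 = left, 2 = right) so the behavior can be checked by hand.
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActions = actionsOut.DiscreteActions;
        discreteActions[0] = 0; // default: do nothing
        if (Input.GetKey(KeyCode.LeftArrow)) discreteActions[0] = 1;
        if (Input.GetKey(KeyCode.RightArrow)) discreteActions[0] = 2;
    }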

    This is my EpisodeManager script.

    Code (CSharp):
    using System.Collections;
    using System.Collections.Generic;
    using UnityEngine;
    using UnityEngine.Tilemaps;

    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;
    using Unity.MLAgents.Actuators;

    public class EpisodeManager : Agent
    {
        public Pacman pacmanAgent;
        public GameManager gameManager;

        private float currentRoundTimer = 0f;

        public Tilemap tilemap;
        private float mapWidth;
        private float mapHeight;

        private void Start()
        {
            mapWidth = tilemap.cellBounds.size.x;
            mapHeight = tilemap.cellBounds.size.y;
        }

        public override void OnEpisodeBegin()
        {
            gameManager.NewGame();
            currentRoundTimer = 0f;
            Time.timeScale = 1.0f;
        }

        // normalize position of dots and pacman
        private Vector2 NormalizePos(Vector2 pos)
        {
            Vector2 normalizedPos = new Vector2(
                Mathf.Lerp(-1f, 1f, pos.x / mapWidth),
                Mathf.Lerp(-1f, 1f, pos.y / mapHeight)
            );

            return normalizedPos;
        }

        // normalize distance to dot
        private float NormalizeDist(float distance)
        {
            float maxPossibleDistance = Mathf.Sqrt(mapWidth * mapWidth + mapHeight * mapHeight);
            float normalizedDistance = Mathf.Clamp01(distance / maxPossibleDistance);
            return normalizedDistance;
        }

        private void AddRayObservations(VectorSensor sensor)
        {
            bool wallInFront = Physics2D.Raycast(pacmanAgent.transform.position, pacmanAgent.movement.direction, 0.6f, LayerMask.GetMask("Wall")).collider != null;
            sensor.AddObservation(wallInFront);

            float[] sideRayAngles = { 90f, 270f };
            foreach (float angle in sideRayAngles)
            {
                // Rotate the ray direction based on Pacman's current rotation
                Vector2 rotatedRayDirection = Quaternion.Euler(0, 0, angle) * pacmanAgent.movement.direction;

                RaycastHit2D sideHit = Physics2D.Raycast(
                    pacmanAgent.transform.position, rotatedRayDirection, 0.6f, LayerMask.GetMask("Wall")
                );

                // Add observation indicating if a wall is present
                bool wallOnSide = sideHit.collider != null;
                sensor.AddObservation(wallOnSide);
            }
        }

        public override void CollectObservations(VectorSensor sensor)
        {
            // pacman position and direction
            Vector2 normalizedPacman = NormalizePos(pacmanAgent.transform.position);
            sensor.AddObservation(normalizedPacman.x);
            sensor.AddObservation(normalizedPacman.y);
            sensor.AddObservation(pacmanAgent.movement.direction.x);
            sensor.AddObservation(pacmanAgent.movement.direction.y);

            // add rays to front and sides to detect wall
            AddRayObservations(sensor);

            // closest dot pos
            Vector3 dot = FindClosestDot();
            Vector2 normalizedDot = NormalizePos(dot);
            sensor.AddObservation(normalizedDot.x);
            sensor.AddObservation(normalizedDot.y);
            // direction to dot
            Vector2 directionToDot = (dot - pacmanAgent.transform.position).normalized;
            sensor.AddObservation(directionToDot.x);
            sensor.AddObservation(directionToDot.y);
            // distance to dot
            float distanceToClosestDot = Vector3.Distance(pacmanAgent.transform.position, dot);
            float normalizedDistToDot = NormalizeDist(distanceToClosestDot);
            sensor.AddObservation(normalizedDistToDot);
        }

        private Vector3 FindClosestDot()
        {
            Vector3 pacmanPosition = pacmanAgent.transform.position;
            Transform closestDot = null;
            float closestDistance = float.MaxValue;

            // Loop through each dot on the board
            foreach (Transform dot in gameManager.dots)
            {
                if (dot.gameObject.activeSelf)
                {
                    // Calculate the distance between Pacman and the current dot
                    float distance = Vector3.Distance(pacmanPosition, dot.position);

                    // Check if the current dot is closer than the stored closest dot
                    if (distance < closestDistance)
                    {
                        closestDistance = distance;
                        closestDot = dot;
                    }
                }
            }

            return closestDot != null ? closestDot.position : Vector3.zero;
        }

        public override void OnActionReceived(ActionBuffers actionBuffers)
        {
            currentRoundTimer += Time.fixedDeltaTime;
            int movementAction = actionBuffers.DiscreteActions[0];

            Vector2 direction = Vector2.zero;
            switch (movementAction)
            {
                case 0:
                    break;
                case 1:
                    direction = Vector2.left;
                    break;
                case 2:
                    direction = Vector2.right;
                    break;
            }

            if (direction != Vector2.zero)
            {
                pacmanAgent.movement.SetDirection(direction);
            }

            float angle = Mathf.Atan2(pacmanAgent.movement.direction.y, pacmanAgent.movement.direction.x);
            pacmanAgent.transform.rotation = Quaternion.AngleAxis(angle * Mathf.Rad2Deg, Vector3.forward);

            // Check game conditions
            if (gameManager.pacmanDead)
            {
                SetReward(-1f);
                EndEpisode();
            }
            if (gameManager.pacmanWon)
            {
                SetReward(1f);
                EndEpisode();
            }
            if (currentRoundTimer >= 120)
            {
                EndEpisode();
            }

            SetReward(-0.001f);
        }

        public void DotEaten()
        {
            SetReward(0.01f);
        }
    }
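
    For context, DotEaten() is meant to be called from the pellet pickup code when Pacman eats a dot. A simplified sketch of that side (the actual pellet script isn't included here, so episodeManager is just a placeholder for whatever reference the pellet holds to this agent):

    Code (CSharp):
    // Simplified sketch of the pellet side (placeholder names): when Pacman's
    // trigger touches a pellet, the pellet deactivates itself (FindClosestDot
    // skips inactive dots) and reports the small per-pellet reward.
    private void OnTriggerEnter2D(Collider2D other)
    {
        if (other.CompareTag("Player"))
        {
            gameObject.SetActive(false);
            episodeManager.DotEaten();
        }
    }
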
    This is my trainer configuration YAML file.

    behaviors:
      Pacman:
        trainer_type: ppo
        hyperparameters:
          batch_size: 128
          buffer_size: 1024
          learning_rate: 3.0e-4
          beta: 5.0e-4
          epsilon: 0.2
          lambd: 0.99
          num_epoch: 3
          learning_rate_schedule: linear
          beta_schedule: constant
          epsilon_schedule: linear
        network_settings:
          normalize: false
          hidden_units: 128
          num_layers: 2
        reward_signals:
          extrinsic:
            gamma: 0.99
            strength: 1.0
          curiosity:
            strength: 0.01
            gamma: 0.99
        max_steps: 5000000
        time_horizon: 32
        summary_freq: 5000
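
    For reference, training on this config is launched with the standard mlagents-learn CLI pointed at the file, e.g. mlagents-learn pacman.yaml --run-id=pacman_01 (the file name and run id here are just placeholders).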

    I appreciate your help!
     
    Last edited: Nov 25, 2023