Search Unity

Question My Agent Keeps Doing A Discrete Action Even Though I Gave It A Negative Reward

Discussion in 'ML-Agents' started by Propricen, May 3, 2023.

  1. Propricen

    Propricen

    Joined:
    Mar 4, 2022
    Posts:
    3
    Hello everyone i am new to using Unity ML-Agents, I have created an environment for my agent to train but the agent keeps jumping even when i give the jump action -100 reward, can someone help me please i can provide more data if needed.

    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4. using Unity.MLAgents;
    5. using Unity.MLAgents.Actuators;
    6. using Unity.MLAgents.Sensors;
    7.  
    8. public class MovetoGoal : Agent
    9. {
    10.     [SerializeField] private Transform targetTransform;
    11.     [SerializeField] private Transform restart;
    12.     [SerializeField] private Rigidbody2D rb;
    13.     public float JumpPower = 0.1f;
    14.     private bool isGrounded = true;
    15.     private float previousDistanceToPlayer;
    16.     private float totalReward = 0;
    17.     public override void OnEpisodeBegin()
    18.     {
    19.         transform.position = restart.transform.position;
    20.     }
    21.     public override void CollectObservations(VectorSensor sensor)
    22.     {
    23.         sensor.AddObservation(transform.localPosition);
    24.         sensor.AddObservation(targetTransform.localPosition);
    25.         sensor.AddObservation(rb.velocity);
    26.     }
    27.     public override void OnActionReceived(ActionBuffers actions)
    28.     {
    29.         float moveX = actions.ContinuousActions[0];
    30.         int jump = actions.DiscreteActions[0];
    31.         rb.velocity = new Vector2(moveX * 8f, rb.velocity.y);
    32.         if (jump != 0 && isGrounded)
    33.         {
    34.             rb.velocity = new Vector2(rb.velocity.x, JumpPower);
    35.             isGrounded = false;
    36.             SetReward(-1f);
    37.             totalReward = totalReward - 1f;
    38.         }
    39.         float distanceToPlayer = Vector2.Distance(transform.position, targetTransform.position);
    40.  
    41.         // If the agent gets too far away from the player, give a negative reward
    42.         if (distanceToPlayer >= previousDistanceToPlayer)
    43.         {
    44.             SetReward(-0.1f);
    45.             totalReward = totalReward - 0.1f;      
    46.         }
    47.  
    48.         // If the agent gets closer to the player, give a positive reward
    49.         if (distanceToPlayer < previousDistanceToPlayer)
    50.         {
    51.             SetReward(0.1f);
    52.             totalReward = totalReward + 0.1f;
    53.         }
    54.  
    55.         // Update the previous distance to the player for the next time step
    56.         previousDistanceToPlayer = distanceToPlayer;
    57.     }
    58.     public override void Heuristic(in ActionBuffers actionsOut)
    59.     {
    60.         ActionSegment<float> continousActions = actionsOut.ContinuousActions;
    61.         continousActions[0] = Input.GetAxisRaw("Horizontal");
    62.     }
    63.     private void OnTriggerEnter2D(Collider2D other)
    64.     {
    65.         if (other.gameObject.tag == "Goal")
    66.         {
    67.             //Debug.Log(totalReward);
    68.             SetReward(+1f);
    69.             totalReward = totalReward + 1f;
    70.             EndEpisode();
    71.         }
    72.         if (other.gameObject.tag == "Wall")
    73.         {
    74.             //Debug.Log(totalReward);
    75.             SetReward(-1f);
    76.             totalReward = totalReward - 1f;
    77.             EndEpisode();
    78.         }
    79.     }
    80.     void OnCollisionEnter2D(Collision2D col)
    81.     {
    82.         if (col.gameObject.tag == "Ground")
    83.         {
    84.             isGrounded = true;
    85.         }
    86.     }
    87. }
     
  2. kokimitsunami

    kokimitsunami

    Joined:
    Sep 2, 2021
    Posts:
    25
    Hi, I think here you should use AddReward() instead of SetReward(). AddReward() increments reward by the provided value, while SetReward() overrides the current reward.
    You can confirm the difference of those API here. I also found a discussion on the difference here. I hope this helps.
     
    Propricen likes this.
  3. Propricen

    Propricen

    Joined:
    Mar 4, 2022
    Posts:
    3
    That fixed it thank you so much!
     
    kokimitsunami likes this.