Search Unity

  1. Welcome to the Unity Forums! Please take the time to read our Code of Conduct to familiarize yourself with the forum rules and how to post constructively.
  2. We have updated the language to the Editor Terms based on feedback from our employees and community. Learn more.
    Dismiss Notice

Reward by step skipped ?

Discussion in 'ML-Agents' started by Fly-st, Aug 31, 2020.

  1. Fly-st

    Fly-st

    Joined:
    Oct 9, 2018
    Posts:
    2
    I have some strange issue with my reward attribution (I am using the latest Realease 6 package).
    This is the environment I am using :
    upload_2020-9-1_0-46-11.png
    My agent is the red cube than can move along the track.
    I define my step reward as the oriented angle θ between 2 positions at 2 consecutive time steps, from the center (the green dot).

    If my agent is able to make a complete lap, one can easily see that the cumulative reward should be 360 (1 complete rotation = 360° ).
    But in practice I only get a value of around 180 for a complete lap (half of the expected value).

    This is the code I am using for my Agent controller :

    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4. using System;
    5. using Unity.MLAgents;
    6.  
    7. public class AgentController : Agent
    8. {
    9.     Rigidbody characterController;
    10.     float speed;
    11.     float acceleration = 0.1f;
    12.     float drag = 0.01f;
    13.     float rotationSpeed = 20.0f;
    14.     float max_speed = 1.5f;
    15.     int steps;
    16.     Vector3 initialPosition;
    17.     Vector3 lastPosition;
    18.     Quaternion initialRotation;
    19.  
    20.     public event Action OnReset;
    21.     public override void Initialize()
    22.     {
    23.         characterController = GetComponent<Rigidbody>();
    24.         speed = 0.0f;
    25.         steps = 0;
    26.         initialPosition = characterController.transform.position;
    27.         lastPosition = characterController.transform.position;
    28.         initialRotation = characterController.transform.rotation;
    29.     }
    30.  
    31.     public override void OnEpisodeBegin(){
    32.         Reset();
    33.     }
    34.  
    35.     public override void OnActionReceived(float[] vectorAction){
    36.         executeAction(vectorAction);
    37.     }
    38.  
    39.     public override void Heuristic(float[] actionsOut){
    40.         actionsOut[0]=0;
    41.         actionsOut[1]=0;
    42.         actionsOut[2]=0;
    43.         actionsOut[3]=0;
    44.         if (Input.GetKey(KeyCode.LeftArrow)){
    45.             actionsOut[3] = 1;}
    46.         if (Input.GetKey(KeyCode.RightArrow)){
    47.             actionsOut[2] = 1;}
    48.         if (Input.GetKey(KeyCode.UpArrow)){
    49.             actionsOut[0] = 1;}
    50.         if (Input.GetKey(KeyCode.DownArrow)){
    51.             actionsOut[1] = 1;}
    52. speed = Math.Min(max_speed, Math.Max(0.0f, speed + (actionsOut[0]-actionsOut[1]) * acceleration - drag));
    53.         float rotation = (actionsOut[2]-actionsOut[3]) * rotationSpeed;
    54.         characterController.transform.Translate(0.0f, 0.0f, speed * Time.deltaTime);
    55.         characterController.transform.Rotate(0.0f, rotation * Time.deltaTime, 0.0f);
    56.         float angle_reward = angleReward();
    57.         Debug.Log(GetCumulativeReward());
    58.         lastPosition = characterController.transform.position;
    59.     }
    60.     private void Reset()
    61.     {
    62.         characterController.transform.position = initialPosition;
    63.         characterController.transform.rotation = initialRotation;
    64.         speed = 0.0f;
    65.     }
    66.     private void executeAction(float[] vectorAction){
    67.         speed = Math.Min(max_speed, Math.Max(0.0f, speed + (vectorAction[0]-vectorAction[1]) * acceleration - drag));
    68.         float rotation = (vectorAction[2]-vectorAction[3]) * rotationSpeed;
    69.         characterController.transform.Translate(0.0f, 0.0f, speed * Time.deltaTime);
    70.         characterController.transform.Rotate(0.0f, rotation * Time.deltaTime, 0.0f);
    71.         AddReward(angleReward());
    72.         lastPosition = characterController.transform.position;
    73.         Debug.Log(GetCumulativeReward());
    74.     }
    75.      public float angleReward()
    76.      {
    77.         Vector3 currentPosition = characterController.transform.position;
    78.         return Vector3.SignedAngle(currentPosition, lastPosition, new Vector3(0,1,0));
    79.      }
    80.     void OnCollisionEnter(Collision collision)
    81.     {
    82.         EndEpisode();
    83.     }
    84.  
    85. }
    86.  
    And here are the other relevant parameters of my agent :
    upload_2020-9-1_0-47-15.png

    I tried to print the reward at each step and as long as I am in movement it is not null.
    I also tried computing the angle between the current and initial position and the value is correct (I get 180 for half a lap while cumulative is around 90).

    I know I could just add a doubled reward, but it frustrate me not knowing why things aren’t working as intended.
    Anyone got some insight on the issue ?
     
    Last edited: Sep 1, 2020
  2. Fly-st

    Fly-st

    Joined:
    Oct 9, 2018
    Posts:
    2
    Managed to solve the issue, the heursitic part wasn't adding the value to the reward, and I misunderstood how the heursitc method actually works.