Search Unity

  1. Welcome to the Unity Forums! Please take the time to read our Code of Conduct to familiarize yourself with the forum rules and how to post constructively.
  2. Dismiss Notice

Agent training works fine, but the exported model does not work in Unity

Discussion in 'ML-Agents' started by shohan4556, Mar 30, 2021.

  1. shohan4556

    shohan4556

    Joined:
    Feb 3, 2016
    Posts:
    11
    I am training a vehicle agent to follow a simple track using behavioural cloning. During training the agent drives fine, but after I export the model and run it in Unity, the agent just gets stuck.

    my config file
    ------------------

    Code (CSharp):
    # Trainer configuration with two top-level sections: `default` and `RaceAgent`.
    # NOTE(review): presumably values under `default` apply to any behavior that
    # does not override them in its own section - confirm against the installed
    # ML-Agents version.
    1. default:
    2.     trainer: ppo
    3.     batch_size: 1024
    4.     beta: 5.0e-3
    5.     buffer_size: 10240
    6.     epsilon: 0.2
    7.     hidden_units: 128
    8.     lambd: 0.99
    9.     learning_rate: 3.0e-4
    10.     max_steps: 5000000
    11.     memory_size: 256
    12.     normalize: false
    13.     num_epoch: 3
    14.     num_layers: 2
    15.     time_horizon: 64
    16.     sequence_length: 64
    17.     summary_freq: 10000
    # use_recurrent is false here; memory_size and sequence_length above are
    # presumably only read when it is true - TODO confirm.
    18.     use_recurrent: false
    19.     reward_signals:
    20.         extrinsic:
    21.             strength: 1.0
    22.             gamma: 0.99
    23.  
    24.  
    # Settings for the behavior named `RaceAgent`.
    # NOTE(review): this key presumably has to match the Behavior Name configured
    # on the agent in the Unity scene - verify, since the agent class posted with
    # this config is called BikeAgent.
    25. RaceAgent:
    26.     summary_freq: 10000
    27.     time_horizon: 64
    28.     batch_size: 256
    29.     buffer_size: 2048
    30.     hidden_units: 128
    31.     num_layers: 2
    32.     beta: 5.0e-4
    33.     learning_rate_schedule: linear
    34.     max_steps: 5.0e7
    35.     num_epoch: 3
    # Behavioral cloning and the GAIL reward signal below both read the same
    # demonstration file (RaceAgentN4_1.demo).
    36.     behavioral_cloning:
    37.         demo_path: RaceAgentN4_1.demo
    38.         strength: 1.0
    39.         steps: 150000
    # Reward signal weights: gail (1.0) is weighted ten times higher than
    # extrinsic (0.1), and curiosity is the smallest at 0.01.
    40.     reward_signals:
    41.         extrinsic:
    42.             strength: 0.1
    43.             gamma: 0.99
    44.         curiosity:
    45.             strength: 0.01
    46.             gamma: 0.90
    47.             encoding_size: 256
    48.         gail:
    49.             strength: 1.0
    50.             gamma: 0.99
    51.             encoding_size: 128
    52.             demo_path: RaceAgentN4_1.demo

    Code (CSharp):
    1. using System;
    2.  
    /// <summary>
    /// Agent that drives a <c>Vehicle</c> using two discrete action branches
    /// (forward/brake and steering). It is rewarded through the checkpoint events
    /// raised by <c>TrackCheckpoints</c> and penalised for touching objects tagged
    /// "wall", for dropping below y = 0, and slightly on every step.
    /// </summary>
    public class BikeAgent : Agent
    {
        // Values of action branch 0 (throttle) and branch 1 (steering).
        // Shared by OnActionReceived and Heuristic so the two cannot drift apart.
        private const int ActionIdle = 0;
        private const int ForwardAccelerate = 1;
        private const int ForwardBrake = 2;
        private const int TurnLeft = 1;
        private const int TurnRight = 2;

        // Reference pose (position and forward direction) used when respawning.
        [SerializeField] private Transform m_SpawnPos;
        // Vehicle controller that receives accelerate / brake / steer commands.
        [SerializeField] private Vehicle _vehicle; // ref
        // Source of the checkpoint events and of the "next checkpoint" query.
        [SerializeField] private TrackCheckpoints _trackCheckpoints; // ref
        // Transform reported by the checkpoint events for this agent's bike.
        [SerializeField] private Transform m_BikeSphere;

        /// <summary>
        /// Subscribes to both checkpoint events and releases the vehicle.
        /// </summary>
        public override void Initialize()
        {
            base.Initialize();

            // Remove-then-add keeps the subscription single even if Initialize
            // happens to run more than once for this instance.
            _trackCheckpoints.OnPlayerCorrectCheckpoint -= OnCorrectCheckPoint;
            _trackCheckpoints.OnPlayerCorrectCheckpoint += OnCorrectCheckPoint;

            // Fix: the original used "-=" only, so the wrong-checkpoint handler
            // was never attached and its penalty could never be applied.
            _trackCheckpoints.OnPlayerWrongCheckpoint -= OnWrongCheckPoint;
            _trackCheckpoints.OnPlayerWrongCheckpoint += OnWrongCheckPoint;

            _vehicle.StopVehicle = false;
        }

        /// <summary>
        /// Detaches the checkpoint handlers so the checkpoint tracker does not
        /// keep invoking (and referencing) a destroyed agent.
        /// </summary>
        private void OnDestroy()
        {
            if (_trackCheckpoints != null)
            {
                _trackCheckpoints.OnPlayerCorrectCheckpoint -= OnCorrectCheckPoint;
                _trackCheckpoints.OnPlayerWrongCheckpoint -= OnWrongCheckPoint;
            }
        }

        /// <summary>
        /// Reward handler: +1 for a correct checkpoint, plus another +1 when that
        /// checkpoint completes a lap. Events for other transforms are ignored.
        /// </summary>
        /// <param name="carTransform">Transform that passed the checkpoint.</param>
        /// <param name="isLapComplete">True when the checkpoint finished a lap.</param>
        void OnCorrectCheckPoint(Transform carTransform, bool isLapComplete)
        {
            if (carTransform != m_BikeSphere)
            {
                return;
            }

            AddReward(1f);
            if (isLapComplete)
            {
                AddReward(1f);
            }
        }

        /// <summary>
        /// Penalty handler: -1 when this agent's bike hits a wrong checkpoint.
        /// </summary>
        /// <param name="carTransform">Transform that passed the checkpoint.</param>
        void OnWrongCheckPoint(Transform carTransform)
        {
            if (carTransform == m_BikeSphere)
            {
                AddReward(-1f);
            }
        }

        /// <summary>
        /// Respawns the vehicle at the start of every episode.
        /// </summary>
        public override void OnEpisodeBegin()
        {
            base.OnEpisodeBegin();
            ResetVehicle();
        }

        /// <summary>
        /// Stops the vehicle, moves the agent and the bike sphere to a randomly
        /// offset spawn position, aligns both with the spawn direction and resets
        /// the checkpoint progress for the bike sphere.
        /// </summary>
        void ResetVehicle()
        {
            _vehicle.StopVehicle = true;

            // UnityEngine.Random is fully qualified on purpose: the snippet has
            // "using System;", which would otherwise make "Random" refer to (or
            // clash with) System.Random.
            Vector3 spawnOffset = new Vector3(
                UnityEngine.Random.Range(-3f, 3f),
                0.75f,
                UnityEngine.Random.Range(-2f, 2f));
            Vector3 spawnPosition = m_SpawnPos.position + spawnOffset;

            transform.position = spawnPosition;
            m_BikeSphere.position = spawnPosition;
            transform.forward = m_SpawnPos.forward;
            m_BikeSphere.forward = m_SpawnPos.forward;

            _trackCheckpoints.ResetCheckPoint(m_BikeSphere);
            //todo reset checkpoint
        }

        /// <summary>
        /// Adds a single observation: the dot product between the agent's forward
        /// vector and the forward vector of the next checkpoint.
        /// </summary>
        /// <param name="sensor">Vector sensor that receives the observation.</param>
        public override void CollectObservations(VectorSensor sensor)
        {
            base.CollectObservations(sensor);

            Vector3 checkPointForward = _trackCheckpoints.GetNextCheckPoint(m_BikeSphere).transform.forward;
            float directionDot = Vector3.Dot(transform.forward, checkPointForward);
            sensor.AddObservation(directionDot);
        }

        /// <summary>
        /// Applies the chosen actions to the vehicle and hands out the per-step
        /// penalty. Ends the episode with a -1 reward when the agent is below y = 0.
        /// </summary>
        /// <param name="vectorAction">
        /// Index 0 is the throttle branch (0 idle, 1 accelerate, 2 brake);
        /// index 1 is the steering branch (0 idle, 1 left, 2 right).
        /// </param>
        public override void OnActionReceived(float[] vectorAction)
        {
            base.OnActionReceived(vectorAction);

            // Fell off the track.
            if (transform.position.y < 0f)
            {
                AddReward(-1f);
                EndEpisode();
                // The episode is over: do not apply this step's controls or the
                // time penalty on top of it.
                return;
            }

            _vehicle.StopVehicle = false;

            // Switch on integers rather than on floats.
            int forwardAction = Mathf.FloorToInt(vectorAction[0]);
            int turnAction = Mathf.FloorToInt(vectorAction[1]);

            switch (forwardAction)
            {
                case ForwardAccelerate:
                    _vehicle.ControlAccelerate();
                    break;
                case ForwardBrake:
                    _vehicle.ControlBrake();
                    break;
                case ActionIdle:
                default:
                    break;
            }

            switch (turnAction)
            {
                case TurnLeft:
                    _vehicle.ControlSteer(-1);
                    break;
                case TurnRight:
                    _vehicle.ControlSteer(1);
                    break;
                case ActionIdle:
                default:
                    break;
            }

            // Small time penalty. Skipped when MaxStep is 0, because -1f / 0
            // would add an infinite reward.
            if (MaxStep > 0)
            {
                AddReward(-1f / MaxStep);
            }
        }

        /// <summary>
        /// Keyboard control: W accelerates, S brakes, A steers left, D steers
        /// right. S wins over W and D wins over A when both keys are held.
        /// </summary>
        /// <param name="actionsOut">Action buffer to fill (index 0 throttle, index 1 steering).</param>
        public override void Heuristic(float[] actionsOut)
        {
            base.Heuristic(actionsOut);

            //default idle
            actionsOut[0] = ActionIdle; // forward
            actionsOut[1] = ActionIdle; // turn

            if (Input.GetKey(KeyCode.W))
            {
                actionsOut[0] = ForwardAccelerate;
            }

            if (Input.GetKey(KeyCode.S))
            {
                actionsOut[0] = ForwardBrake;
            }

            if (Input.GetKey(KeyCode.A))
            {
                actionsOut[1] = TurnLeft;
            }

            if (Input.GetKey(KeyCode.D))
            {
                actionsOut[1] = TurnRight;
            }
        }

        //todo collision obstacle reward etc

        /// <summary>
        /// Applies a -0.05 penalty when a collision with an object tagged "wall" starts.
        /// </summary>
        private void OnCollisionEnter(Collision other)
        {
            if (other.gameObject.CompareTag("wall"))
            {
                AddReward(-0.05f);
            }
        }

        /// <summary>
        /// Applies a -0.01 penalty on each callback while touching an object tagged "wall".
        /// </summary>
        private void OnCollisionStay(Collision other)
        {
            if (other.gameObject.CompareTag("wall"))
            {
                AddReward(-0.01f);
            }
        }

        //todo update UI (the empty per-frame Update() was removed; add it back when needed)
    }
    175.  
     
  2. ruoping_unity

    ruoping_unity

    Unity Technologies

    Joined:
    Jul 10, 2020
    Posts:
    134
  3. shohan4556

    shohan4556

    Joined:
    Feb 3, 2016
    Posts:
    11
    Thank you for your kind response. Another issue came up: after a lot of training with behavioral cloning, GAIL, and curiosity, the agent is not learning properly.