Search Unity

Agent training works fine, but the exported model does not work in Unity

Discussion in 'ML-Agents' started by shohan4556, Mar 30, 2021.

  1. shohan4556

    shohan4556

    Joined:
    Feb 3, 2016
    Posts:
    11
    I am training a vehicle agent to follow a simple track using behavioural cloning. During training the agent drives fine, but when I export the trained model and run it in Unity, the agent just gets stuck.

    my config file
    ------------------

    Code (CSharp):
    1. default:  # NOTE(review): presumably fallback values for behaviors without their own entry — confirm for the ML-Agents version in use
    2.     trainer: ppo
    3.     batch_size: 1024
    4.     beta: 5.0e-3
    5.     buffer_size: 10240
    6.     epsilon: 0.2
    7.     hidden_units: 128
    8.     lambd: 0.99
    9.     learning_rate: 3.0e-4
    10.     max_steps: 5000000
    11.     memory_size: 256  # NOTE(review): presumably only relevant when use_recurrent is true (it is false below) — confirm
    12.     normalize: false
    13.     num_epoch: 3
    14.     num_layers: 2
    15.     time_horizon: 64
    16.     sequence_length: 64  # NOTE(review): same caveat as memory_size
    17.     summary_freq: 10000
    18.     use_recurrent: false
    19.     reward_signals:
    20.         extrinsic:
    21.             strength: 1.0
    22.             gamma: 0.99
    23.  
    24.  
    25. RaceAgent:  # NOTE(review): presumably must match the agent's Behavior Name in Unity; the script class in this thread is BikeAgent — confirm
    26.     summary_freq: 10000
    27.     time_horizon: 64
    28.     batch_size: 256  # default section sets 1024
    29.     buffer_size: 2048  # default section sets 10240
    30.     hidden_units: 128
    31.     num_layers: 2
    32.     beta: 5.0e-4  # default section sets 5.0e-3
    33.     learning_rate_schedule: linear
    34.     max_steps: 5.0e7  # default section sets 5000000
    35.     num_epoch: 3
    36.     behavioral_cloning:
    37.         demo_path: RaceAgentN4_1.demo  # same demo file as gail.demo_path below
    38.         strength: 1.0
    39.         steps: 150000
    40.     reward_signals:
    41.         extrinsic:
    42.             strength: 0.1  # lower than the gail strength (1.0) below
    43.             gamma: 0.99
    44.         curiosity:
    45.             strength: 0.01
    46.             gamma: 0.90
    47.             encoding_size: 256
    48.         gail:
    49.             strength: 1.0
    50.             gamma: 0.99
    51.             encoding_size: 128
    52.             demo_path: RaceAgentN4_1.demo

    Code (CSharp):
    1. using System;
    2.  
    3. /// <summary>
    4. /// ML-Agents agent that drives a bike around a checkpoint track.
    5. /// Rewards come from checkpoint events, wall contact, leaving the track,
    6. /// and a small per-step time penalty.
    7. /// </summary>
    8. public class BikeAgent : Agent
    9. {
    10.     // Values of the two discrete action branches, shared by
    11.     // OnActionReceived and Heuristic so they cannot drift apart.
    12.     private const int Idle = 0;
    13.     private const int Accelerate = 1;
    14.     private const int Brake = 2;
    15.     private const int SteerLeft = 1;
    16.     private const int SteerRight = 2;
    17.  
    18.     [SerializeField] private Transform m_SpawnPos;
    19.     [SerializeField] private Vehicle _vehicle; // ref
    20.     [SerializeField] private TrackCheckpoints _trackCheckpoints; // ref
    21.     [SerializeField] private Transform m_BikeSphere;
    22.  
    23.     /// <summary>Subscribes to both checkpoint events and releases the vehicle.</summary>
    24.     public override void Initialize()
    25.     {
    26.         base.Initialize();
    27.         _trackCheckpoints.OnPlayerCorrectCheckpoint += OnCorrectCheckPoint;
    28.         // Must be "+=": with "-=" the handler was never attached, so the
    29.         // wrong-checkpoint penalty was never applied.
    30.         _trackCheckpoints.OnPlayerWrongCheckpoint += OnWrongCheckPoint;
    31.         _vehicle.StopVehicle = false;
    32.     }
    33.  
    34.     /// <summary>Detaches the handlers added in Initialize when this agent is destroyed.</summary>
    35.     private void OnDestroy()
    36.     {
    37.         if (_trackCheckpoints == null)
    38.         {
    39.             return;
    40.         }
    41.  
    42.         _trackCheckpoints.OnPlayerCorrectCheckpoint -= OnCorrectCheckPoint;
    43.         _trackCheckpoints.OnPlayerWrongCheckpoint -= OnWrongCheckPoint;
    44.     }
    45.  
    46.     /// <summary>Rewards this bike for a correct checkpoint, with a bonus on lap completion.</summary>
    47.     void OnCorrectCheckPoint(Transform carTransform, bool isLapComplete)
    48.     {
    49.         // The event carries the transform that hit the checkpoint; ignore any other transform.
    50.         if (carTransform != m_BikeSphere)
    51.         {
    52.             return;
    53.         }
    54.  
    55.         AddReward(1f);
    56.         if (isLapComplete)
    57.         {
    58.             AddReward(1f);
    59.         }
    60.     }
    61.  
    62.     /// <summary>Penalizes this bike when it triggers the wrong-checkpoint event.</summary>
    63.     void OnWrongCheckPoint(Transform carTransform)
    64.     {
    65.         if (carTransform == m_BikeSphere)
    66.         {
    67.             AddReward(-1f);
    68.         }
    69.     }
    70.  
    71.     /// <summary>Resets the vehicle at the start of every episode.</summary>
    72.     public override void OnEpisodeBegin()
    73.     {
    74.         base.OnEpisodeBegin();
    75.         ResetVehicle();
    76.     }
    77.  
    78.     /// <summary>
    79.     /// Stops the vehicle, moves it to a randomly offset spawn point facing the
    80.     /// spawn direction, and resets its checkpoint progress.
    81.     /// </summary>
    82.     void ResetVehicle()
    83.     {
    84.         _vehicle.StopVehicle = true;
    85.  
    86.         // Fully qualified: "Random" is ambiguous once both System and UnityEngine are imported.
    87.         Vector3 spawnOffset = new Vector3(
    88.             UnityEngine.Random.Range(-3f, 3f),
    89.             0.75f,
    90.             UnityEngine.Random.Range(-2f, 2f));
    91.         Vector3 spawnPosition = m_SpawnPos.position + spawnOffset;
    92.  
    93.         transform.position = spawnPosition;
    94.         m_BikeSphere.position = spawnPosition;
    95.         transform.forward = m_SpawnPos.forward;
    96.         m_BikeSphere.forward = m_SpawnPos.forward;
    97.  
    98.         _trackCheckpoints.ResetCheckPoint(m_BikeSphere);
    99.     }
    100.  
    101.     /// <summary>
    102.     /// Adds one observation: the dot product of the agent's forward vector and
    103.     /// the next checkpoint's forward vector.
    104.     /// </summary>
    105.     public override void CollectObservations(VectorSensor sensor)
    106.     {
    107.         base.CollectObservations(sensor);
    108.  
    109.         Vector3 checkPointForward = _trackCheckpoints.GetNextCheckPoint(m_BikeSphere).transform.forward;
    110.         float dirDot = Vector3.Dot(transform.forward, checkPointForward);
    111.         sensor.AddObservation(dirDot);
    112.     }
    113.  
    114.     /// <summary>
    115.     /// Applies one step of actions: vectorAction[0] is drive (0 idle, 1 accelerate,
    116.     /// 2 brake) and vectorAction[1] is steering (0 idle, 1 left, 2 right).
    117.     /// Ends the episode with a penalty when the agent is below y = 0.
    118.     /// </summary>
    119.     public override void OnActionReceived(float[] vectorAction)
    120.     {
    121.         base.OnActionReceived(vectorAction);
    122.  
    123.         // Below y = 0 the bike has left the track.
    124.         if (transform.position.y < 0f)
    125.         {
    126.             AddReward(-1f);
    127.             EndEpisode();
    128.             // The episode is over: do not apply this step's controls or time
    129.             // penalty on top of the reset.
    130.             return;
    131.         }
    132.  
    133.         _vehicle.StopVehicle = false;
    134.  
    135.         int driveAction = Mathf.FloorToInt(vectorAction[0]);
    136.         int steerAction = Mathf.FloorToInt(vectorAction[1]);
    137.  
    138.         switch (driveAction)
    139.         {
    140.             case Accelerate:
    141.                 _vehicle.ControlAccelerate();
    142.                 break;
    143.             case Brake:
    144.                 _vehicle.ControlBrake();
    145.                 break;
    146.             default:
    147.                 // Idle (or an unexpected value): no drive input.
    148.                 break;
    149.         }
    150.  
    151.         switch (steerAction)
    152.         {
    153.             case SteerLeft:
    154.                 _vehicle.ControlSteer(-1);
    155.                 break;
    156.             case SteerRight:
    157.                 _vehicle.ControlSteer(1);
    158.                 break;
    159.             default:
    160.                 // Idle (or an unexpected value): no steering input.
    161.                 break;
    162.         }
    163.  
    164.         // Per-step time penalty. Skipped when MaxStep is 0, where -1f / MaxStep
    165.         // would evaluate to negative infinity.
    166.         if (MaxStep > 0)
    167.         {
    168.             AddReward(-1f / MaxStep);
    169.         }
    170.     }
    171.  
    172.     /// <summary>Keyboard control: W accelerates, S brakes, A steers left, D steers right.</summary>
    173.     public override void Heuristic(float[] actionsOut)
    174.     {
    175.         base.Heuristic(actionsOut);
    176.  
    177.         actionsOut[0] = Idle; // drive
    178.         actionsOut[1] = Idle; // steering
    179.  
    180.         if (Input.GetKey(KeyCode.W))
    181.         {
    182.             actionsOut[0] = Accelerate;
    183.         }
    184.  
    185.         // Checked after W, so S wins when both keys are held.
    186.         if (Input.GetKey(KeyCode.S))
    187.         {
    188.             actionsOut[0] = Brake;
    189.         }
    190.  
    191.         if (Input.GetKey(KeyCode.A))
    192.         {
    193.             actionsOut[1] = SteerLeft;
    194.         }
    195.  
    196.         // Checked after A, so D wins when both keys are held.
    197.         if (Input.GetKey(KeyCode.D))
    198.         {
    199.             actionsOut[1] = SteerRight;
    200.         }
    201.     }
    202.  
    203.     //todo collision obstacle reward etc
    204.  
    205.     /// <summary>Small penalty on first contact with an object tagged "wall".</summary>
    206.     private void OnCollisionEnter(Collision other)
    207.     {
    208.         if (other.gameObject.CompareTag("wall"))
    209.         {
    210.             AddReward(-0.05f);
    211.         }
    212.     }
    213.  
    214.     /// <summary>Smaller recurring penalty while contact with a "wall" object continues.</summary>
    215.     private void OnCollisionStay(Collision other)
    216.     {
    217.         if (other.gameObject.CompareTag("wall"))
    218.         {
    219.             AddReward(-0.01f);
    220.         }
    221.     }
    222.  
    223.     //todo update UI (re-add Update() once there is per-frame work to do)
    224. }
    225.  
     
  2. ruoping_unity

    ruoping_unity

    Unity Technologies

    Joined:
    Jul 10, 2020
    Posts:
    134
  3. shohan4556

    shohan4556

    Joined:
    Feb 3, 2016
    Posts:
    11
    Thank you for your kind response. Another issue has come up: after a lot of training with behavioral cloning, GAIL, and curiosity, the agent is not learning properly.