Question Self driving car (need help with training)

leozhang1 · Aug 12, 2022

I am having trouble tuning the parameters when training my self-driving cars, and I'm not sure whether I'm rewarding and punishing in the right places. Each car agent is supposed to drive through a series of checkpoints on a circular race track (similar to this video:
). However, the agents seem to have trouble making sharp turns. After training, the resulting "trained" neural network can barely make it past the first turn.

visualization of my track:

CarDriverAgent script:

Code (CSharp):

using System.Collections;

using System.Collections.Generic;

using UnityEngine;

using Unity.MLAgents;

using Unity.MLAgents.Actuators;

using Unity.MLAgents.Sensors;

/// <summary>
/// ML-Agents agent that drives a <see cref="CarDriver"/> through the checkpoints
/// exposed by a <see cref="TrackCheckpoints"/> instance.
/// Rewards are configured in the inspector through <see cref="RewardsInfo"/>.
/// </summary>
public class CarDriverAgent : Agent
{
    [SerializeField] private TrackCheckpoints trackCheckpoints;

    [SerializeField] private Transform spawnPosition;

    // While this is the next checkpoint, wall-impact penalties are skipped.
    // Previously a hard-coded literal; the default keeps the old behavior.
    [SerializeField] private string wallPenaltyExemptCheckpointName = "CheckpointSingle (67)";

    private CarDriver carDriver;

    private Quaternion recallRotation;

    private Vector3 recallPosition;

    /// <summary>
    /// Reward/penalty amounts applied by this agent. Each field is passed
    /// straight to <c>AddReward</c>, so penalties must be entered as negative values.
    /// </summary>
    [System.Serializable]
    public struct RewardsInfo
    {
        public float correctCheckpoint, wrongCheckpoint;

        public float hitLastCheckpoint, hitAWall, slidingAlongWall;

        public float movingForward, movingBackwards, noMovement;

        public float notFacingCheckpoint;
    }

    [SerializeField] private RewardsInfo rwd;

    /// <summary>
    /// Caches components, records the pose restored at the start of every episode,
    /// and subscribes to the checkpoint events.
    /// </summary>
    public override void Initialize()
    {
        carDriver = GetComponent<CarDriver>();

        // Quaternion is a struct, so a plain assignment already copies it.
        recallRotation = transform.rotation;

        // The random offset is chosen once here and reused for every episode.
        recallPosition = spawnPosition.position + new Vector3(Random.Range(-5f, +5f), 0, Random.Range(-5f, +5f));

        // Use Unity's overloaded == here: "is null" is a pure reference check and
        // does not treat an unassigned or destroyed UnityEngine.Object as null.
        if (trackCheckpoints == null)
        {
            GameObject checkpointsRoot = GameObject.Find("CheckPoints");
            if (checkpointsRoot != null)
            {
                trackCheckpoints = checkpointsRoot.GetComponent<TrackCheckpoints>();
            }
        }

        if (trackCheckpoints == null)
        {
            Debug.LogError("CarDriverAgent: no TrackCheckpoints assigned and none found on a GameObject named \"CheckPoints\".", this);
            return;
        }

        trackCheckpoints.OnCarCorrectCheckpoint += TrackCheckpoints_OnCarCorrectCheckpoint;
        trackCheckpoints.OnCarWrongCheckpoint += TrackCheckpoints_OnCarWrongCheckpoint;
        trackCheckpoints.OnAgentCompleteTrack += TrackCheckpoints_OnAgentCompleteTrack;
    }

    /// <summary>
    /// Removes the event subscriptions made in <see cref="Initialize"/> so the
    /// checkpoint manager does not keep invoking handlers on a destroyed agent.
    /// </summary>
    private void OnDestroy()
    {
        if (trackCheckpoints == null)
        {
            return;
        }

        trackCheckpoints.OnCarCorrectCheckpoint -= TrackCheckpoints_OnCarCorrectCheckpoint;
        trackCheckpoints.OnCarWrongCheckpoint -= TrackCheckpoints_OnCarWrongCheckpoint;
        trackCheckpoints.OnAgentCompleteTrack -= TrackCheckpoints_OnAgentCompleteTrack;
    }

    /// <summary>Rewards this agent when the event refers to its own transform.</summary>
    private void TrackCheckpoints_OnCarCorrectCheckpoint(Transform carTransform)
    {
        if (carTransform == transform)
        {
            AddReward(rwd.correctCheckpoint);
        }
    }

    /// <summary>Applies the wrong-checkpoint amount when the event refers to this agent's transform.</summary>
    private void TrackCheckpoints_OnCarWrongCheckpoint(Transform carTransform)
    {
        if (carTransform == transform)
        {
            AddReward(rwd.wrongCheckpoint);
        }
    }

    /// <summary>
    /// Grants the track-completion reward and then ends the episode.
    /// </summary>
    /// <remarks>
    /// The order matters: the reward must be added before EndEpisode(), otherwise
    /// it is credited to the following episode instead of the one that earned it.
    /// NOTE(review): this event carries no car argument, so every subscribed agent
    /// reacts to it — confirm TrackCheckpoints raises it per car if several agents
    /// share one track.
    /// </remarks>
    private void TrackCheckpoints_OnAgentCompleteTrack()
    {
        AddReward(rwd.hitLastCheckpoint);
        EndEpisode();
    }

    /// <summary>
    /// Restores the recorded spawn pose, resets checkpoint progress for this car,
    /// and stops the car.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        transform.position = recallPosition;

        // The former "transform.forward = spawnPosition.forward" assignment was
        // removed: it was immediately overwritten by this rotation assignment.
        transform.rotation = recallRotation;

        trackCheckpoints.resetCheckPoint(transform);
        carDriver.StopCompletely();
    }

    /// <summary>
    /// Returns the dot product of this car's forward vector and the next
    /// checkpoint's forward vector.
    /// </summary>
    private float GetDotWithNextCheckpoint()
    {
        Vector3 checkpointForward = trackCheckpoints.GetNextCheckpoint(transform).transform.forward;
        return Vector3.Dot(transform.forward, checkpointForward);
    }

    /// <summary>
    /// Adds the alignment dot product as the single vector observation and applies
    /// <see cref="RewardsInfo.notFacingCheckpoint"/> when it is below 0.9.
    /// </summary>
    /// <remarks>
    /// NOTE(review): because the reward is added here, how often it is applied
    /// follows how often observations are collected — confirm that is intended.
    /// </remarks>
    public override void CollectObservations(VectorSensor sensor)
    {
        float alignment = GetDotWithNextCheckpoint();

        if (alignment < 0.9f)
        {
            AddReward(rwd.notFacingCheckpoint);
        }

        sensor.AddObservation(alignment);
    }

    /// <summary>
    /// Maps discrete branch 0 to throttle (0 = none, 1 = forward, 2 = backward) and
    /// branch 1 to steering (0 = none, 1 = +1, 2 = -1), applies the per-action
    /// throttle reward, and forwards both inputs to the car.
    /// </summary>
    public override void OnActionReceived(ActionBuffers actions)
    {
        float forwardAmount = 0f, turnAmount = 0f;

        switch (actions.DiscreteActions[0])
        {
            case 0:
                forwardAmount = 0f;
                AddReward(rwd.noMovement);
                break;

            case 1:
                forwardAmount = +1f;
                // encourage moving forward
                AddReward(rwd.movingForward);
                break;

            case 2:
                forwardAmount = -1f;
                AddReward(rwd.movingBackwards);
                break;
        }

        switch (actions.DiscreteActions[1])
        {
            case 0:
                turnAmount = 0f;
                break;

            case 1:
                turnAmount = +1f;
                break;

            case 2:
                turnAmount = -1f;
                break;
        }

        carDriver.SetInputs(forwardAmount, turnAmount);
    }

    /// <summary>
    /// Keyboard control for manual testing: W/S or Up/Down for throttle, D/A or
    /// Right/Left for steering, written in the same encoding OnActionReceived reads.
    /// </summary>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        int forwardAction = 0;

        if (Input.GetKey(KeyCode.W) || Input.GetKey(KeyCode.UpArrow))
        {
            forwardAction = 1;
        }

        // Checked second, so backward wins when both directions are held.
        if (Input.GetKey(KeyCode.S) || Input.GetKey(KeyCode.DownArrow))
        {
            forwardAction = 2;
        }

        int turnAction = 0;

        if (Input.GetKey(KeyCode.D) || Input.GetKey(KeyCode.RightArrow))
        {
            turnAction = 1;
        }

        if (Input.GetKey(KeyCode.A) || Input.GetKey(KeyCode.LeftArrow))
        {
            turnAction = 2;
        }

        ActionSegment<int> discreteActions = actionsOut.DiscreteActions;
        discreteActions[0] = forwardAction;
        discreteActions[1] = turnAction;

        print($"dot with next checkpoint: {GetDotWithNextCheckpoint()}");
    }

    /// <summary>
    /// Penalizes hitting an object tagged "wall", scaled by the squared impact
    /// speed, unless the next checkpoint is the exempt one.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the penalty grows with the square of the impact speed and is
    /// not clamped, so a single hard hit can outweigh all other rewards — consider
    /// bounding it.
    /// </remarks>
    void OnCollisionEnter(Collision collision)
    {
        if (!collision.gameObject.CompareTag("wall"))
        {
            return;
        }

        if (trackCheckpoints.GetNextCheckpoint(transform).name != wallPenaltyExemptCheckpointName)
        {
            // the harder you hit the wall, the more the punish
            AddReward(rwd.hitAWall * collision.relativeVelocity.sqrMagnitude);
        }
    }

    /// <summary>
    /// Applies <see cref="RewardsInfo.slidingAlongWall"/> on every callback in which
    /// the car remains in contact with an object tagged "wall", to discourage
    /// driving along walls.
    /// </summary>
    void OnCollisionStay(Collision collision)
    {
        if (collision.gameObject.CompareTag("wall"))
        {
            AddReward(rwd.slidingAlongWall);
        }
    }
}

CarAI.yaml:

Code (YAML):

# ML-Agents trainer configuration for the "CarDriver" behavior.
# Nesting restored: the pasted version had every key at column 0, which would
# make all settings siblings of "behaviors" instead of children of CarDriver.
behaviors:
  CarDriver:
    trainer_type: ppo
    hyperparameters:
      batch_size: 256
      buffer_size: 10240
      learning_rate: 0.0003
      beta: 0.0005
      epsilon: 0.2
      # NOTE(review): higher than the commonly used 0.9-0.95 range; confirm this is intended.
      lambd: 0.99
      num_epoch: 3
      learning_rate_schedule: linear
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
    reward_signals:
      extrinsic:
        strength: 1
        gamma: 0.99
    max_steps: 7500000
    time_horizon: 64
    # Was 5000000: with max_steps of 7500000 that yields a single statistics
    # report for the whole run, so reward trends cannot be monitored.
    summary_freq: 50000
    threaded: true

Search Unity

Question Self driving car (need help with training)

leozhang1

Attached Files:

upload_2022-8-12_8-28-25.png

Search Unity

Unity ID

Useful Searches

Question Self driving car (need help with training)

leozhang1

Attached Files:

upload_2022-8-12_8-28-25.png