
Question: My agents don't seem to learn

Discussion in 'ML-Agents' started by erictr, Sep 7, 2020.

  1. erictr

    erictr

    Joined:
    Nov 18, 2016
    Posts:
    7
    Hello, I am trying to create an agent that can move across a series of randomly generated platforms. I equipped the agent with multiple Ray Perception Sensors and tagged the 3 types of platforms (platform, respawn and goal). I used the Pyramids example YAML file for training. However, after multiple attempts the agents don't seem to learn how to reach the goal. In my last attempt I reached 4,860,000 steps without good results. I hope someone can help me, thanks.



    Code (CSharp):
    using System;
    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;

    public class PlayerAgent : Agent
    {
        private Vector3 startPosition;
        private Rigidbody rb;
        public float moveSpeed = 2;
        public float rotationSpeed = 180;
        public float jumpSpeed = 5;
        public float fallingForce = 1f;
        public float collisionForce = 15f;

        public GameObject destroyParticle;
        private float previousDistance;
        private float distance;

        public PlataformGenerator myplat;

        public override void Initialize()
        {
            rb = GetComponent<Rigidbody>();
            startPosition = transform.position;
        }

        // Called at the beginning of an Agent's episode, including at the beginning of the simulation.
        public override void OnEpisodeBegin()
        {
            transform.position = new Vector3(startPosition.x + UnityEngine.Random.Range(-7, 7), startPosition.y, startPosition.z + UnityEngine.Random.Range(-7, 7));
            transform.rotation = Quaternion.identity;
            rb.velocity = Vector3.zero;
            rb.angularVelocity = Vector3.zero;
            previousDistance = transform.position.z;
            myplat.Generate();
        }

        // Called every time the Agent receives an action to take. Receives the action chosen by the Agent.
        // It is also common to assign a reward in this method.
        public override void OnActionReceived(float[] vectorAction)
        {
            // Rotation branch: 1 = turn left, 2 = turn right.
            if (vectorAction[1] == 1)
            {
                Vector3 m_EulerAngleVelocity = new Vector3(0, -rotationSpeed, 0);
                Quaternion deltaRotation = Quaternion.Euler(m_EulerAngleVelocity * Time.fixedDeltaTime);
                rb.MoveRotation(rb.rotation * deltaRotation);
            }
            else if (vectorAction[1] == 2)
            {
                Vector3 m_EulerAngleVelocity = new Vector3(0, rotationSpeed, 0);
                Quaternion deltaRotation = Quaternion.Euler(m_EulerAngleVelocity * Time.fixedDeltaTime);
                rb.MoveRotation(rb.rotation * deltaRotation);
            }

            // Movement branch: 1 = move forward.
            if (vectorAction[0] == 1)
            {
                rb.AddForce(transform.forward * moveSpeed, ForceMode.VelocityChange);
            }

            // Extra downward force while airborne so the agent falls faster.
            if (!IsGrounded())
            {
                rb.AddForce(Vector3.down * fallingForce, ForceMode.Acceleration);
            }

            // Jump branch, with a small penalty to discourage constant jumping.
            if (vectorAction[2] == 1 && IsGrounded())
            {
                rb.velocity = new Vector3(rb.velocity.x, jumpSpeed, rb.velocity.z);
                AddReward(-0.01f);
            }

            // Reward progress along the z axis.
            distance = transform.position.z;
            if (distance > previousDistance)
            {
                previousDistance = distance;
                AddReward(0.1f);
            }

            // Small existential penalty every step.
            AddReward(-0.01f);
        }

        private bool IsGrounded()
        {
            // Check the raycast result before reading the hit; otherwise hit.collider can be null.
            if (Physics.Raycast(transform.position, Vector3.down, out RaycastHit hit))
            {
                return hit.distance < 0.55f && !hit.collider.CompareTag("Respawn");
            }
            return false;
        }

        // Called every step that the Agent requests a decision. This is one possible way of
        // collecting the Agent's observations of the environment; see Generating Observations for more options.
        public override void CollectObservations(VectorSensor sensor)
        {

        }

        // Method to generate the actions of the Agent. The Heuristic() method writes to the array of
        // floats provided to it as an argument.
        public override void Heuristic(float[] actionsOut)
        {
            Array.Clear(actionsOut, 0, actionsOut.Length);

            if (Input.GetKey(KeyCode.W))
            {
                actionsOut[0] = 1f;
            }
            if (Input.GetKey(KeyCode.S))
            {
                actionsOut[0] = 2f;
            }
            if (Input.GetKey(KeyCode.A))
            {
                actionsOut[1] = 1f;
            }
            if (Input.GetKey(KeyCode.D))
            {
                actionsOut[1] = 2f;
            }
            actionsOut[2] = Input.GetKey(KeyCode.Space) ? 1f : 0f;
        }

        private void OnCollisionEnter(Collision collision)
        {
            if (collision.gameObject.CompareTag("Finish"))
            {
                AddReward(1f);
                EndEpisode();
            }

            if (collision.gameObject.CompareTag("Respawn"))
            {
                Instantiate(destroyParticle, transform.position, transform.rotation);
                AddReward(-1f);
                EndEpisode();
            }

            if (collision.gameObject.CompareTag("Player"))
            {
                Vector3 position = collision.gameObject.transform.position - transform.position;
                rb.AddForce(-position.normalized * collisionForce, ForceMode.VelocityChange);
            }
        }
    }
     
    Last edited: Sep 7, 2020
  3. Luke-Houlihan

    Luke-Houlihan

    Joined:
    Jun 26, 2007
    Posts:
    303
    @erictr It doesn't look like you're giving the agent any information except the ray sensors, and that may not tell it everything it needs to know. You might try adding some additional vector observations, like relative velocity and maybe the forward direction in local Euler angles.

    When in doubt I tend to add all the observations I think the agent might find useful just to get the agent training, then remove possibly unnecessary observations to make training faster. Adding more observations makes training slower, but not having the observations the agent needs makes training impossible.
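
    As a rough, untested sketch of what I mean (it reuses the rb field and IsGrounded() method from your script, and the scaling constants are just guesses; you'd also need to set the Vector Observation Space Size in the Behavior Parameters to match):

    Code (CSharp):
    public override void CollectObservations(VectorSensor sensor)
    {
        // Velocity in the agent's local frame, so forward speed looks the same
        // no matter which way the agent is facing. Roughly scaled into [-1, 1].
        Vector3 localVelocity = transform.InverseTransformDirection(rb.velocity);
        sensor.AddObservation(localVelocity / 10f);

        // Which way the agent is facing (yaw only), normalized to [-1, 1].
        sensor.AddObservation(transform.localEulerAngles.y / 180f - 1f);

        // Whether the agent is on the ground, which should help it learn when jumping works.
        sensor.AddObservation(IsGrounded());
    }

    That's 5 floats in total (the Vector3 counts as 3, the bool as 1), so the Space Size would be 5 here. The exact scaling doesn't matter much as long as the values stay in a small range.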
     
  4. Luke-Houlihan

    Luke-Houlihan

    Joined:
    Jun 26, 2007
    Posts:
    303
    Also, if you're using PPO and randomizing a complex environment, it may take more than 5 million steps to get acceptable behavior. One of my more complex agents needs 30 million steps or more just to explore the full reward space.