Search Unity

  1. We are migrating the Unity Forums to Unity Discussions. On July 12, the Unity Forums will become read-only.

    Please, do not make any changes to your username or email addresses at id.unity.com during this transition time.

    It's still possible to reply to existing private message conversations during the migration, but any new replies you post will be missing after the main migration is complete. We'll do our best to migrate these messages in a follow-up step.

    On July 15, Unity Discussions will become read-only until July 18, when the new design and the migrated forum contents will go live.


    Read our full announcement for more information and let us know if you have any questions.

Question Issue with ML Agents

Discussion in 'ML-Agents' started by Gawri6258, Jun 7, 2024.

  1. Gawri6258

    Gawri6258

    Joined:
    Jun 7, 2024
    Posts:
    1
    Hi,

    I am new to ML-Agents and have been trying to train a game demo to understand how ML-Agents works, but I can't get it to work!

    I have been at it for weeks now. Whenever I launch the mlagents-learn command, it just hangs and the actions are not registered.

    My player agent code is:

    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Sensors;
    using Unity.MLAgents.Actuators;

    /// <summary>
    /// ML-Agents agent that drives the player: it observes its own local position,
    /// its local z rotation and the game manager's phase, and forwards two discrete
    /// action branches (rotate, strafe) to the <c>Player</c> component.
    /// </summary>
    public class PlayerAgent : Agent
    {
        // Reward shaping factors used in OnActionReceived.
        private const float SpawnRewardScale = 0.1f;
        private const float PhaseRewardScale = 0.5f;
        private const float DeathPenalty = -1.0f;

        private Rigidbody playerRigidbody;
        private GameManager gameManager;
        private Player _playerScript;
        public GameObject playerPrefab;

        /// <summary>
        /// Caches the components and scene objects the agent depends on.
        /// Logs an error for each dependency that cannot be resolved so a broken
        /// scene setup is reported here rather than as a later NullReferenceException.
        /// </summary>
        public override void Initialize()
        {
            playerRigidbody = GetComponent<Rigidbody>();
            gameManager = FindObjectOfType<GameManager>();
            _playerScript = GetComponent<Player>();

            if (gameManager == null)
            {
                Debug.LogError("PlayerAgent: no GameManager found in the scene.");
            }
            if (_playerScript == null)
            {
                Debug.LogError("PlayerAgent: no Player component found on the agent's GameObject.");
            }

            Debug.Log("PlayerAgent initialized.");
        }

        /// <summary>
        /// Resets the game for a new episode: destroys the current player object (if any),
        /// asks the game manager to start a new game, and spawns a fresh player from
        /// <see cref="playerPrefab"/> at the world origin.
        /// </summary>
        public override void OnEpisodeBegin()
        {
            Debug.Log("Episode Begin");

            // NOTE(review): if gameManager.currentPlayer is the GameObject this agent
            // is attached to, this destroys the agent itself - confirm the scene setup.
            if (gameManager.currentPlayer != null)
            {
                Destroy(gameManager.currentPlayer);
                Debug.Log("Destroyed existing player.");
            }

            gameManager.newGame();
            Debug.Log("Called gameManager.newGame()");

            GameObject newPlayer = Instantiate(playerPrefab, Vector3.zero, Quaternion.identity);
            gameManager.currentPlayer = newPlayer;
            Debug.Log("Instantiated new player.");

            Debug.Log("New game started, player reset.");
        }

        /// <summary>
        /// Adds the agent's local position, local z euler angle and the game manager's
        /// phase to the vector observation.
        /// </summary>
        /// <param name="sensor">Vector sensor that receives the observations.</param>
        public override void CollectObservations(VectorSensor sensor)
        {
            Debug.Log("Collecting Observations");
            sensor.AddObservation(transform.localPosition);
            sensor.AddObservation(transform.localRotation.eulerAngles.z);
            sensor.AddObservation(gameManager.phase);

            Debug.Log($"Position: {transform.localPosition}, Rotation: {transform.localRotation.eulerAngles.z}, Phase: {gameManager.phase}");
        }

        /// <summary>
        /// Applies the received discrete actions to the player and assigns rewards.
        /// </summary>
        /// <param name="actionBuffers">
        /// Discrete branch 0 is the rotate action and branch 1 is the strafe action.
        /// </param>
        public override void OnActionReceived(ActionBuffers actionBuffers)
        {
            int rotate = actionBuffers.DiscreteActions[0];
            int strafe = actionBuffers.DiscreteActions[1];
            Debug.Log($"Action Received - Rotate: {rotate}, Strafe: {strafe}");

            _playerScript.Movement_V1(rotate, strafe);

            // Rewards and penalties.
            // AddReward is used for the shaping terms because SetReward replaces the
            // step reward; calling SetReward twice would silently drop the first term.
            if (gameManager.spawnsDestroyed > 0)
            {
                AddReward(gameManager.spawnsDestroyed * SpawnRewardScale);
                Debug.Log($"Reward for spawns destroyed: {gameManager.spawnsDestroyed * SpawnRewardScale}");
            }
            if (gameManager.phase > 1)
            {
                AddReward(gameManager.phase * PhaseRewardScale);
                Debug.Log($"Reward for phase: {gameManager.phase * PhaseRewardScale}");
            }
            if (gameManager.currentPlayer == null)
            {
                // Deliberately SetReward: the death penalty replaces any shaping
                // reward accumulated during this step before the episode ends.
                SetReward(DeathPenalty);
                Debug.Log("Player destroyed, ending episode with penalty.");
                Academy.Instance.StatsRecorder.Add("PhaseReached", gameManager.phase);
                EndEpisode();
            }
        }

        /// <summary>
        /// Manual keyboard control for testing. Left/Right arrow select rotate
        /// values 1/2 (0 when neither is held); W/S/D/A select strafe values
        /// 1/2/3/4 (0 when none is held).
        /// </summary>
        /// <param name="actionsOut">Action buffers to fill with the chosen actions.</param>
        public override void Heuristic(in ActionBuffers actionsOut)
        {
            var discreteActionsOut = actionsOut.DiscreteActions;

            // Manual control for testing
            discreteActionsOut[0] = 0; // Default no rotation
            if (Input.GetKey(KeyCode.LeftArrow))
            {
                discreteActionsOut[0] = 1;
            }
            else if (Input.GetKey(KeyCode.RightArrow))
            {
                discreteActionsOut[0] = 2;
            }

            discreteActionsOut[1] = 0; // Default no strafe
            if (Input.GetKey(KeyCode.W))
            {
                discreteActionsOut[1] = 1;
            }
            else if (Input.GetKey(KeyCode.S))
            {
                discreteActionsOut[1] = 2;
            }
            else if (Input.GetKey(KeyCode.D))
            {
                discreteActionsOut[1] = 3;
            }
            else if (Input.GetKey(KeyCode.A))
            {
                discreteActionsOut[1] = 4;
            }

            Debug.Log($"Heuristic actions - Rotate: {discreteActionsOut[0]}, Strafe: {discreteActionsOut[1]}");
        }
    }


    upload_2024-6-7_22-52-5.png

    upload_2024-6-7_23-22-5.png
    upload_2024-6-7_23-22-42.png

    My yaml:


    # ML-Agents trainer configuration.
    # The key under `behaviors` must match the Behavior Name set on the agent's
    # Behavior Parameters component in the Unity scene.
    behaviors:
      ControlStyle1:
        trainer_type: ppo
        hyperparameters:
          batch_size: 64
          buffer_size: 2048
          learning_rate: 3.0e-4
          beta: 0.001
          epsilon: 0.2
          lambd: 0.99
          num_epoch: 5
          shared_critic: false
          learning_rate_schedule: linear
        network_settings:
          normalize: true
          hidden_units: 128
          num_layers: 2
        reward_signals:
          extrinsic:
            gamma: 0.99
            strength: 1.0
        # Behavior-level settings (siblings of hyperparameters, not children of it).
        max_steps: 50000
        time_horizon: 64
        summary_freq: 1000

    upload_2024-6-7_23-26-0.png

    My installation was on Python 3.18