Search Unity

  1. We are migrating the Unity Forums to Unity Discussions by the end of July. Read our announcement for more information and let us know if you have any questions.
    Dismiss Notice
  2. Dismiss Notice

Question On Demand Decision

Discussion in 'ML-Agents' started by wyl666888, May 7, 2024.

  1. wyl666888

    wyl666888

    Joined:
    May 7, 2024
    Posts:
    2
    In ML-Agents, I want to make decisions on demand: after the agent finishes executing an action, I set the reward and do the related bookkeeping, and only then collect the next observation. I call RequestDecision() myself and do not use the Decision Requester component. This seems to work, but because I wait for the action to finish in a coroutine, the environment step function keeps being called in FixedUpdate while I wait. As a result, the episode ends before the agent has made the specified number of moves. What should I do, or how should I set things up to meet this requirement?
    Here's my code

    using UnityEngine;
    using Unity.MLAgents;
    using Unity.MLAgents.Actuators;
    using Unity.MLAgents.Sensors;
    using Random = UnityEngine.Random;
    using System.Collections;
    using Unity.Mathematics;
    using System.Reflection;

    /// <summary>
    /// ML-Agents agent that drives a robot arm through <see cref="ArmController"/> and
    /// requests decisions on demand: one decision is requested, the resulting joint
    /// targets are applied, and the next decision is only requested once the arm has
    /// finished moving (or got stuck, collided, or timed out).
    /// </summary>
    /// <remarks>
    /// NOTE(review): the inherited <c>MaxStep</c> counts Academy steps, not decisions.
    /// While the arm is moving the Academy keeps stepping, so a non-zero <c>MaxStep</c>
    /// can end the episode before <see cref="MaxDecisionSteps"/> decisions were taken.
    /// Presumably <c>MaxStep</c> should be left at 0 in the Inspector so that only this
    /// class's own step counter ends the episode - confirm against the scene setup.
    /// </remarks>
    public class robotAgent : Agent
    {
        /// <summary>Number of decisions after which the episode is ended.</summary>
        private const int MaxDecisionSteps = 50;

        /// <summary>Valve position at or above which the episode is rewarded and ended.</summary>
        private const float ValveGoalPosition = 85f;

        /// <summary>Maximum time, in seconds of game time, to wait for one action to finish.</summary>
        private const float ActionTimeoutSeconds = 20.0f;

        /// <summary>Number of joint values read from the arm for the observation vector.</summary>
        private const int ObservedJointCount = 6;

        /// <summary>Number of joints driven by the policy; the sixth target is fixed.</summary>
        private const int ControlledJointCount = 5;

        // Shared yield instruction so the wait loop does not allocate every physics step.
        private static readonly WaitForFixedUpdate s_WaitForFixedUpdate = new WaitForFixedUpdate();

        [Header("Specific to Ball3D")]
        public GameObject valve;

        EnvironmentParameters m_ResetParams;
        public ArmController armController;
        public GameObject markValve;
        public int steps;
        public float preAngle;
        public bool isComplete;
        public GameObject markLeft;
        public GameObject markRight;
        public GameObject desireGoal;

        // Handle of the running WaitComplete coroutine, or null when no action is pending.
        private Coroutine m_WaitRoutine;

        /// <summary>
        /// Resets the arm and the per-episode counters and caches the Academy's
        /// environment parameters.
        /// </summary>
        public override void Initialize()
        {
            isComplete = true;
            armController.ResetAll();
            armController.ResetFlags();
            m_ResetParams = Academy.Instance.EnvironmentParameters;
            SetResetParameters();
            preAngle = 0;
            steps = 0;
        }

        /// <summary>
        /// Writes the observation vector: six scaled joint values, the scaled valve
        /// position, and several marker / goal positions.
        /// </summary>
        /// <param name="sensor">Sensor that receives the observations.</param>
        public override void CollectObservations(VectorSensor sensor)
        {
            float[] currentState = armController.GetObservation();
            for (int i = 0; i < ObservedJointCount; i++)
            {
                // Fix: index the array; the original divided the whole array by 180.
                sensor.AddObservation(currentState[i] / 180f);
            }

            Vector3 valveMark = markValve.transform.position;
            Vector3 leftMark = markLeft.transform.position;
            Vector3 rightMark = markRight.transform.position;

            sensor.AddObservation(armController.GetValvePosition() / 90f);
            sensor.AddObservation(leftMark - valveMark);
            sensor.AddObservation(rightMark - valveMark);
            sensor.AddObservation(valveMark);
            sensor.AddObservation((leftMark + rightMark) / 2.0f);
            sensor.AddObservation(desireGoal.transform.position);
            // The valve marker position is observed a second time on purpose here:
            // removing it would change the observation size the model was set up with.
            sensor.AddObservation(valveMark);
            isComplete = false;
        }

        /// <summary>
        /// Converts the five continuous actions to joint target angles, sends them to
        /// the arm, and starts waiting for the motion to finish.
        /// </summary>
        /// <param name="actionBuffers">Actions chosen by the policy; five continuous values are read.</param>
        public override void OnActionReceived(ActionBuffers actionBuffers)
        {
            steps++;
            float[] targetAngle = {
                actionBuffers.ContinuousActions[0], actionBuffers.ContinuousActions[1],
                actionBuffers.ContinuousActions[2], actionBuffers.ContinuousActions[3],
                actionBuffers.ContinuousActions[4], 180.0f
            };

            for (int i = 0; i < ControlledJointCount; i++)
            {
                // Fix: operate on the element. Maps an action of -1..1 onto 0..180 and
                // clamps anything outside that range.
                targetAngle[i] = math.clamp((targetAngle[i] + 1) * 90, 0, 180);
            }

            armController.SetJointPositions(targetAngle);

            // Never let two waits run at once: a stale one would reward and request
            // decisions on behalf of an action that is no longer the current one.
            StopPendingWait();
            m_WaitRoutine = StartCoroutine(WaitComplete());
        }

        /// <summary>
        /// Resets the arm and the step counter and requests the first decision of the
        /// new episode.
        /// </summary>
        public override void OnEpisodeBegin()
        {
            Debug.Log("回合开始了!!!");

            // If the episode was reset from outside WaitComplete, drop the wait that
            // belongs to the previous episode.
            StopPendingWait();

            isComplete = true;
            steps = 0;
            armController.ResetAll();
            armController.ResetFlags();
            RequestDecision();
        }

        /// <summary>Manual control is not implemented; the action buffers are left untouched.</summary>
        public override void Heuristic(in ActionBuffers actionsOut)
        {

        }

        /// <summary>Resets the step counter, the previous angle and the arm state.</summary>
        public void SetResetParameters()
        {
            steps = 0;
            preAngle = 0;
            armController.ResetAll();
            armController.ResetFlags();
        }

        /// <summary>
        /// Computes the reward for the current arm / valve state.
        /// </summary>
        /// <returns>
        /// A weighted, negated sum of two distances (average marker-to-valve distance
        /// and goal-to-valve distance), rescaled by <c>* 2 + 1</c>, minus 20 on an
        /// error collision and plus 20 when the valve position reaches the goal.
        /// </returns>
        public float ComputeReward()
        {
            Vector3 valveMark = markValve.transform.position;

            float distanceReward = (Vector3.Distance(markLeft.transform.position, valveMark) +
                                    Vector3.Distance(markRight.transform.position, valveMark)) / 2;
            float transformReward = Vector3.Distance(desireGoal.transform.position, valveMark);

            // NOTE(review): 4.5 and 0.7829 look like normalisers for the two distances - confirm.
            var reward = -0.3f * distanceReward / 4.5f - 0.7f * transformReward / 0.7829f;
            reward = reward * 2 + 1;

            if (armController.collisionHandler.isErrorCollision)
            {
                reward -= 20.0f;
            }

            if (armController.GetValvePosition() >= ValveGoalPosition)
            {
                reward += 20.0f;
            }

            return reward;
        }

        /// <summary>
        /// Waits until the arm is stuck, has reached its targets, has collided, or the
        /// timeout elapsed; then assigns the reward and either ends the episode or
        /// requests the next decision.
        /// </summary>
        IEnumerator WaitComplete()
        {
            float startTime = Time.time;

            // Keep waiting while: not stuck AND not at target AND no error collision.
            while (!armController.GetIsStuck() && !armController.AreJointsAtTargetAngle()
                   && !armController.collisionHandler.isErrorCollision)
            {
                if (Time.time - startTime >= ActionTimeoutSeconds)
                {
                    break;
                }

                // Fix: resume on the physics step instead of the rendered frame, so
                // the wait stays in lock-step with the Academy, which steps in
                // FixedUpdate, also when the time scale is increased for training.
                yield return s_WaitForFixedUpdate;
            }

            isComplete = true;
            // From here on the coroutine runs to completion without yielding, so it
            // must not be stopped by the episode reset triggered below.
            m_WaitRoutine = null;

            var reward = ComputeReward();
            SetReward(reward);
            Debug.Log("第" + steps + "步" + reward);

            bool episodeOver = armController.collisionHandler.isErrorCollision
                               || armController.GetValvePosition() >= ValveGoalPosition
                               || steps >= MaxDecisionSteps;

            if (episodeOver)
            {
                // Fix: end the episode exactly once, even if several conditions hold.
                // EndEpisode() resets the agent, which runs OnEpisodeBegin, and that
                // already requests the first decision of the next episode.
                Debug.Log("回合结束了!!!");
                EndEpisode();
                yield break;
            }

            RequestDecision();
        }

        /// <summary>Stops the running <see cref="WaitComplete"/> coroutine, if any.</summary>
        private void StopPendingWait()
        {
            if (m_WaitRoutine != null)
            {
                StopCoroutine(m_WaitRoutine);
                m_WaitRoutine = null;
            }
        }
    }
     
  2. smallg2023

    smallg2023

    Joined:
    Sep 2, 2018
    Posts:
    154
    Yeah, coroutines don't work correctly with an increased time scale if you use
    yield return null
    You should get better results if you use
    yield return new WaitForFixedUpdate()