Search Unity

Agent not exploring the possible discrete branch

Discussion in 'ML-Agents' started by harpingseal, Aug 30, 2022.

  1. harpingseal

    harpingseal

    Joined:
    Sep 3, 2020
    Posts:
    57
    So I have two discrete branches: one for the row and one for the amount. However, after the first "explosion" (the initial phase where the agents choose randomly), when only one coin is left, that remaining coin is never chosen. I am recreating Nim, a game where there are multiple rows and you can only take from one row on your turn; whoever takes the last coin wins. Here's my manager code:
    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4.  
    5. public class NimManager : MonoBehaviour
    6. {
    7.     // Start is called before the first frame update
    8.     public int[] startvar;
    9.     public int[] internalvar;
    10.     public GameObject[] rowvar;
    11.     public float waittime = 0;
    12.     public float distancem = 1;
    13.     public Vector2 offset = new Vector2(0.0f,0.0f);
    14.     public GameObject background;
    15.     public int interval = 100;
    16.     private int internalIV =-300;
    17.     public NIMAgents a1;
    18.     public NIMAgents a2;
    19.     private int currentAgent;
    20.  
    21.     public IEnumerator CreateBoard()
    22.     {
    23.         int i = 0;
    24.         foreach (int value in startvar)
    25.         {
    26.             for (int e = 1; e < value; e++)
    27.             {
    28.                 GameObject duplicate = Instantiate(GameObject.Find(rowvar[i].transform.name + "/1"));
    29.                 duplicate.transform.parent = rowvar[i].transform;
    30.                 duplicate.transform.name = e.ToString();
    31.                 duplicate.transform.localPosition = new Vector3(offset.x, offset.y, e * distancem);
    32.                 yield return new WaitForSeconds(waittime);
    33.             }
    34.             i = i + 1;
    35.         }
    36.         i = 0;
    37.     }
    38.     public IEnumerator UpdateBoard()
    39.     {
    40.         int i = 0;
    41.         foreach (int value in startvar)
    42.         {
    43.             for (int e = 1; e < value; e++)
    44.             {
    45.                 Transform child = rowvar[i].transform.GetChild(i);
    46.                 child.gameObject.SetActive(true);
    47.                 yield return new WaitForSeconds(0);
    48.             }
    49.             i = i + 1;
    50.         }
    51.         i = 0;
    52.     }
    53.     void Start()
    54.     {
    55.         StartCoroutine(CreateBoard());
    56.     }
    57.     public void Play(int rows,int amt)
    58.     {
    59.         if (!rowvar[rows]) {
    60.             return;
    61.         }
    62.  
    63.         if (amt <= rowvar[rows].transform.childCount) {
    64.             for (int i = 0; i < amt; i++)
    65.             {
    66.  
    67.                 if (rowvar[rows].transform.GetChild(i).gameObject.activeSelf) {
    68.                     Transform child = rowvar[rows].transform.GetChild(i);
    69.                     child.gameObject.SetActive(false);
    70.                     internalvar[rows] = internalvar[rows]-1;
    71.                     GetCurrentAgent().SetReward(0.02f);
    72.                 }
    73.                 else
    74.                 {
    75.                    // GetCurrentAgent().SetReward(-0.05f);
    76.                 }
    77.             }
    78.         }
    79.     }
    80.     public NIMAgents GetCurrentAgent()
    81.     {
    82.         if (currentAgent == 1)
    83.         {
    84.             currentAgent = 0;
    85.             return a1;
    86.         }
    87.         else
    88.         {
    89.             currentAgent = 1;
    90.             return a2;
    91.         }
    92.     }
    93.     void FixedUpdate()
    94.     {
    95.         bool end = true;
    96.         foreach (int remaining in internalvar)
    97.         {
    98.             if (remaining > 0)
    99.             {
    100.                 end = false;
    101.                 break;
    102.             }
    103.         }
    104.         if (end == true) {
    105.             GetCurrentAgent().SetReward(1.0f);
    106.             GetCurrentAgent().SetReward(-1.0f);
    107.             internalvar = startvar;
    108.             StartCoroutine(UpdateBoard());
    109.             internalIV = -100;
    110.         }
    111.         if (internalIV < interval)
    112.         {
    113.             internalIV += 1;
    114.         }
    115.         else
    116.         {
    117.             internalIV = 0;
    118.            // Debug.Log("GETTING DECISION");
    119.             GetCurrentAgent().RequestDecision();
    120.          
    121.         }
    122.     }
    123. }
    124.  
    And here is the agent code:
    Code (CSharp):
    1. using System.Collections;
    2. using System.Collections.Generic;
    3. using UnityEngine;
    4. using Unity.MLAgents;
    5. using Unity.MLAgents.Sensors;
    6. using Unity.MLAgents.Actuators;
    7.  
    8.  
    9. public class NIMAgents : Agent
    10. {
    11.     public GameObject managerEnv;
    12.     public NimManager manager;
    13.     // Start is called before the first frame update
    14.     void Start()
    15.     {
    16.        // manager = gameObject.GetComponent<NimManager>();
    17.     }
    18.     public override void CollectObservations(VectorSensor sensor)
    19.     {
    20.         // Target and Agent positions
    21.         sensor.AddObservation(manager.internalvar[0]);
    22.         sensor.AddObservation(manager.internalvar[1]);
    23.         sensor.AddObservation(manager.internalvar[2]);
    24.         sensor.AddObservation(manager.internalvar[3]);
    25.         sensor.AddObservation(manager.internalvar[4]);
    26.  
    27.     }
    28.     public override void OnActionReceived(ActionBuffers actionBuffers)
    29.     {
    30.         int row = actionBuffers.DiscreteActions[0];
    31.         int amount = actionBuffers.DiscreteActions[1];
    32.         manager.Play(row, amount);
    33.     }
    34.  
    35. }
    36.