Search Unity

Question Trying to get best possible performance for simple transform chain animating Jobs, Help Needed

Discussion in 'Entity Component System' started by Nirvan, Feb 25, 2023.

  1. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    Hello,

    I want to animate a chain of transforms (the destination is skinned mesh renderer bones)
    using some vector and quaternion calculations, and I finally got it working using Jobs.
    I am looking for ways to make it more performant, and I made a simple manager.

    It looks like this right now:
    upload_2023-2-25_10-36-41.gif

    The goal is for each of these tubes to have its own individual movement; here I used a sine offset on the position just to better visualize the motion and make sure that it's working.

    Here is the code for the chain animation:
    Code (CSharp):
    1. using System.Collections.Generic;
    2. using Unity.Burst;
    3. using Unity.Collections;
    4. using Unity.Jobs;
    5. using Unity.Mathematics;
    6. using UnityEngine;
    7. using UnityEngine.Jobs;
    8.  
    9. // Animates the transforms in toAnimate as a chain through three chained jobs (position, rotation, apply); registers itself with ManagerDOTSChainTransforming in Start().
    10. public class DOTSChainTransforming : MonoBehaviour
    11. {
    12.     public List<Transform> toAnimate;
    13.  
    14.     // Job execution arrays
    15.     TransformAccessArray jTransforms;
    16.  
    17.     NativeArray<quaternion> jRotation; // world rotations captured once in Start(); only read by JPositionProcessing
    18.     NativeArray<float3> jProceduralPos; // simulated world position of every segment
    19.     NativeArray<quaternion> jParentRot; // entry i is written by JRotationProcessing and applied to transform i - 1
    20.  
    21.     // Allocates the job arrays, seeds them from the current transforms and registers with the manager.
    22.     void Start()
    23.     {
    24.         // Just prepare jobs to execute in Update()
    25.         jTransforms = new TransformAccessArray(toAnimate.ToArray());
    26.         jProceduralPos = new NativeArray<float3>(jTransforms.length, Allocator.TempJob);
    27.         jRotation = new NativeArray<quaternion>(jTransforms.length, Allocator.TempJob);
    28.         jParentRot = new NativeArray<quaternion>(jTransforms.length, Allocator.TempJob);
    29.         // NOTE(review): these arrays are only disposed in OnDestroy(), yet use Allocator.TempJob (documented for lifetimes of at most 4 frames) - Allocator.Persistent is probably intended; confirm.
    30.         // Set some initial values
    31.         for (int i = 0; i < toAnimate.Count; i++)
    32.         {
    33.             jProceduralPos[i] = toAnimate[i].position;
    34.             jRotation[i] = toAnimate[i].rotation;
    35.             jParentRot[i] = i == 0 ? quaternion.identity : toAnimate[i - 1].rotation;
    36.         }
    37.         // Register so the manager calls GetUpdateJob() for this chain from its Update()
    38.         ManagerDOTSChainTransforming.Instance.Add(this);
    39.     }
    40.  
    41.  
    42.     // Just like Update(): schedules position -> rotation -> apply as one dependency chain and returns the final handle for the caller to complete
    43.     public JobHandle GetUpdateJob(float elapsedTimeForSinus, float delta)
    44.     {
    45.         // Sync root segment with this object's world position, offset along X by sin(elapsedTimeForSinus)
    46.         jProceduralPos[0] = transform.position + new Vector3(math.sin(elapsedTimeForSinus), 0, 0);
    47.  
    48.         // Calculate Position
    49.         var jobHandle = new JPositionProcessing(jRotation, jProceduralPos, delta).Schedule(jTransforms.length, 128);
    50.         // Adjust Rotation
    51.         jobHandle = new JRotationProcessing(jProceduralPos, jParentRot).Schedule(jTransforms.length, 128, jobHandle);
    52.         // Apply changes to be visible in render
    53.         jobHandle = new JApplyTailMotion(jProceduralPos, jParentRot, jTransforms.length).Schedule(jTransforms, jobHandle);
    54.  
    55.         return jobHandle;
    56.     }
    57.  
    58.     // Clean all: dispose the TransformAccessArray and the three native arrays
    59.     // NOTE(review): nothing here completes in-flight jobs or removes this instance from the manager's list (the manager only exposes Add) - confirm destruction order is safe.
    60.     private void OnDestroy()
    61.     {
    62.         jTransforms.Dispose();
    63.         jProceduralPos.Dispose();
    64.         jRotation.Dispose();
    65.         jParentRot.Dispose();
    66.     }
    67.  
    68.  
    69.     // Calculate Position: each segment i > 0 moves towards a point 0.1 units behind segment i - 1,
    70.     // covering the fraction `delta` of the remaining distance on every run; segment 0 is left untouched
    71.     [BurstCompile]
    72.     struct JPositionProcessing : IJobParallelFor
    73.     {
    74.         #region The Constructor - hide to not distract
    75.         [ReadOnly] NativeArray<quaternion> rotations;
    76.         NativeArray<float3> procedPos;
    77.         [ReadOnly] float delta;
    78.  
    79.         public JPositionProcessing(
    80.             NativeArray<quaternion> r,
    81.             NativeArray<float3> procP,
    82.             float timeDelta
    83.             )
    84.         {
    85.             rotations = r;
    86.             procedPos = procP;
    87.             delta = timeDelta;
    88.         }
    89.         #endregion
    90.  
    91.         public void Execute(int i)
    92.         {
    93.             if (i == 0) return;
    94.             // NOTE(review): reads procedPos[i - 1] while other parallel iterations may be writing it, so the result can depend on execution order (raised in the replies below) - confirm.
    95.             quaternion segmentOrientation = rotations[i - 1];
    96.  
    97.             // Just move towards back of the transform
    98.             float3 targetPos = procedPos[i - 1] + math.mul(segmentOrientation, new float3(0.0f, 0.0f, -0.1f)); // Offset by 0.1 units along the previous segment's local -Z
    99.             float3 toTargetPos = targetPos - procedPos[i];
    100.  
    101.             procedPos[i] += toTargetPos * delta;
    102.         }
    103.     }
    104.  
    105.  
    106.     // Adjust Rotation: for each segment i > 0 stores in rotForParent[i] the rotation turning (0, 0, -1)
    107.     // onto the direction from segment i - 1 to segment i; entry 0 is left untouched
    108.     [BurstCompile]
    109.     struct JRotationProcessing : IJobParallelFor
    110.     {
    111.         #region The Constructor - hide to not distract
    112.         [ReadOnly] NativeArray<float3> procedPos;
    113.         NativeArray<quaternion> rotForParent;
    114.  
    115.         public JRotationProcessing(
    116.             NativeArray<float3> procP,
    117.             NativeArray<quaternion> pr
    118.             )
    119.         {
    120.             procedPos = procP;
    121.             rotForParent = pr;
    122.         }
    123.         #endregion
    124.  
    125.         public void Execute(int i)
    126.         {
    127.             if (i == 0) return;
    128.             rotForParent[i] = FromToRotation(new float3(0,0,-1),  procedPos[i] - procedPos[i - 1]);
    129.         }
    130.         // Builds an axis-angle rotation: axis = normalized cross(from, to), angle = acos of the clamped dot of the normalized inputs.
    131.         #region Utililtty
    132.         quaternion FromToRotation(float3 from, float3 to)
    133.         {
    134.             return quaternion.AxisAngle(
    135.                  // NOTE(review): if `to` is zero-length or (anti)parallel to `from`, cross() is zero and math.normalize presumably yields NaN - confirm / consider math.normalizesafe.
    136.                  math.normalize(math.cross(from, to)),
    137.  
    138.                  math.acos
    139.                  (
    140.                      math.clamp(math.dot(math.normalize(from),
    141.                      math.normalize(to)), -1f, 1f))
    142.                  );
    143.         }
    144.         #endregion
    145.     }
    146.  
    147.  
    148.     // Apply changes to be visible in render: writes procedPos[i] to each transform's position and
    149.     // rotForParent[i + 1] to its rotation; the last segment's rotation is left untouched
    150.     [BurstCompile]
    151.     struct JApplyTailMotion : IJobParallelForTransform
    152.     {
    153.         #region The Constructor - hide to not distract
    154.         [ReadOnly] NativeArray<float3> procedPos;
    155.         [ReadOnly] NativeArray<quaternion> rotForParent;
    156.         [ReadOnly] int segmentsCount;
    157.  
    158.         public JApplyTailMotion(
    159.             NativeArray<float3> p,
    160.             NativeArray<quaternion> pr, int c)
    161.         {
    162.             procedPos = p;
    163.             rotForParent = pr;
    164.             segmentsCount = c;
    165.         }
    166.         #endregion
    167.  
    168.         public void Execute(int i, TransformAccess t)
    169.         {
    170.             t.position = procedPos[i];
    171.             if (i < segmentsCount - 1) t.rotation = rotForParent[i + 1];
    172.         }
    173.     }
    174.  
    175.  
    176. }
    177.  

    And manager code:
    Code (CSharp):
    1. using System.Collections.Generic;
    2. using Unity.Collections;
    3. using Unity.Jobs;
    4. using UnityEngine;
    5. // Singleton that collects DOTSChainTransforming instances and, on every Update(), schedules their job chains and then completes them all.
    6. public class ManagerDOTSChainTransforming : MonoBehaviour
    7. {
    8.     public static ManagerDOTSChainTransforming Instance { get; private set; }
    9.     private void Awake() => Instance = this; // the most recently awakened instance wins; there is no duplicate check
    10.  
    11.  
    12.     private List<DOTSChainTransforming> chainAnimators = new List<DOTSChainTransforming>();
    13.     private NativeArray<JobHandle> handles = new NativeArray<JobHandle>();
    14.     // Registers a chain animator to be scheduled from Update(); there is no matching remove.
    15.     public void Add(DOTSChainTransforming chainAnim)
    16.     {
    17.         chainAnimators.Add(chainAnim);
    18.     }
    19.     // Schedules every registered chain's jobs for this frame, then blocks until all of them have finished.
    20.     void Update()
    21.     {
    22.         handles = new NativeArray<JobHandle>(chainAnimators.Count, Allocator.TempJob);
    23.         // NOTE(review): a new TempJob array is allocated above on every Update() and nothing visible here disposes it; its Length already equals count, so the branch below never replaces it - looks like a per-frame leak (raised in the replies below), confirm.
    24.         var count = chainAnimators.Count;
    25.  
    26.         if (handles.IsCreated)
    27.         {
    28.             if (handles.Length != count)
    29.             {
    30.                 handles.Dispose();
    31.                 handles = new NativeArray<JobHandle>(count, Allocator.Persistent);
    32.             }
    33.         }
    34.         else
    35.         {
    36.             handles = new NativeArray<JobHandle>(count, Allocator.Persistent);
    37.         }
    38.  
    39.         float timeElapsedForSinus = Time.time * 3f;
    40.         float deltaTime = Time.smoothDeltaTime;
    41.         if (deltaTime > 0.5f) deltaTime = 0.5f; // Limit max delta time to improve stability
    42.         deltaTime *= 60f; // 60 to speed up chain-follow animation
    43.         // Each chain gets a sine phase shifted by 0.5 per index so the chains do not move in lockstep
    44.         for (int i = 0; i < count; i++)
    45.         {
    46.             handles[i] = chainAnimators[i].GetUpdateJob(timeElapsedForSinus + (float)i * 0.5f, deltaTime);
    47.         }
    48.  
    49.         // Complete all the job handles
    50.         JobHandle.CompleteAll(handles);
    51.     }
    52. }

    I want to use chunked management of JobHandles (can't figure it out :/) in the manager,
    or use Entities to boost the performance further - can someone show an example of how to convert code like mine? I am not fully sure if it's applicable.

    I prepared the simplest classes I could, to make converting them easier.
    I am attaching the executable scene with scripts in the post files.


    Thank you!
     

    Attached Files:

    Last edited: Feb 25, 2023
  2. suity447

    suity447

    Joined:
    Oct 18, 2022
    Posts:
    33
    Skinned mesh rendering for entities is possible but experimental. You might have better luck using one of the user made animation frameworks (I have not used them).
    This line:
    float3 targetPos = procedPos[i - 1] + .....
    in JPositionProcessing will create different results depending on which thread runs first. This should trigger the safety system. I suspect because of your high innerloopbatchcount of 128 you do not get an error. I do not see how to process this in parallel as the result of one index depends on all the results in the loop before it. A simple loop in an IJob might be the way to go.
    This part will create a memory leak as a new array is allocated but not disposed every frame:
    Code (CSharp):
    1.  handles = new NativeArray<JobHandle>(chainAnimators.Count, Allocator.TempJob);
    2.         var count = chainAnimators.Count;
    3.         if (handles.IsCreated)
    4.         {
    5.             if (handles.Length != count)
    6.             {
    7.                 handles.Dispose();
    8.                 handles = new NativeArray<JobHandle>(count, Allocator.Persistent);
    9.             }
    10.         }
    11.         else
    12.         {
    13.             handles = new NativeArray<JobHandle>(count, Allocator.Persistent);
    14.         }
    You can rewrite it like this (deleting the if block):
    Code (CSharp):
    1.  handles = new NativeArray<JobHandle>(chainAnimators.Count, Allocator.Temp);
    2.        
    In FromToRotation you can remove the normalize for the from vector as you are already passing a normal vector. (The compiler might precalculate it anyway in this case as you are passing a de facto constant value)
     
    Nirvan likes this.
  3. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    Thank you! That cleaned up some stuff.
    I probably will skip skinned mesh entities for now.

    I just noticed something really strange (it was already present before the changes):
    if I disable Burst compilation I get roughly 20% better performance. It feels like it should be slower without Burst, but it's not; no idea why.

    Any ideas if I could modify manager script for chunked handles execution without writing like 5x more code?
    Or would it be even worth it?
     
    Last edited: Feb 25, 2023
  4. suity447

    suity447

    Joined:
    Oct 18, 2022
    Posts:
    33
    I do not understand what you mean by "chunked handles execution" and googling it does not net useful results. Would you mind explaining?
     
  5. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    Oh, so maybe that's not a thing. ChatGPT suggested this :confused: (as well as trying Entities).
    In theory it would avoid handling the whole jobHandles array at once in the manager, by grouping the jobs into chunks of, say, 10 handles each.

    Could running all calculations under a single JobHandle, with one big array per required list (jTransforms, jRotation, etc.)
    arranged so that all required chain animator instances are computed at once, be the fastest solution?
    I was hoping for some elegant approach for achieving this.
     
    Last edited: Feb 25, 2023
  6. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,271
    Yes. This is likely the fastest. A common trick is to use a NativeArray<int2> where the int2 represents a start and count inside another NativeArray with [NativeDisableParallelForRestriction]. Then you would have one job to "capture" the existing transforms, one that processes entire tubes at a time, computing the local space transforms of each bone, and one job that writes the local space transforms back.

    If you did want to use Entities, I made one of the more popular animation frameworks for Entities, and could definitely help guide you if you wanted to go down that route.
     
    Nirvan likes this.