Search Unity

  1. Unity Asset Manager is now available in public beta. Try it out now and join the conversation here in the forums.
    Dismiss Notice

Procedural Animation with Jobs... Slower than without jobs???

Discussion in 'DOTS Animation' started by Nirvan, Aug 27, 2020.

  1. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    I am trying to implement the simplest possible procedural follow behaviour using the job system.
    I succeeded, but from what I can see it runs slower than the implementation without jobs.
    What is wrong with this construction?
    I have attached the full code.
    It is also included here as a spoiler:

    Code (CSharp):
    1. using System.Collections.Generic;
    2. using Unity.Burst;
    3. using Unity.Collections;
    4. using Unity.Jobs;
    5. using Unity.Mathematics;
    6. using UnityEngine;
    7. using UnityEngine.Jobs;
    8.  
    /// <summary>
    /// Stress test for a procedural "follow the previous segment" chain driven by the
    /// C# Job System. On Start it instantiates <see cref="Length"/> copies of
    /// <see cref="segment"/>, each parented to the previous one, and every frame runs
    /// four chained jobs that reset the transforms, move each segment towards the point
    /// in front of its predecessor, derive look rotations, and write the result back.
    /// </summary>
    public class DOTSTest : MonoBehaviour
    {
        // Inner-loop batch size for the rotation job, whose iterations only read
        // positions and write their own slot.
        const int RotationBatchSize = 2048;

        public GameObject segment;
        public int Length = 1000;
        public float Sepr = .65f;
        [Range(0f, 1f)] public float reaction = 0.4f;

        TransformAccessArray jTransforms;
        JobHandle jHandle;

        // Jobs 'variables'
        // Per-segment values captured by JCalibrateTransforms every frame.
        NativeArray<float3> jPosition;
        NativeArray<float3> jLocalPosition;
        NativeArray<quaternion> jRotation;

        // Simulated world position of every segment; carried over between frames.
        NativeArray<float3> jProceduralPos;

        // Look rotation computed for segment i from segment i - 1 (slot 0 stays identity).
        NativeArray<quaternion> jParentRot;

        // Local pose of every segment as it was right after generation.
        NativeArray<float3> jInitLocPos;
        NativeArray<quaternion> jInitLocRot;

        /// <summary>
        /// Builds the segment chain and allocates the native buffers used by the jobs.
        /// </summary>
        void Start()
        {

            #region Generating object of any length for stress testing

            // NOTE(review): every segment is parented to the previous one, so the whole
            // chain forms a single deep hierarchy. Transform jobs reportedly only run in
            // parallel across independent hierarchies - consider parenting all segments
            // directly to the root if the chained relationship is not required.
            Transform parent = transform;
            List<Transform> transforms = new List<Transform>();

            for (int i = 0; i < Length; i++)
            {
                GameObject s = Instantiate(segment);
                s.transform.position = transform.position + new Vector3(0f, 0f, i * Sepr);
                s.transform.SetParent(parent, true);
                parent = s.transform;
                transforms.Add(parent);
            }

            #endregion

            int count = transforms.Count;

            jTransforms = new TransformAccessArray(transforms.ToArray());

            // All buffers are kept in fields until OnDestroy, so they use the Persistent
            // allocator; TempJob is intended for allocations that are released shortly
            // after being made.
            jPosition = new NativeArray<float3>(count, Allocator.Persistent);
            jLocalPosition = new NativeArray<float3>(count, Allocator.Persistent);
            jRotation = new NativeArray<quaternion>(count, Allocator.Persistent);

            jProceduralPos = new NativeArray<float3>(count, Allocator.Persistent);

            jInitLocPos = new NativeArray<float3>(count, Allocator.Persistent);
            jInitLocRot = new NativeArray<quaternion>(count, Allocator.Persistent);
            jParentRot = new NativeArray<quaternion>(count, Allocator.Persistent);


            // Assigning initial values for lists
            for (int i = 0; i < count; i++)
            {
                jInitLocPos[i] = transforms[i].localPosition;
                jInitLocRot[i] = transforms[i].localRotation;
                jProceduralPos[i] = transforms[i].position;
                jParentRot[i] = quaternion.identity;
            }

        }

        /// <summary>
        /// Schedules the four jobs as one dependency chain and waits for them to finish.
        /// </summary>
        void Update()
        {
            // Nothing to animate if Start did not finish or the chain is empty;
            // writing slot 0 below would be out of range in that case.
            if (!jTransforms.isCreated || !jProceduralPos.IsCreated || jProceduralPos.Length == 0)
            {
                return;
            }

            int count = jTransforms.length;

            // The first segment is pinned to this object's position.
            jProceduralPos[0] = transform.position;

            jHandle = new JCalibrateTransforms(jPosition, jLocalPosition, jRotation, jInitLocPos, jInitLocRot).Schedule(jTransforms, jHandle);

            // Each iteration reads the position of the previous segment, which this same
            // job writes, so the iterations are not independent. Scheduling the whole
            // range as a single batch keeps them in index order for any chain length.
            jHandle = new JPositionProcessing(jRotation, jProceduralPos, reaction, Sepr).Schedule(count, count, jHandle);
            jHandle = new JRotationProcessing(jProceduralPos, jParentRot).Schedule(count, RotationBatchSize, jHandle);

            jHandle = new JApplyTailMotion(jProceduralPos, jParentRot, count).Schedule(jTransforms, jHandle);

            jHandle.Complete();
        }

        /// <summary>
        /// Waits for any outstanding job and releases every native container that was
        /// actually allocated.
        /// </summary>
        private void OnDestroy()
        {
            // Containers must not be disposed while a job may still be using them.
            jHandle.Complete();

            // Start may never have run (or may have failed partway), so check each one.
            if (jTransforms.isCreated)
            {
                jTransforms.Dispose();
            }

            DisposeIfCreated(ref jPosition);
            DisposeIfCreated(ref jLocalPosition);
            DisposeIfCreated(ref jRotation);
            DisposeIfCreated(ref jInitLocPos);
            DisposeIfCreated(ref jInitLocRot);
            DisposeIfCreated(ref jProceduralPos);
            DisposeIfCreated(ref jParentRot);
        }

        // Disposes a native array only if it has been allocated.
        static void DisposeIfCreated<T>(ref NativeArray<T> array) where T : struct
        {
            if (array.IsCreated)
            {
                array.Dispose();
            }
        }


        // Jobs

        // Copy info from transforms and calibrate before animating:
        // restores every segment's initial local pose, then records its resulting
        // world position, local position and world rotation.
        [BurstCompile]
        struct JCalibrateTransforms : IJobParallelForTransform
        {
            [WriteOnly] NativeArray<float3> positions;
            [WriteOnly] NativeArray<float3> localPos;
            [WriteOnly] NativeArray<quaternion> rotations;

            [ReadOnly] NativeArray<float3> initLocalPos;
            [ReadOnly] NativeArray<quaternion> initLocalRot;

            public JCalibrateTransforms(
                NativeArray<float3> p,
                NativeArray<float3> locP,
                NativeArray<quaternion> r,
                NativeArray<float3> initLp,
                NativeArray<quaternion> initLr)
            {
                positions = p;
                localPos = locP;
                rotations = r;
                initLocalPos = initLp;
                initLocalRot = initLr;
            }

            public void Execute(int i, TransformAccess t)
            {
                t.localPosition = initLocalPos[i]; // Not needed in example but needed in my own code
                t.localRotation = initLocalRot[i];

                positions[i] = t.position;
                localPos[i] = t.localPosition;
                rotations[i] = t.rotation;
            }
        }


        // Moving segment to front of parent:
        // every segment except the first moves a fraction ('reaction') of the way
        // towards the point 'sep' units along the previous segment's forward axis.
        [BurstCompile]
        struct JPositionProcessing : IJobParallelFor
        {
            [ReadOnly] NativeArray<quaternion> rotations;
            NativeArray<float3> procedPos;

            [ReadOnly] float reaction;
            [ReadOnly] float sep;

            public JPositionProcessing(
                NativeArray<quaternion> r,
                NativeArray<float3> procP,
                float re, float s)
            {
                rotations = r;
                procedPos = procP;
                reaction = re;
                sep = s;
            }

            public void Execute(int i)
            {
                // Segment 0 is driven from Update and has no predecessor.
                if (i != 0)
                {
                    quaternion segmentOrientation = rotations[i - 1];
                    float3 targetPos = procedPos[i - 1] + math.mul(segmentOrientation, new float3(0f, 0f, sep));
                    float3 toTargetPos = targetPos - procedPos[i];
                    procedPos[i] += toTargetPos * reaction;
                }
            }
        }


        // For every segment except the first, stores the rotation that looks from the
        // previous segment's position towards this segment's position (world up = +Y).
        [BurstCompile]
        struct JRotationProcessing : IJobParallelFor
        {
            [ReadOnly] NativeArray<float3> procedPos;
            NativeArray<quaternion> rotForParent;

            public JRotationProcessing(
                NativeArray<float3> procP,
                NativeArray<quaternion> pr
                )
            {
                procedPos = procP;
                rotForParent = pr;
            }

            public void Execute(int i)
            {
                if (i > 0)
                {
                    rotForParent[i] = quaternion.LookRotation(procedPos[i] - procedPos[i - 1], new float3(0f, 1f, 0f));
                }
            }
        }



        // Writes the simulated pose back to the transforms: each segment except the last
        // takes the look rotation stored for the next segment, and each segment except
        // the first takes its simulated world position.
        [BurstCompile]
        struct JApplyTailMotion : IJobParallelForTransform
        {
            [ReadOnly] NativeArray<float3> procedPos;
            [ReadOnly] NativeArray<quaternion> rotForParent;
            [ReadOnly] int segmentsCount;

            public JApplyTailMotion(
                NativeArray<float3> p,
                NativeArray<quaternion> pr, int c)
            {
                procedPos = p;
                rotForParent = pr;
                segmentsCount = c;
            }

            public void Execute(int i, TransformAccess t)
            {
                // The last segment has no successor to take a rotation from.
                if (i < segmentsCount - 1)
                {
                    t.rotation = rotForParent[i + 1];
                }

                if (i > 0)
                {
                    t.position = procedPos[i];
                }
            }
        }


    }
    228.  



    Just assign some object to "Segment", set "Length" for stress testing (the number of copies), and set "Sepr" to the separation distance between segments.
     

    Attached Files:

  2. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,264
    Do you have actual performance numbers of both implementations? From the clip it looks like you are comparing jobs to nothing running at all.
     
  3. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    Yes but I would need to share second code having nearly same amount of lines.
    I don't think you need all of that.
    With jobs and Burst it should reach at least 100 fps, or even 800, whereas I am getting about 24.
     
  4. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,264
    Check the profiler timeline view? I'm suspicious you aren't running with Burst and all your jobs are running serially on one worker thread.
     
  5. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
  6. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,264
    I see what is going on now. Your biggest issue is actually your hierarchy. Since every segment is the child of the previous one, every time you touch one TransformAccess, it updates all the child transforms as well. My guess is that when working with MonoBehaviours, Unity gets around that using a deferred technique that isn't really possible in an IJobParallelForTransform context. This also explains why your jobs are executing serially. Only transforms that are part of independent hierarchies can run in parallel with each other.

    If you still need the root object to propagate enabled/disabled/instantiation/destruction, I suggest making all the segments direct children of the root. Otherwise, don't parent them at all.
     
  7. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    Thanks! That helped! Now I get the desired 800 fps. I was parenting them because I want to make this animation cooperate with keyframed animations (on skinned meshes, working on bones), which are based on parenting. Do you have any tips for that?
    I have an idea how to solve it, but I am not sure if it will work.
     
  8. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,264
    Glad that got you somewhere!

    I don't know enough about how you are blending keyframed animation with this to offer any tips. It might be a matter of maintaining two hierarchies, one with the keyframed animation, and the other using the flattened hierarchy that your jobs operate on (reading from the keyframe chained hierarchy should still be cheap in a job).
     
    Nirvan likes this.
  9. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    One more thing: I tried creating 600 objects with this behaviour, each with a smaller length, and performance is again very poor (without rendering). Should I use ECS to manage that, or can I do it some other way?
     
  10. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,264
    600 objects really shouldn't be hitting GameObject world that hard. Something is wrong.
     
  11. Nirvan

    Nirvan

    Joined:
    Nov 16, 2013
    Posts:
    134
    I am convinced it should work much faster anyway.
    I have prepared a package in case you or someone else would like to look at this a bit more deeply. (U2019.4.8)

    So it works 2x faster with enabled burst compile when there is one snake with for example 5k length.
    But with multiple short snakes it works very slow.
     

    Attached Files: