Search Unity

  1. Megacity Metro Demo now available. Download now.
    Dismiss Notice
  2. Unity support for visionOS is now available. Learn more in our blog post.
    Dismiss Notice

Question Custom Hybrid Render, any advice ?

Discussion in 'Graphics for ECS' started by Opeth001, Feb 3, 2020.

  1. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    Hello Everyone,

    I just created my own Hybrid Renderer, based on the Graphics.DrawMeshInstanced API, to support URP and old mobile devices. At the moment it's just a working example, and it doesn't support MaterialPropertyBlock yet.

    What I'm trying to improve:
    1) Modify the FrustumCullingJob to run on worker threads (for the moment it's just Burst-compiled). I'm aiming for a fixed number of culling jobs: whatever the RenderMesh chunk count is, it should never go over 8 jobs.

    2) Try to allocate fewer NativeContainers every frame; the allocations are eating ~30% of the system's time.

    Any suggestions are welcome!
    Edited:
    CustomHybredRendererSystem:
    Code (CSharp):
    1.  
    2.  
    3. #if CLIENT_BUILD && CUSTOM_HYBRED_RENDERER
    4.  
    5. using CWBR.ECS.CustomHybredRenderer.Utils;
    6. using System;
    7. using Unity.Burst;
    8. using Unity.Collections;
    9. using Unity.Collections.LowLevel.Unsafe;
    10. using Unity.Entities;
    11. using Unity.Jobs;
    12. using Unity.Mathematics;
    13. using Unity.Rendering;
    14. using Unity.Transforms;
    15. using UnityEditor;
    16. using UnityEngine;
    17. using UnityEngine.Profiling;
    18.  
    19. namespace CWBR.ECS.CustomHybredRenderer
    20. {
    21.     // [ExecuteAlways]
    22.     // [AlwaysUpdateSystem]
    23.     [UpdateInGroup(typeof(PresentationSystemGroup))]
    24.     // [UpdateAfter(typeof(UpdateRenderBoundsSystem))]
    25.     public class CustomHybredRendererSystem : JobComponentSystem
    26.     {
    /// <summary>Camera whose culling matrix is used to build the frustum planes; assigned from Camera.main in OnStartRunning.</summary>
    public static Camera ActiveCamera;

    /// <summary>Upper bound on the number of batches the frustum culling job is split into.</summary>
    public static int MaximumFrustomCullingThreads = 16;

    // Instanced drawing takes at most 1023 matrices per call. The scratch array below and the
    // batching logic must agree on this value, so it is declared exactly once.
    private const int maxEntitiesPerBatch = 1023;

    // Query selecting every chunk this system renders (built in OnCreate).
    EntityQuery m_FrozenGroup;

    // Six frustum planes stored as (normal.xyz, distance); persistent, released in OnDestroy.
    private NativeArray<float4> m_Planes;

    // Managed scratch buffer handed to the draw calls; refilled for every batch.
    readonly Matrix4x4[] m_MatricesArray = new Matrix4x4[maxEntitiesPerBatch];

    // Size of one Matrix4x4 in bytes, cached in OnCreate for the MemCpy calls.
    private int Matrix4x4CachedByteSize;
    41.  
    42.  
    43.  
    44. #region Resource Management
    /// <summary>
    /// Allocates the persistent plane buffer, caches the byte size of a matrix and
    /// builds the entity query that selects the chunks to render.
    /// </summary>
    protected override void OnCreate()
    {
        m_Planes = new NativeArray<float4>(6, Allocator.Persistent);
        Matrix4x4CachedByteSize = UnsafeUtility.SizeOf<Matrix4x4>();

        // Matrix4x4Space is the only component requested with write access.
        var requiredComponents = new ComponentType[]
        {
            ComponentType.ReadOnly<LocalToWorld>(),
            ComponentType.ReadOnly<WorldRenderBounds>(),
            ComponentType.ReadWrite<Matrix4x4Space>(),
            ComponentType.ChunkComponentReadOnly<ChunkWorldRenderBounds>(),
        };

        m_FrozenGroup = GetEntityQuery(requiredComponents);
    }
    59.  
    60.  
    /// <summary>
    /// Picks up the main camera and switches off the built-in RenderMeshSystemV2
    /// of the default injection world.
    /// </summary>
    protected override void OnStartRunning()
    {
        base.OnStartRunning();

        ActiveCamera = Camera.main;

        var builtInRenderer = World.DefaultGameObjectInjectionWorld.GetOrCreateSystem<RenderMeshSystemV2>();
        builtInRenderer.Enabled = false;
    }
    68.  
    /// <summary>
    /// Currently a no-op: the built-in RenderMeshSystemV2 is not re-enabled when this system stops.
    /// </summary>
    protected override void OnStopRunning()
    {
        // Disabled on purpose; restore the line below to hand rendering back to the built-in system.
        // World.DefaultGameObjectInjectionWorld.GetOrCreateSystem<RenderMeshSystemV2>().Enabled = true;
    }
    73.  
    /// <summary>
    /// Releases the persistent frustum plane buffer.
    /// </summary>
    protected override void OnDestroy()
    {
        // Skip the dispose when the buffer was never allocated or has already been released.
        if (m_Planes.IsCreated)
        {
            m_Planes.Dispose();
        }
    }
    78. #endregion
    79.  
    80.  
    /// <summary>
    /// Frustum-culls one chunk per job index and records, in <c>sortedBatchDatas[index]</c>,
    /// how many matrices of that chunk are visible and where they are stored.
    /// The chunk processed for a given index is <c>Chunks[ChunksSortedIndices[index]]</c>.
    /// </summary>
    [BurstCompile]
    struct FrustumCullingJobV2 : IJobParallelFor
    {
        // Inputs
        [ReadOnly] public NativeArray<ArchetypeChunk> Chunks;
        [ReadOnly] public NativeArray<int> ChunksSortedIndices;
        [ReadOnly] public NativeArray<float4> m_Planes;
        [ReadOnly] public int float4x4ByteSize;

        // Component type handles
        [ReadOnly] public ArchetypeChunkComponentType<LocalToWorld> LocalToWorldType;
        [ReadOnly] public ArchetypeChunkComponentType<WorldRenderBounds> WorldRenderBoundsType;
        [ReadOnly] public ArchetypeChunkComponentType<ChunkWorldRenderBounds> ChunkWorldRenderBoundsType;
        public ArchetypeChunkComponentType<Matrix4x4Space> Matrix4x4SpaceType;

        // Output: one slot per job index.
        [NativeDisableParallelForRestriction]
        [WriteOnly] public NativeArray<BatchData> sortedBatchDatas;

        public unsafe void Execute(int index)
        {
            var currentChunk = Chunks[ChunksSortedIndices[index]];

            // Test the chunk-level bounds first so whole chunks can be rejected or accepted at once.
            var chunkBounds = currentChunk.GetChunkComponentData(ChunkWorldRenderBoundsType).Value;
            var planeMask = FrustumUtils.IntersectionMask(m_Planes, chunkBounds);

            // Chunk is completely outside: the output slot is left untouched.
            if (planeMask == FrustumUtils.AllCullingPlanesExcluded)
                return;

            var localToWorlds = (float4x4*)currentChunk.GetNativeArray(LocalToWorldType).GetUnsafeReadOnlyPtr();

            BatchData batch;
            if (planeMask == FrustumUtils.AllCullingPlanesIncluded)
            {
                // Chunk is completely inside: expose the LocalToWorld data directly, no copy needed.
                batch = new BatchData
                {
                    BatchCount = currentChunk.Count,
                    MatricesPtr = localToWorlds,
                };
            }
            else
            {
                // Chunk straddles the frustum: test every entity and pack the visible matrices
                // contiguously into the chunk's Matrix4x4Space storage.
                var entityBounds = currentChunk.GetNativeArray(WorldRenderBoundsType);
                var packedMatrices = (float4x4*)currentChunk.GetNativeArray(Matrix4x4SpaceType).GetUnsafePtr();

                int visibleCount = 0;
                //TODO: Handle Huge Meshes Culling
                for (var entityIndex = 0; entityIndex < entityBounds.Length; entityIndex++)
                {
                    var result = FrustumUtils.IntersectWithMask(m_Planes, entityBounds[entityIndex].Value, planeMask);
                    if (result == FrustumUtils.IntersectResult.Out)
                        continue;

                    UnsafeUtility.MemCpy(packedMatrices + visibleCount, localToWorlds + entityIndex, float4x4ByteSize);
                    visibleCount++;
                }

                batch = new BatchData
                {
                    BatchCount = visibleCount,
                    MatricesPtr = packedMatrices,
                };
            }

            sortedBatchDatas[index] = batch;
        }
    }
    162.  
    163.  
    164.  
    /// <summary>
    /// Writes, for every chunk, the index of its RenderMesh shared component into
    /// <c>ChunkRenderer</c> at the same position as the chunk.
    /// </summary>
    [BurstCompile]
    struct GatherChunkRenderers : IJobParallelFor
    {
        [ReadOnly] public NativeArray<ArchetypeChunk> Chunks;
        [ReadOnly] public ArchetypeChunkSharedComponentType<RenderMesh> RenderMeshType;
        public NativeArray<int> ChunkRenderer;

        public void Execute(int chunkIndex)
        {
            ChunkRenderer[chunkIndex] = Chunks[chunkIndex].GetSharedComponentIndex(RenderMeshType);
        }
    }
    182.  
    183.  
    /// <summary>
    /// Culls every queried chunk against the active camera's frustum, groups the chunks by
    /// RenderMesh and submits the visible matrices in batches of at most
    /// <c>maxEntitiesPerBatch</c> through <c>DrawBatch</c>.
    /// </summary>
    /// <param name="inputDeps">Dependencies the culling job has to wait for.</param>
    /// <returns><paramref name="inputDeps"/> unchanged; every job scheduled here is completed before returning.</returns>
    protected override JobHandle OnUpdate(JobHandle inputDeps)
    {
        if (ActiveCamera == null)
            return inputDeps;

        // Nothing to do when the query matches no entity.
        if (m_FrozenGroup.CalculateEntityCount() == 0)
            return inputDeps;

#if PROFILER_ENABLED
        Profiler.BeginSample("CalculateFrustumPlanes");
#endif
        var calculateFrustumPlanesHandle = new CalculateFrustumPlanes
        {
            CameraProjectionMatrix = ActiveCamera.cullingMatrix,
            Planes = m_Planes
        }.Schedule();
#if PROFILER_ENABLED
        Profiler.EndSample();
#endif

        // Collect the chunks and sort them by the index of their RenderMesh shared component.
        var filteredChunks = m_FrozenGroup.CreateArchetypeChunkArray(Allocator.TempJob);
        var chunkCount = filteredChunks.Length;
        var chunkRenderer = new NativeArray<int>(chunkCount, Allocator.TempJob, NativeArrayOptions.UninitializedMemory);
        var sortedChunks = new NativeArraySharedValues<int>(chunkRenderer, Allocator.TempJob);

        var gatherChunkRenderersJobHandle = new GatherChunkRenderers
        {
            Chunks = filteredChunks,
            RenderMeshType = GetArchetypeChunkSharedComponentType<RenderMesh>(),
            ChunkRenderer = chunkRenderer
        }.Schedule(chunkCount, 64);

        sortedChunks.Schedule(gatherChunkRenderersJobHandle).Complete();

        var rendererBatchesCounts = sortedChunks.GetSharedValueIndexCountArray(); // [A,A,A,B,B,C,C,A,B] -> counts [4,3,2]
        var sortedChunkIndices = sortedChunks.GetSortedIndices();                 // [A,A,A,B,B,C,C,A,B] -> indices [0,1,2,7,3,4,8,5,6]

#if PROFILER_ENABLED
        Profiler.BeginSample("FrustumCulling");
#endif
        var sortedBatchesFromChunks = new NativeArray<BatchData>(chunkCount, Allocator.TempJob);

        // Split the chunks into at most MaximumFrustomCullingThreads batches:
        // chunksPerCullingBatch = ceil(chunkCount / maxBatches). Both values are clamped to 1
        // so a zero or negative setting can neither divide by zero nor produce an invalid batch size.
        int maxCullingBatches = math.max(1, MaximumFrustomCullingThreads);
        int chunksPerCullingBatch = math.max(1, (chunkCount + maxCullingBatches - 1) / maxCullingBatches);

        new FrustumCullingJobV2
        {
            Chunks = filteredChunks,
            ChunksSortedIndices = sortedChunkIndices,
            m_Planes = m_Planes,
            float4x4ByteSize = Matrix4x4CachedByteSize,
            ChunkWorldRenderBoundsType = GetArchetypeChunkComponentType<ChunkWorldRenderBounds>(true),
            LocalToWorldType = GetArchetypeChunkComponentType<LocalToWorld>(true),
            WorldRenderBoundsType = GetArchetypeChunkComponentType<WorldRenderBounds>(true),
            Matrix4x4SpaceType = GetArchetypeChunkComponentType<Matrix4x4Space>(false),
            sortedBatchDatas = sortedBatchesFromChunks,
        }.Schedule(chunkCount, chunksPerCullingBatch, JobHandle.CombineDependencies(inputDeps, calculateFrustumPlanesHandle)).Complete();

        filteredChunks.Dispose();
#if PROFILER_ENABLED
        Profiler.EndSample();

        Profiler.BeginSample("Alligning Data + DrawBatch");
#endif
        try
        {
            // Index (in sorted order) of the first chunk of the RenderMesh group being processed.
            int currentBatchIndex = 0;

            for (var i = 0; i < rendererBatchesCounts.Length; i++)
            {
                int currentSharedBatchesCount = rendererBatchesCounts[i];
                RenderMesh rendererMesh = default;
                bool renderMeshInitialized = false;
                int batchCount = 0; // matrices currently staged in m_MatricesArray

                for (var j = currentBatchIndex; j < currentBatchIndex + currentSharedBatchesCount; j++)
                {
                    var batchData = sortedBatchesFromChunks[j];
                    if (batchData.BatchCount == 0)
                        continue;

                    // Resolve the RenderMesh lazily: groups with nothing visible never pay for the lookup.
                    if (!renderMeshInitialized)
                    {
#if PROFILER_ENABLED
                        Profiler.BeginSample("GetSharedComponentData");
#endif
                        rendererMesh = EntityManager.GetSharedComponentData<RenderMesh>(chunkRenderer[sortedChunkIndices[currentBatchIndex]]);
                        renderMeshInitialized = true;
#if PROFILER_ENABLED
                        Profiler.EndSample();
#endif
                    }

                    // Stage this chunk's matrices, flushing a draw every time the scratch array fills up.
                    // The loop copes with a chunk overflowing the array any number of times.
                    int copiedFromChunk = 0;
                    while (copiedFromChunk < batchData.BatchCount)
                    {
                        int freeMatricesSpace = maxEntitiesPerBatch - batchCount;
                        int copyCount = math.min(batchData.BatchCount - copiedFromChunk, freeMatricesSpace);

                        unsafe
                        {
                            CopyFromPtr(batchData.MatricesPtr + copiedFromChunk, m_MatricesArray, batchCount, copyCount);
                        }

                        batchCount += copyCount;
                        copiedFromChunk += copyCount;

                        if (batchCount == maxEntitiesPerBatch)
                        {
                            DrawBatch(ref rendererMesh, batchCount);
                            batchCount = 0;
                        }
                    }
                }

                // Flush whatever is left for this RenderMesh.
                if (batchCount > 0)
                    DrawBatch(ref rendererMesh, batchCount);

                currentBatchIndex += currentSharedBatchesCount;
            }
        }
        finally
        {
            // Runs even when a draw call throws, so the TempJob containers never leak.
#if PROFILER_ENABLED
            Profiler.EndSample();
#endif
            sortedBatchesFromChunks.Dispose();
            sortedChunks.Dispose();
            chunkRenderer.Dispose();
        }

        return inputDeps;
    }
    359.  
    360.  
    361.  
    362.  
    363.  
    364.  
    365.  
    /// <summary>
    /// Draws the first <paramref name="batchCount"/> matrices of the scratch array with the given
    /// RenderMesh: one instanced call when the material has instancing enabled, otherwise one
    /// DrawMesh call per matrix.
    /// </summary>
    /// <param name="renderer">Mesh, material and render settings to draw with.</param>
    /// <param name="batchCount">Number of staged matrices to draw.</param>
    private void DrawBatch(ref RenderMesh renderer, int batchCount)
    {
#if PROFILER_ENABLED
        Profiler.BeginSample("DrawBatch");
#endif

        // In the editor no camera is passed to the draw calls; in builds drawing targets ActiveCamera.
#if UNITY_EDITOR
        Camera targetCamera = null;
#else
        Camera targetCamera = ActiveCamera;
#endif
        var material = renderer.material;

        if (material.enableInstancing)
        {
            Graphics.DrawMeshInstanced(
                renderer.mesh,
                renderer.subMesh,
                material,
                m_MatricesArray,
                batchCount,
                null,
                renderer.castShadows,
                renderer.receiveShadows,
                renderer.layer,
                targetCamera,
                UnityEngine.Rendering.LightProbeUsage.BlendProbes);
        }
        else
        {
            // Fallback for materials without GPU instancing: one draw call per instance.
            // Enabling instancing on the material avoids this slower path.
            bool castShadows = renderer.castShadows == UnityEngine.Rendering.ShadowCastingMode.On;

            for (int instance = 0; instance < batchCount; instance++)
            {
                Graphics.DrawMesh(
                    renderer.mesh,
                    m_MatricesArray[instance],
                    material,
                    renderer.layer,
                    targetCamera,
                    renderer.subMesh,
                    null,
                    castShadows,
                    renderer.receiveShadows,
                    useLightProbes: true);
            }
        }

#if PROFILER_ENABLED
        Profiler.EndSample();
#endif
    }
    412.  
    /// <summary>
    /// Copies <paramref name="Count"/> matrices from native memory into a managed array.
    /// </summary>
    /// <param name="sourceMatrices">First matrix to read.</param>
    /// <param name="outMatrices">Managed destination array.</param>
    /// <param name="offset">Index in <paramref name="outMatrices"/> at which writing starts.</param>
    /// <param name="Count">Number of matrices to copy.</param>
    private unsafe void CopyFromPtr(float4x4* sourceMatrices, Matrix4x4[] outMatrices, int offset, int Count)
    {
#if PROFILER_ENABLED
        Profiler.BeginSample("CopyFromPtr");
#endif
        // Raw memory copy because the instanced draw API used here consumes a Matrix4x4[] rather than a NativeArray.
        long byteCount = Matrix4x4CachedByteSize * Count;

        fixed (Matrix4x4* destinationStart = outMatrices)
        {
            Matrix4x4* destination = destinationStart + offset;
            UnsafeUtility.MemCpy(destination, sourceMatrices, byteCount);
        }
#if PROFILER_ENABLED
        Profiler.EndSample();
#endif
    }
    427.  
    428.  
    429.  
    /// <summary>
    /// Extracts the six frustum planes (left, right, bottom, top, near, far) from
    /// <c>CameraProjectionMatrix</c> and writes them to <c>Planes</c> as (normal.xyz, distance),
    /// scaled so that the normal has unit length.
    /// </summary>
    [BurstCompile]
    struct CalculateFrustumPlanes : IJob
    {
        // Matrix the planes are derived from (the caller passes the camera's culling matrix).
        public Matrix4x4 CameraProjectionMatrix;

        // Receives the six planes at indices 0..5.
        public NativeArray<float4> Planes;

        public void Execute()
        {
            // Work on the matrix rows directly; each plane is row3 +/- one of the other rows,
            // so no temporary container is needed.
            var m = CameraProjectionMatrix;
            var row0 = new float4(m.m00, m.m01, m.m02, m.m03);
            var row1 = new float4(m.m10, m.m11, m.m12, m.m13);
            var row2 = new float4(m.m20, m.m21, m.m22, m.m23);
            var row3 = new float4(m.m30, m.m31, m.m32, m.m33);

            Planes[0] = NormalizePlane(row3 + row0); // left
            Planes[1] = NormalizePlane(row3 - row0); // right
            Planes[2] = NormalizePlane(row3 + row1); // bottom
            Planes[3] = NormalizePlane(row3 - row1); // top
            Planes[4] = NormalizePlane(row3 + row2); // near
            Planes[5] = NormalizePlane(row3 - row2); // far
        }

        // Divides normal and distance by the length of the normal.
        static float4 NormalizePlane(float4 plane)
        {
            return plane / math.length(plane.xyz);
        }
    }
    499.  
    500.  
    501.  
    /// <summary>
    /// Result of culling one chunk: how many matrices are visible and where they start in memory.
    /// </summary>
    private unsafe struct BatchData
    {
        /// <summary>Number of matrices readable from <see cref="MatricesPtr"/>.</summary>
        public int BatchCount;

        /// <summary>Pointer to the first matrix of the batch.</summary>
        public float4x4* MatricesPtr;
    }
    507.  
    508.  
    509.  
    510.  
    511.  
    512.     }
    513. }
    514. #endif
    515.  
    516.  
    517.  

    Matrix4x4SpaceConversion:

    Code (CSharp):
    1.  
    /// <summary>
    /// Conversion system that adds a Matrix4x4Space component to the primary entity
    /// of every converted MeshRenderer.
    /// </summary>
    class Matrix4x4SpaceConversion : GameObjectConversionSystem
    {
        protected override void OnUpdate()
        {
            Entities.ForEach((MeshRenderer meshRenderer) =>
            {
                DstEntityManager.AddComponent<Matrix4x4Space>(GetPrimaryEntity(meshRenderer));
            });
        }
    }
    FrustumUtils:

    Code (CSharp):
    1. #if CLIENT_BUILD && CUSTOM_HYBRED_RENDERER
    2.  
    3. using FGTG.Networking.Utils;
    4. using Unity.Collections;
    5. using Unity.Mathematics;
    6. using UnityEngine;
    7.  
    8. namespace CWBR.ECS.CustomHybredRenderer.Utils
    9. {
    10.     public struct FrustumUtils
    11.     {
    12.  
    // Mask with the six lowest bits set (value 63): the result IntersectionMask yields
    // when a box is fully inside each of six planes.
    public const uint AllCullingPlanesIncluded = (1u << 6) - 1u;

    // Sentinel returned by IntersectionMask when the box is completely outside one plane.
    public const uint AllCullingPlanesExcluded = uint.MaxValue;
    15.  
    16.  
    /// <summary>Outcome of testing a box against a set of culling planes.</summary>
    public enum IntersectResult
    {
        /// <summary>The box is entirely behind at least one plane.</summary>
        Out = 0,

        /// <summary>The box is entirely in front of every tested plane.</summary>
        In = 1,

        /// <summary>The box is not rejected, but crosses at least one tested plane.</summary>
        Partial = 2,
    }
    23.  
    /// <summary>
    /// Fills the first six entries of <paramref name="planes"/> with the camera's frustum planes,
    /// each packed as (normal.x, normal.y, normal.z, distance).
    /// </summary>
    /// <param name="camera">Camera whose frustum is queried.</param>
    /// <param name="planes">Destination; must hold at least six elements.</param>
    public static void FromCamera(Camera camera, NativeArray<float4> planes)
    {
        Plane[] frustum = GeometryUtility.CalculateFrustumPlanes(camera);

        for (int planeIndex = 0; planeIndex < 6; planeIndex++)
        {
            Plane source = frustum[planeIndex];
            Vector3 n = source.normal;
            planes[planeIndex] = new float4(n.x, n.y, n.z, source.distance);
        }
    }
    34.      
    /// <summary>
    /// Classifies a box against every culling plane.
    /// </summary>
    /// <param name="cullingPlanes">Planes packed as (normal.xyz, distance).</param>
    /// <param name="a">Box to test.</param>
    /// <returns>
    /// <c>Out</c> as soon as the box lies entirely behind one plane, <c>In</c> when it is entirely
    /// in front of all planes, <c>Partial</c> otherwise.
    /// </returns>
    public static IntersectResult Intersect(NativeArray<float4> cullingPlanes, AABB a)
    {
        float3 center = a.Center;
        float3 extents = a.Extents;

        bool insideEveryPlane = true;
        for (int planeIndex = 0; planeIndex < cullingPlanes.Length; planeIndex++)
        {
            float4 plane = cullingPlanes[planeIndex];

            // Signed distance of the box centre, and the box's half-size projected on the plane normal.
            float signedDistance = math.dot(plane.xyz, center) + plane.w;
            float projectedRadius = math.dot(extents, math.abs(plane.xyz));

            if (signedDistance + projectedRadius <= 0)
            {
                return IntersectResult.Out;
            }

            if (!(signedDistance > projectedRadius))
            {
                insideEveryPlane = false;
            }
        }

        return insideEveryPlane ? IntersectResult.In : IntersectResult.Partial;
    }
    56.  
    57.  
    /// <summary>
    /// Tests a box against every culling plane and reports, per plane, whether the box is fully
    /// on the inner side. Planes whose bit stays zero still have to be tested for anything
    /// contained in the box.
    /// </summary>
    /// <param name="cullingPlanes">Planes packed as (normal.xyz, distance).</param>
    /// <param name="a">Box to test.</param>
    /// <returns>
    /// <c>AllCullingPlanesExcluded</c> when the box is entirely behind one plane; otherwise the mask
    /// built by calling NetworkingUtils.SetBitByIndex with the index of every plane the box is fully in front of.
    /// </returns>
    public static uint IntersectionMask(NativeArray<float4> cullingPlanes, AABB a)
    {
        float3 center = a.Center;
        float3 extents = a.Extents;

        uint fullyInsideMask = 0;
        for (int planeIndex = 0; planeIndex < cullingPlanes.Length; planeIndex++)
        {
            float4 plane = cullingPlanes[planeIndex];
            float signedDistance = math.dot(plane.xyz, center) + plane.w;
            float projectedRadius = math.dot(extents, math.abs(plane.xyz));

            if (signedDistance + projectedRadius <= 0)
            {
                return AllCullingPlanesExcluded;
            }

            if (signedDistance > projectedRadius)
            {
                NetworkingUtils.SetBitByIndex(ref fullyInsideMask, planeIndex);
            }
        }

        return fullyInsideMask;
    }
    85.  
    86.  
    87.  
    88.  
    /// <summary>
    /// Classifies a box against the culling planes, skipping every plane for which
    /// NetworkingUtils.BitByIndex reports a set bit in <paramref name="intersectionMask"/>.
    /// Only the planes whose bit is zero are tested.
    /// </summary>
    /// <param name="cullingPlanes">Planes packed as (normal.xyz, distance).</param>
    /// <param name="a">Box to test.</param>
    /// <param name="intersectionMask">Mask of planes to skip.</param>
    /// <returns>
    /// <c>Out</c> when the box is entirely behind a tested plane, <c>In</c> when it is entirely in
    /// front of every tested plane (including when no plane is tested), <c>Partial</c> otherwise.
    /// </returns>
    public static IntersectResult IntersectWithMask(NativeArray<float4> cullingPlanes, AABB a, uint intersectionMask)
    {
        float3 center = a.Center;
        float3 extents = a.Extents;

        bool insideEveryTestedPlane = true;
        for (int planeIndex = 0; planeIndex < cullingPlanes.Length; planeIndex++)
        {
            // A set bit means this plane needs no further testing.
            if (NetworkingUtils.BitByIndex(intersectionMask, planeIndex))
            {
                continue;
            }

            float4 plane = cullingPlanes[planeIndex];
            float signedDistance = math.dot(plane.xyz, center) + plane.w;
            float projectedRadius = math.dot(extents, math.abs(plane.xyz));

            if (signedDistance + projectedRadius <= 0)
            {
                return IntersectResult.Out;
            }

            if (!(signedDistance > projectedRadius))
            {
                insideEveryTestedPlane = false;
            }
        }

        return insideEveryTestedPlane ? IntersectResult.In : IntersectResult.Partial;
    }
    124.  
    125.  
    126.  
    127.  
    128.  
    /// <summary>
    /// Four planes stored component-wise (structure of arrays): lane k of each field
    /// belongs to the k-th plane of the packet.
    /// </summary>
    public struct PlanePacket4
    {
        /// <summary>X components of the four plane normals.</summary>
        public float4 Xs;

        /// <summary>Y components of the four plane normals.</summary>
        public float4 Ys;

        /// <summary>Z components of the four plane normals.</summary>
        public float4 Zs;

        /// <summary>Distances of the four planes.</summary>
        public float4 Distances;
    }
    136.  
    /// <summary>
    /// Repacks the culling planes into packets of four. Lanes beyond the last real plane are
    /// filled with a padding plane (normal (1,0,0), distance 32786) intended to always test as "in".
    /// </summary>
    /// <param name="cullingPlanes">Planes to pack.</param>
    /// <param name="allocator">Allocator for the returned array; the caller owns and disposes it.</param>
    /// <returns>An array of ceil(planeCount / 4) packets.</returns>
    public static NativeArray<PlanePacket4> BuildSOAPlanePackets(NativeArray<Plane> cullingPlanes, Allocator allocator)
    {
        int planeCount = cullingPlanes.Length;
        int packetCount = (planeCount + 3) >> 2;
        var packets = new NativeArray<PlanePacket4>(packetCount, allocator, NativeArrayOptions.UninitializedMemory);

        for (int packetIndex = 0; packetIndex < packetCount; packetIndex++)
        {
            // Build the packet locally and store it once; every lane is assigned below.
            var packet = new PlanePacket4();

            for (int lane = 0; lane < 4; lane++)
            {
                int planeIndex = (packetIndex << 2) + lane;

                if (planeIndex < planeCount)
                {
                    Plane plane = cullingPlanes[planeIndex];
                    packet.Xs[lane] = plane.normal.x;
                    packet.Ys[lane] = plane.normal.y;
                    packet.Zs[lane] = plane.normal.z;
                    packet.Distances[lane] = plane.distance;
                }
                else
                {
                    // Padding lane.
                    packet.Xs[lane] = 1.0f;
                    packet.Ys[lane] = 0.0f;
                    packet.Zs[lane] = 0.0f;
                    packet.Distances[lane] = 32786.0f; //float.MaxValue;
                }
            }

            packets[packetIndex] = packet;
        }

        return packets;
    }
    166.  
    /// <summary>
    /// Classifies an axis-aligned box against every plane packet, four planes per iteration.
    /// </summary>
    /// <param name="cullingPlanePackets">Packets of planes in structure-of-arrays form.</param>
    /// <param name="a">Box to classify; its <c>Center</c> and <c>Extents</c> are read.</param>
    /// <returns>
    /// <see cref="IntersectResult.Out"/> when at least one lane has <c>distance + radius &lt;= 0</c>;
    /// <see cref="IntersectResult.In"/> when every lane of every packet has <c>distance &gt; radius</c>;
    /// otherwise <see cref="IntersectResult.Partial"/>.
    /// </returns>
    public static IntersectResult Intersect2(NativeArray<PlanePacket4> cullingPlanePackets, AABB a)
    {
        // Broadcast the box center and extents across all four lanes.
        var centerX = new float4(a.Center.x);
        var centerY = new float4(a.Center.y);
        var centerZ = new float4(a.Center.z);

        var extentX = new float4(a.Extents.x);
        var extentY = new float4(a.Extents.y);
        var extentZ = new float4(a.Extents.z);

        // Per-lane flags accumulated over all packets.
        var anyLaneOut = new bool4(false);
        var everyLaneIn = new bool4(true);

        for (int packetIndex = 0; packetIndex < cullingPlanePackets.Length; packetIndex++)
        {
            PlanePacket4 packet = cullingPlanePackets[packetIndex];

            // Signed distance of the box center to each plane.
            float4 signedDistance = dot4(packet.Xs, packet.Ys, packet.Zs, centerX, centerY, centerZ) + packet.Distances;

            // Extents projected onto each plane normal.
            float4 projectedRadius = dot4(extentX, extentY, extentZ, math.abs(packet.Xs), math.abs(packet.Ys), math.abs(packet.Zs));

            anyLaneOut |= (signedDistance + projectedRadius <= 0);
            everyLaneIn &= (signedDistance > projectedRadius);
        }

        if (math.any(anyLaneOut))
        {
            return IntersectResult.Out;
        }

        return math.all(everyLaneIn) ? IntersectResult.In : IntersectResult.Partial;
    }
    198.  
    /// <summary>
    /// Two-state variant of the packet test: the box is either rejected or accepted,
    /// with no partial classification.
    /// </summary>
    /// <param name="cullingPlanePackets">Packets of planes in structure-of-arrays form.</param>
    /// <param name="a">Box to classify; its <c>Center</c> and <c>Extents</c> are read.</param>
    /// <returns>
    /// <see cref="IntersectResult.Out"/> when at least one lane has <c>distance + radius &lt;= 0</c>;
    /// otherwise <see cref="IntersectResult.In"/>.
    /// </returns>
    public static IntersectResult Intersect2NoPartial(NativeArray<PlanePacket4> cullingPlanePackets, AABB a)
    {
        // Broadcast the box center and extents across all four lanes.
        var centerX = new float4(a.Center.x);
        var centerY = new float4(a.Center.y);
        var centerZ = new float4(a.Center.z);

        var extentX = new float4(a.Extents.x);
        var extentY = new float4(a.Extents.y);
        var extentZ = new float4(a.Extents.z);

        // Per-lane "rejected by some plane" flags accumulated over all packets.
        var rejected = new bool4(false);

        for (int packetIndex = 0; packetIndex < cullingPlanePackets.Length; packetIndex++)
        {
            PlanePacket4 packet = cullingPlanePackets[packetIndex];
            float4 signedDistance = dot4(packet.Xs, packet.Ys, packet.Zs, centerX, centerY, centerZ) + packet.Distances;
            float4 projectedRadius = dot4(extentX, extentY, extentZ, math.abs(packet.Xs), math.abs(packet.Ys), math.abs(packet.Zs));

            rejected |= (signedDistance + projectedRadius <= 0);
        }

        if (math.any(rejected))
        {
            return IntersectResult.Out;
        }

        return IntersectResult.In;
    }
    223.  
    /// <summary>
    /// Lane-wise dot product of four 3D vector pairs given in structure-of-arrays form:
    /// lane <c>k</c> of the result is <c>xs[k]*mx[k] + ys[k]*my[k] + zs[k]*mz[k]</c>.
    /// </summary>
    private static float4 dot4(float4 xs, float4 ys, float4 zs, float4 mx, float4 my, float4 mz)
    {
        float4 xTerm = xs * mx;
        float4 yTerm = ys * my;
        float4 zTerm = zs * mz;

        // Summed left to right, matching the order of the single-expression form.
        return xTerm + yTerm + zTerm;
    }
    228.  
    /// <summary>
    /// Classifies a sphere against a set of planes.
    /// </summary>
    /// <param name="planes">Planes as float4 values: <c>xyz</c> is dotted with the center and <c>w</c> is added to the result.</param>
    /// <param name="center">Sphere center.</param>
    /// <param name="radius">Sphere radius.</param>
    /// <returns>
    /// <see cref="IntersectResult.Out"/> as soon as one plane has a signed distance below <c>-radius</c>;
    /// <see cref="IntersectResult.In"/> when every plane has a signed distance above <c>radius</c>;
    /// otherwise <see cref="IntersectResult.Partial"/>.
    /// </returns>
    public static IntersectResult Intersect(NativeArray<float4> planes, float3 center, float radius)
    {
        bool insideEveryPlane = true;

        for (int planeIndex = 0; planeIndex < planes.Length; planeIndex++)
        {
            float4 plane = planes[planeIndex];
            float signedDistance = math.dot(plane.xyz, center) + plane.w;

            if (signedDistance < -radius)
            {
                return IntersectResult.Out;
            }

            // Stays true only while the sphere is strictly on the positive side of each plane.
            insideEveryPlane &= signedDistance > radius;
        }

        return insideEveryPlane ? IntersectResult.In : IntersectResult.Partial;
    }
    249.     }
    250.  
    251. }
    252. #endif
     
    Last edited: Feb 13, 2020
  2. Razmot

    Razmot

    Joined:
    Apr 27, 2013
    Posts:
    346
    You can try using stackalloc (unsafe) and see how it goes:
    NativeArray<float4> planes = new NativeArray<float4>(12, Allocator.Temp);
    becomes
    float4* planes = stackalloc float4[12];
     
    Opeth001 likes this.
  3. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    I just updated my Custom Hybred Renderer Logic to be more efficient by preventing it from creating Matrices Buffers each frame.

    This logic is strongly based on the HybredRendererV1.

    Now it uses the default ECS logic to resize the matrices buffers by:
    1) getting a NativeArray<CD_Element> from the chunk. (it will always have the same length as the number of entities in the chunk)
    2) write to it.
    3) return a ReadOnly Pointers to the main thread. (no more allocation and deallocation each frame)

    Done!


    By improving the buffering logic, this problem got fixed on its own.

    I also modified the FrustumCulling Intersect function to return a uint mask from the ChunkWorldRenderBounds calculation, because we only need to test each entity's WorldRenderBounds against the frustum planes that came back as Partial for the ChunkWorldRenderBounds; this way I can safely skip some calculations.

    What I'm trying to improve now:
    Graphics.DrawMeshInstanced is taking a lot of time, so I'm looking for advice on how to improve it.

    Thanks in Advance!
     
    Last edited: Feb 12, 2020
  4. tertle

    tertle

    Joined:
    Jan 25, 2011
    Posts:
    3,753
    Is it actually any faster than mesh renderers?
     
    Opeth001 likes this.
  5. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    No, it's far from competing with the RendererMeshV2.
    On my PC the RendererMeshV2 takes 0.05 to render 100k entities, while the CustomHybredRenderer takes 0.1ms in frustum culling and 0.4ms in Graphics.DrawMeshInstanced calls (which I'm trying to optimize).
    But the purpose of this customHybredRenderer is to work with URP and on low-end devices.(which is not the case of RendererMeshV2)

    [Edit]
    @tertle
    After Updating to URP 7.2.0 RendererMeshV2 is working again for OnePlus2 but not in HTC One X9.
    Testing on a OnePlus2, the CustomHybredRenderer is more stable than the RendererMeshV2.
    CustomHybredRenderer 55~61 fps
    RendererMeshV2 45~ 58fps.
     
    Last edited: Feb 12, 2020
  6. tarahugger

    tarahugger

    Joined:
    Jul 18, 2014
    Posts:
    129
    Interesting, URP seems to work fine on RenderMeshV2. What part of it is broken?
     
  7. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    details are in this Thread.
     
    Baggers_ and tarahugger like this.
  8. tertle

    tertle

    Joined:
    Jan 25, 2011
    Posts:
    3,753
    Wanted to know how it compares against MeshRenderer (gameobjects) because you can use URP fine on them.
     
    Opeth001 likes this.
  9. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    In my case our game is based on Subscenes and we can't easily separate the rendering from the logic (like collisions, destructibles ...).
    so we have to go for a hybredRenderer Approach.
     
  10. tertle

    tertle

    Joined:
    Jan 25, 2011
    Posts:
    3,753
    I ask because that is kind of what we do in our project and I'd love a way to be able to use URP without having to keep game objects around.
     
  11. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    are you facing problems too with the current Unity's HybredRenderer ?
     
  12. tertle

    tertle

    Joined:
    Jan 25, 2011
    Posts:
    3,753
    We're using default RP at the moment. Just something we are researching.

    In my personal projects I have not had any issues with URP but I'm using very basic shaders and I know there are issues.
     
    Opeth001 likes this.
  13. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,223
    For optimizing, I'd be curious to know how many DrawBatch calls you make compared to unique RenderMeshes in your project. I have a suspicion you are making lots of small batches because the matrices aren't being packed together efficiently.

    It may be worth it to create a NativeList per unique RenderMesh (pool these NativeLists to save allocation costs) and then collect all the LTWs into these lists before jumping into the loop to draw batches of 1023. This packing can be done in parallel jobs that you don't even have to write. You can use EntityQuery.SetSharedComponentFilter, EntityQuery.CalculateEntityCount, and EntityQuery.ToComponentDataArray (job version) along with NativeList.ResizeUninitialized and NativeList.AsArray to do all of this, and the jobs only read entity data so they can all be scheduled in parallel to each other.
     
    Opeth001 likes this.
  14. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    i updated my CustomHybredRenderer and added it to my first Post. (in case you are interested to see the changes or Test/Use it) ^_^
    but i dont think this can be the problem because im even having less DrawCalls than the RendererMeshV2 which is currently giving a better performance on our Low-end device (Oneplus2).

    in the new version i dont need to pool/allocate/deallocate NativeMemories every frame per Batch, im just using the default ECS logic to manage the buffers used to pass the rendered entities LocalToWorld Matrices.
    in every chunk with visible entities i create a BatchData and add it to a NativeArray<BatchData> which is used to merge similar BatchData into a single DrawCall or multiple if they are more than 1023.
    BatchData:
    Code (CSharp):
    1.  
    2. unsafe struct BatchData
    3. {
    4.      public int BatchCount; // this batch count is the number of the rendered entities within the current chunk
    5.      public float4x4* MatricesPtr; // Stores chunk's Matrices vector ReadOnly ptr
    6. }
     
    Last edited: Feb 13, 2020
  15. DreamingImLatios

    DreamingImLatios

    Joined:
    Jun 3, 2017
    Posts:
    4,223
    I read the code wrong. You are doing the merging. But you are doing the merging on the main thread, which may or may not be the bottleneck. This is pretty sketch, but I wonder if you can parallelize the copying of your matrices to C# arrays by using a static array of arrays and using non-Burst jobs to access it.
     
    Opeth001 likes this.
  16. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    CopyFromPtr calls are fast enough: 100 calls = ~0.01ms (according to the Profiler)

    1) I found out that I was calling the DrawBatch function even for empty batches; fixing it made the system gain ~0.1ms.
    2) i reduced GetSharedComponentData<RenderMesh>() calls to be called only when the batchCount is higher than 0,
    which went from 0.09ms to 0.01ms.
     
  17. tarahugger

    tarahugger

    Joined:
    Jul 18, 2014
    Posts:
    129
    I too had the issue you referenced of disco mode after building URP for IOS, worked fine in Editor but just was trashed on the device. This was back in September last year and i showed it to the tech experts help panel at Unite Copenhagen and the response was that its probably the batcher. I assumed being such a show-stopper issue that it would be high priority to get fixed.
     
    Opeth001 likes this.
  18. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    this is the reason why i created this HybredRenderer.
    try it and see if it fits your needs and maybe use it as a Starting Point. ( for the moment it's already better than the HybredRendererV2 in term of performance for Mobile)
    CustomHybredRenderer 55~61 fps
    RendererMeshV2 45~ 58fps.
     
  19. tarahugger

    tarahugger

    Joined:
    Jul 18, 2014
    Posts:
    129
    Do you have a github repo or something i can follow the source from?
     
    Opeth001 likes this.
  20. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    i dont have a Github Repo for it. (for the moment)
    but you can copy the 3 scripts added above and add them to your project.
    Note: The HybredRendererV2 is required for conversion, bounds stuff and let you normaly use subscenes in Edit Mode.
    the CustomHybredRendererSystem will replace the RendererMeshV2 at runtime.
    also you need to add CUSTOM_HYBRED_RENDERER to your Scripting define Symbols.
     
  21. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    Hello Everyone,
    I'm trying to improve the Custom HybredRenderer by implementing dynamic Batching and Occlusion Culling.
    Both will be added after the Frustum Culling.

    1) Is there any good API for merging meshes using Bursted jobs?

    2) I saw that unity's Dynamic Batching is limited to a verts count. Should i follow this Limit or it's different for Bursted Jobs?

    3) I know what Occlusion Culling does and what's the benefits of using it but i have zero idea how it works. Can anyone point me to a good tutorial or anything explaining the logic ?

    Thanks !
     
    Last edited: Feb 28, 2020
  22. Opeth001

    Opeth001

    Joined:
    Jan 28, 2017
    Posts:
    1,112
    is there an efficient way to Skip chunks by Regions ?

    I'm trying to optimize the Frustum Culling by automatically skipping chunks that are outside the visible regions' bounds.

    The current Frustum Culling simply performs the calculation over all WorldChunkBounds, when it could be optimized by skipping a lot of chunks by region.

    e.g.: in this case the Frustum Culling should be performed only for the regions with SCD 15 and 21.




    what would be the most efficient way to select all chunks with 15 and 21 SCD ?

    Note: visible Regions can be up to 9 visible regions simultaneously.