Search Unity

[RELEASED] GPU Instancer

Discussion in 'Assets and Asset Store' started by LouskRad, May 3, 2018.

  1. Lars-Steenhoff

    Lars-Steenhoff

    Joined:
    Aug 7, 2007
    Posts:
    3,526
    Did anyone test GPU instancer on iphone 7 plus? will this benefit there, or should I stick to static batching.
    Talking about 500 objects ( trees )
     
  2. Nexusmaster

    Nexusmaster

    Joined:
    Jun 13, 2015
    Posts:
    365
    Hi, just want the dev to know that I had to fix the depth copy for HDRP:
    I added this shader in resources:
    Code (CSharp):
    1. Shader "Hidden/DepthCopy"
    2. {
    3.     SubShader
    4.     {
    5.         Cull Off ZWrite On ZTest Always
    6.         ColorMask 0
    7.  
    8.         Pass
    9.         {
    10.             CGPROGRAM
    11.             #pragma vertex vert
    12.             #pragma fragment CopyDepthBufferFragmentShader
    13.          
    14.             struct appdata
    15.             {
    16.                 float4 vertex : POSITION;
    17.                 float2 uv : TEXCOORD0;
    18.             };
    19.  
    20.             struct v2f
    21.             {
    22.                 float2 uv : TEXCOORD0;
    23.                 float4 vertex : SV_POSITION;
    24.             };
    25.  
    26.             v2f vert (appdata v)
    27.             {
    28.                 v2f o;
    29.                 o.vertex = UnityObjectToClipPos(v.vertex);
    30.                 o.uv = v.uv;
    31.                 return o;
    32.             }
    33.          
    34.             Texture2DArray<float> _MyDepthTex;
    35.             SamplerState my_linear_clamp_sampler;
    36.  
    37.             // important part: outputs depth from _MyDepthTex to depth buffer
    38.             half4 CopyDepthBufferFragmentShader(v2f i, out float outDepth : SV_Depth) : SV_Target
    39.             {
    40.                 float depth = _MyDepthTex.SampleLevel(my_linear_clamp_sampler, float3(i.uv.xy,0), 0).r;//SAMPLE_DEPTH_TEXTURE(_MyDepthTex, i.uv);
    41.                 outDepth = depth;
    42.                 return 0;
    43.             }
    44.  
    45.             ENDCG
    46.         }
    47.     }
    48. }
    And in GPUInstancerHiZOcclusionGenerator.cs :

    Code (CSharp):
    1.         private void Awake()
    2.         {
    3.             hiZTextureSize = Vector2.zero;
    4.             GPUInstancerConstants.SetupComputeTextureUtils();
    5.             occlusionCullingType = GPUInstancerConstants.gpuiSettings.occlusionCullingType;
    6.  
    7. #if !UNITY_2018_3_OR_NEWER
    8.             occlusionCullingType = GPUIOcclusionCullingType.Default;
    9. #endif
    10.  
    11.             var depthCopyShader = Resources.Load("Shaders/DepthCopy") as Shader;
    12.             m_DepthCopyMat = new Material(depthCopyShader);
    13.         }
    14.  
    And this:


    Code (CSharp):
    1.         private void UpdateTextureWithComputeShader(int offset)
    2.         {
    3. #if UNITY_2018_3_OR_NEWER
    4.             if (_isDepthTex2DArray)
    5.             {
    6.                 //Graphics.CopyTexture(unityDepthTexture, 0, 0, _tempDepthTextureForTex2DArray, 0, 0);
    7.  
    8.                 m_DepthCopyMat.SetTexture("_MyDepthTex", unityDepthTexture);
    9.                 Graphics.Blit(null, _tempDepthTextureForTex2DArray, m_DepthCopyMat);
    10.  
    11.                 GPUInstancerUtility.CopyTextureWithComputeShader(_tempDepthTextureForTex2DArray, hiZDepthTexture, offset);
    12.             }
    13.             else
    14. #endif
    15. ...
    Graphics.CopyTexture throws an error, because formats didn't fit anymore.
     
  3. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    @LouskRad Yep, makes total sense. Doing a deep profile I can see that yes, most of the time is spent setting various compute shader params. I can also see that the cost would be directly related to the number of prototypes; the loop in UpdateGPUBuffers that sets up the compute shader shows this.

    So the next question would be if there is some way to do the setup for each UpdateGPUBuffer in parallel ie job etc. I believe the actual dispatch can't but perhaps building the buffer could be. Basically doing UpdateGPUBuffer multithreaded or at least parts of it. Will need to dig into that but so far have not found anything online saying you can or cant do it so just need to give it a go unless you already know. Thanks.

    public static void UpdateGPUBuffers<T>(ComputeShader cameraComputeShader, int[] cameraComputeKernelIDs,
    ComputeShader visibilityComputeShader, int[] instanceVisibilityComputeKernelIDs, List<T> runtimeDataList,
    GPUInstancerCameraData cameraData, bool isManagerFrustumCulling, bool isManagerOcclusionCulling, bool showRenderedAmount, bool isInitial)
    where T : GPUInstancerRuntimeData
    {
    if (runtimeDataList == null)
    return;

    for (int i = 0; i < runtimeDataList.Count; i++)
    {
    UpdateGPUBuffer(cameraComputeShader, cameraComputeKernelIDs, visibilityComputeShader, instanceVisibilityComputeKernelIDs,
    runtimeDataList[i], cameraData, isManagerFrustumCulling, isManagerOcclusionCulling, showRenderedAmount, isInitial);
    }
    }
     
  4. MonkeyPuzzle

    MonkeyPuzzle

    Joined:
    Jan 17, 2016
    Posts:
    119
    Thanks! I got through to someone on the Sony forums that mentioned that GPU Instancer works well on PS4/5.
     
  5. N1warhead

    N1warhead

    Joined:
    Mar 12, 2014
    Posts:
    3,884
    @LouskRad Does GPUI work with Quest 2? I know the asset store page just says "Quest" isn't supported, doesn't specifically state which one (or both).
     
  6. Stingreye

    Stingreye

    Joined:
    Mar 15, 2018
    Posts:
    9
    @LouskRad Prefab manager has been working great for my use. One thing I have been doing is converting all my terrain trees to game objects to be able to have separate colliders and physics vs Unity's efficient but one giant collider combining the terrain and the trees. If I use the tree manager for the terrain trees, are the colliders handled like the prefab manager or are you using Unity's default terrain tree colliders?
     
  7. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    Hi @LouskRad thanks for the help, so wanted to pass this along from unity confirming what we have been discussing.

    Only optimization possible right now would be the following.

    For now, I'm building a command buffer and just dispatching the command buffer over and over, rather than rebuilding the commands every frame

    Multithreaded dispatch of command buffers.

    Hopefully someday. But for now it indeed is singlethreaded. A major reason for this is dependency tracking. Our API works as DX11/OpenGL would. So if you use a resource in Dispatch call then all the barriers etc must be issued correctly on platforms that require them.

    Let's use an example. I have two threads. One uses some image as an UAV and another as SRV in a compute shader. So there needs to be a barrier between them. Two threads issue a dispatch at the same time. If we just push them to the GPU (in API's that allow it, like Vulkan etc) that's an error as there is no barrier. So we'd basically have to have an intermediate layer that would wait for both of the threads, and then issue the calls internally from a single thread after looking at the dependencies. Basically bringing us back into the single threaded performance.

    For real multithreaded dispatch one needs some way of determining dependencies ahead of time so that the cases where there is a need for barriers etc are serialized but independent cases are not. We don't have such a thing yet unfortunately.(edited)
     
    LouskRad likes this.
  8. pedropla

    pedropla

    Joined:
    Apr 9, 2018
    Posts:
    21
    Hi @LouskRad,

    I just purchased GPU instancer and did a quick test on an Android device converting my test project's trees and details to use GPUi.

    This is the set up I used for details (with just one grass 2d texture to test):

    Screenshot 2021-01-07 191644.jpg
    Screenshot 2021-01-07 191709.jpg

    When activated my FPS drops to about 6fps on Android (from 30fps without GPUi) and when I do a deep profile I see a lot of the time is spent on this bounding function:

    Screenshot 2021-01-07 191313.jpg

    I'm guessing I must be doing something wrong with my set up. I didn't disable wind as that doesn't seem to be what is causing the delay although I could be wrong.

    Do you have any documentation that describes how to optimize for mobile devices? I checked the wiki but there wasn't much information apart from limiting the amount of foliage.

    Thanks,
    Pedro
     
  9. MicCode

    MicCode

    Joined:
    Nov 19, 2018
    Posts:
    59
    GPUI is not working in 2020.2 with HDRP 10.2.2
    Code (CSharp):
    1. Shader error in 'GPUInstancer/FoliageHDRP': 'GetDecalSurfaceData': no matching 2 parameter function at line 495 (on d3d11)
    Is there an update coming along? Many thanks
     
  10. zIyaGtVm

    zIyaGtVm

    Joined:
    Dec 27, 2017
    Posts:
    131
    That's it, everything works fine! Thanks!

    By the way, is it possible to InitializeGPUInstancer with an "additive mode" (only reinitializing the instances which have just been removed, without refreshing the instances that still exist)?
    For example, I have several cube instances. I remove some of them then call initialize method to bring them back.
    However currently all my instances will disappear for a moment after calling forceNew InitializeGPUInstancer().
    flash.gif
     
  11. Relec

    Relec

    Joined:
    May 16, 2016
    Posts:
    6
    Hello, strange issue out of the box for me:




    Any ideas of what I am doing wrong? This is using URP on Unity 2020.1
     
  12. Relec

    Relec

    Joined:
    May 16, 2016
    Posts:
    6

    For some reason reinstalling everything fixed this problem. I did notice I have a weird issue with rigid bodies where some are rendered and some are not and some are rendered in only one eye on my Quest 2. It seems like I have to make the collider modifier the same exact size as my maximum culling distance. That way rigid bodies are activated at the same time as culling. This gets rid of any of the weirdness.


    One other question, any help with this error:
    "Cannot create GPUI HiZ Depth Texture for occlusion culling: Screen size is too small."
     
    Last edited: Jan 10, 2021
  13. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    The Oculus Quest (any version) is not officially supported by GPUI with the reason being its compute shader and buffer limitations.

    GPUI does not deal at all with the colliders. The prefab manager just turns the Mesh Renderer components off (where the Tree Manager disables the drawing of trees on the Unity terrain) and renders the objects. That is, tree colliders will be handled by the terrain if you use a unity terrain.

    Hi Pedro,
    Thank you for the detailed information.

    I've said this in response to your mail as well, but will write it here too in case it helps someone else.

    It looks like the partitioning used by GPUI Detail Manager is creating way too many (16384) partitions for you. This is normally calculated from the Max Detail distance property and the terrain size on the manager. With a rather large terrain and small max detail distance (like 40 in your screen) it could indeed produce undesirable results.

    You can try disabling the Auto SP Cell Size of the manager, and use a manual cell size. I would advise starting with 256 to try. The smaller the cells get, the less GPU memory GPUI will use - but the more the bounds.Contains method will run. You will need to find a sweet spot that works best for your terrain.

    I hope this helps.

    You can use the auto add remove functionality to let GPUI handle the adding and removing of your instances in an optimized way.
     
  14. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    Thank you for your feedback. We will work on an update this week and fix the issue in the next GPUI update.
     
    ftejada likes this.
  15. Sisay

    Sisay

    Joined:
    Dec 6, 2012
    Posts:
    57
    Shader error in 'GPUInstancer/FoliageHDRP': 'LightLoop': cannot convert output parameter from 'struct LightLoopOutput' to 'float3' at line 3440 (on d3d11)
    Shader error in 'GPUInstancer/ColorVariationShader': 'LightLoop': cannot convert output parameter from 'struct LightLoopOutput' to 'float3' at line 2495 (on d3d11)
    Shader error in 'GPUInstancer/ImpactShield': 'LightLoop': cannot convert output parameter from 'struct LightLoopOutput' to 'float3' at line 3319 (on d3d11)
    Unity 2020.2.1f1
     
  16. shi946

    shi946

    Joined:
    Jun 3, 2019
    Posts:
    16
    What is the difference b/w prefabManager.AddPrefabInstance and AddPrefabInstances?

    The former takes ~0.2 ms per prefab; for my runtime destruction code with dozens of pieces being spawned per frame, this is non-negligible. In comparison, my object pooling takes 10x less time to spawn each prefab. AddPrefabInstances takes 10-100x less time but doesn't seem to spawn the instances correctly (some instances disappear, not all instances appear).

    Is there any faster alternative?
     
  17. N1warhead

    N1warhead

    Joined:
    Mar 12, 2014
    Posts:
    3,884
    @LouskRad Thanks for the info on the Quest platform in general mate.
     
  18. Darioszka

    Darioszka

    Joined:
    Jan 22, 2015
    Posts:
    12
    I have an error on the console when I go from the menu to the game scene through the Preload scene.
    NullReferenceException: Object reference not set to an instance of an object
    GPUInstancer.GPUInstancerUtility.AddDetailInstanceRuntimeDataToList (System.Collections.Generic.List`1[T] runtimeDataList, System.Collections.Generic.List`1[T] detailPrototypes, GPUInstancer.GPUInstancerTerrainSettings terrainSettings, System.Int32 detailLayer) (at Assets/GPUInstancer/Scripts/Core/Static/GPUInstancerUtility.cs:929)
    GPUInstancer.GPUInstancerDetailManager.InitializeRuntimeDataAndBuffers (System.Boolean forceNew) (at Assets/GPUInstancer/Scripts/GPUInstancerDetailManager.cs:134)
    GPUInstancer.GPUInstancerEditorSimulator.EditorUpdate () (at Assets/GPUInstancer/Scripts/GPUInstancerEditorSimulator.cs:131)
    UnityEditor.EditorApplication.Internal_CallUpdateFunctions () (at C:/buildslave/unity/build/Editor/Mono/EditorApplication.cs:200)

    GPUInstancer is in my project for a few months and there was no problem with it for this time.
     
    Last edited: Jan 12, 2021
  19. sebas77

    sebas77

    Joined:
    Nov 4, 2011
    Posts:
    1,642
    I updated to 2020.2 and GPUI stopped rendering. No errors from unity or compilation of shaders. Debugging with renderdoc (still in the middle of it) I can see the shaders are generated correctly and the draw calls called correctly. but still no output on the screen. It may be something in the compute shader itself, but at the moment I have no clue.

    ok after half a day of investigation it appears to me that the new URP 10 is not generating properly the define PROCEDURAL_INSTANCING_ON.

    I am not 100% sure what's going on yet, but my shader didn't work because I do:

    #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
    #include "Assets/GPUInstancer/Shaders/Include/GPUInstancerInclude.cginc"
    #pragma instancing_options procedural:setupGPUI
    #endif

    but then I noticed that the GPUI generated shaders do not use this ifdef and in fact once I remove it, it works, but it doesn't make any sense.
     
    Last edited: Jan 14, 2021
  20. FarhezAhmed

    FarhezAhmed

    Joined:
    Dec 15, 2020
    Posts:
    17
    After adding detail manager and in play mode grass disappeared and very low fps. using gpu instancer latest 1.43.
     
  21. Da-Luk

    Da-Luk

    Joined:
    Apr 25, 2017
    Posts:
    53
    Hey,
    I know GPUI turns the renderer off.
    But is there a way to check via c# script of a prefab gameObject if its instanced renderer is visible or not visible by the camera like already existing methods for example " void OnBecameVisible() / void OnBecameInvisible " ?

    This way I want to manually disable the collider of the gameObject when its instance renderer is not visible.

    Another Question.
    I have different gameObject groups, each group contains 100 prefabs which are instanced during play mode.
    I want to disable a group during runtime but actually it only disables the gameobjects not the instances.
    Do you have a special API for that, or is it even possible?

    Thank you in advance.
     
  22. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    Hi everyone,

    GPU Instancer v1.4.4 is live on the Asset Store. This version fixes the issues you have been reporting in Unity 2020.2.

    We have also remade all the GPUI shaders for the SRP packages in this version using ShaderGraph 10.

    You can extract the demo packages for your respective SRP from the Demos folder as usual. If you just want to use the new ShaderGraph shaders without the demos, you can extract the relevant shader package from the Extras folder.

    Please also note that Unity's Package Manager has a disturbing bug lately that misguides users to believe they have the latest version of an Asset Store asset where they actually don't. Therefore, if you don't see the update option in the Package Manager window, you can delete the existing asset file from where Unity downloads it in your computer. For Windows users, this file is located under:

    %APPDATA%\Unity\Asset Store-5.x\GurBu Technologies\

    for MacOS users, the file should be located under:

    ~/Library/Unity/Asset\ Store/GurBu\ Technologies/

    Deleting this file should allow the Package Manager to be able to download the latest version. If you still don't see the update after deleting, you can refresh the window.

    You can make sure you are using the latest version (1.4.4) by checking the version number on a GPUI manager:

    upload_2021-1-18_16-19-50.png

    Best wishes.
     
    zIyaGtVm, MicCode and iddqd like this.
  23. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    A faster alternative would be using a no game object approach.

    It looks like you are using an older version of GPUI; please update it from the Asset Store and see if it fixes the issue. If not, please send us a support request using this guide.

    GPUI does all the visibility calculations on the GPU; and an "isVisible" query would need to read this data back to the CPU and that in turn would slow the rendering down considerably. So there is no option for this in the system.

    As for enabling disabling instances, you can simply use the Auto. Add Remove Instances feature on the manager, which would let GPUI handle the instances as you enable/disable game objects. On the other hand, if you want to do it via the API, you can take a look at the AddPrefabInstance and RemovePrefabInstance methods.
     
  24. Raul_T

    Raul_T

    Joined:
    Jan 10, 2015
    Posts:
    363
    Hello!

    Running into an issue when coupling GPUI with scene streaming - loading/unloading.

    When unloading scenes this exception occurs, and after it happens once, GPU instancer add/remove prefabs no longer works correctly (some old prefabs won't get removed, some new ones are skipped - out of bounds exceptions also start to happen)

    upload_2021-1-19_14-55-9.png

    Culprit seems to be this line?
    upload_2021-1-19_14-59-51.png

    Any idea how we can solve this?

    For reference, our game's world is chunked into multiple scenes that get streamed at runtime, we have a root gameobject in each scene that holds the GPUInstancer prefabs and has the PrefabListRuntimeHandler script attached to it. The issue happens when unloading chunks thus triggering the handler's script OnDisable() event

    Thanks
     
  25. sebas77

    sebas77

    Joined:
    Nov 4, 2011
    Posts:
    1,642
    You think the problem with 2020.2 was due to your code? I was pretty sure that 2020.2 introduced a bug. Can you explain what the problem was please?

    Edit: I don't understand, it seems to me that
    PROCEDURAL_INSTANCING_ON is still not defined therefore
    UNITY_PROCEDURAL_INSTANCING_ENABLED is not either.

    In my shader I have

    #ifdef UNITY_PROCEDURAL_INSTANCING_ENABLED
    #include "Assets/GPUInstancer/Shaders/Include/GPUInstancerInclude.cginc"
    #pragma instancing_options procedural:setupGPUI
    #endif

    but this is never true! Just to be clear, your demos don't use this define so you may not have noticed it, hence it may not be properly related to GPUI although in some of your framework shader code you do use
    UNITY_PROCEDURAL_INSTANCING_ENABLED
     
    Last edited: Jan 20, 2021
  26. Sisay

    Sisay

    Joined:
    Dec 6, 2012
    Posts:
    57
    Unity 2020.2 Hdrp - low fps and glitches
     

    Attached Files:

  27. FarhezAhmed

    FarhezAhmed

    Joined:
    Dec 15, 2020
    Posts:
    17
    its broken now, after fixing another issue comes.
     
  28. Sisay

    Sisay

    Joined:
    Dec 6, 2012
    Posts:
    57
    I have more FPS without gpu instancer(my scene with 10 000 tree) - very strange
     
  29. FarhezAhmed

    FarhezAhmed

    Joined:
    Dec 15, 2020
    Posts:
    17
    same here.
     
  30. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    Hi There,

    The PrefabListRuntimeHandler script was designed for use with a specific internal case (for use with MapMagic). It is thus not a generic code, so we don't recommend using this script - at least without modifying it for your use case. You can use the Auto. Add/Remove feature of the manager, or if you want specific functionality you can use the API to implement your specific case.

    You should not use an ifdef there. That exact ifdef is already used in the GPUInstancerInclude file. The issue you are facing is probably because of your ifdef the include is not recognized - yet the setup pragma ignores the ifdef (Unity shader compiler treats pragma directives as a special case).

    The new HDRP 10 seems to have performance issues - it seems to be slower than the HDRP 7/8. This seems to be the case in general - with or without GPUI. Having said that, GPUI depends on the GPU for its effect; so if the new HDRP performance issues are caused by its heavier load on the GPU, that would indeed affect GPUI further. Thus, we can't do much about the slowness of the new HDRP - apart from hoping that Unity will fix the issue in future updates.

    As for the flickering, the issue is caused when the Occlusion Culling feature is used where the framerate is too low: the depth texture is not refreshed fast enough.
     
    Raul_T likes this.
  31. Acissathar

    Acissathar

    Joined:
    Jun 24, 2011
    Posts:
    677
    Hi,

    Recently picked this up and it's been great so far! I had a quick question about prefab variants and this message is really all I could find in the thread + wiki:
    upload_2021-1-21_9-12-22.png


    By different renderers, what difference are we talking about? For example, I have a cliff prefab. I then have 3 variants of said cliff prefab, the mesh is the same, the shader is the same, etc. the only difference is they have different materials. Would I be better off adding the base prefab or each of the variants in this case?
     
  32. bthanse

    bthanse

    Joined:
    Jun 24, 2019
    Posts:
    114
    Is GPU Instancer working with World Streamer 2 from NatureManufacture ?
     
  33. Sisay

    Sisay

    Joined:
    Dec 6, 2012
    Posts:
    57
    in my case it gets +1 ms to cpu, gpu takes less 4 ms after turning on gpuinstancer
     
  34. ngond002

    ngond002

    Joined:
    Feb 11, 2020
    Posts:
    2
    Hello,

    We have been using GPU Instancer for all of our small foliage and grass in our game. On the PC, there are no visible issues, but when running on PS4 and PS5, there is a lot of flashing and flickering of the grass. It appears when moving through the scene and occurs on all of the foliage at once (as if everything is turning on and off). It is a great deal more noticeable on the PS5. We have already disabled occlusion culling and even frustum culling to no effect. All of the grass is using the HDRP Lit shader with alpha maps. We are using Unity version 2019.3.15 (HDRP v7.3.1).

    @LouskRad Would there be anything else that could interfere with the rendering of the instanced grass even if the occlusion culling is not enabled?

    Any assistance would be greatly appreciated.
    Thank you.
     
    Last edited: Jan 25, 2021
  35. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    Wanted to pass this along @LouskRad and see what you thought.

    I found it can be relatively expensive to set the matrix when doing the no game object pattern. Matrix4x4 can be a bit time consuming (well, relatively, and it gets worse the more items you're updating). I thought of this experiment and it seemed to work, where the overall time to set up the job was far less than actually setting up the matrix in the main thread.

    Thoughts?

    [BurstCompile]
    public struct MatrixJob : IJob
    {
    public NativeArray<Matrix4x4> Matrix;
    public NativeArray<Vector3> cachePositions;
    public NativeArray<Quaternion> cacheRotations;

    public void Execute()
    {
    // change the data of the array
    for (int i = 0; i < Matrix.Length; i++)
    {
    Matrix[i] = Matrix4x4.TRS(cachePositions[i], cacheRotations[i], Vector3.one);
    }
    }
    }
     
  36. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    An optimized version of the job after looking at it was just to pass it along into the job that was updating the position and rotation of my objects and set it in one go versus doing it as a separate job.

    So something like this.


    Matrix[boidCntA + (group * count)] = Matrix4x4.TRS(boid.cachePosition, boid.cacheRotation, Vector3.one);

    to pass it into the job just assign the local matrix from the main thread into the job NativeArray.


    _nativeMatrixArray.CopyFrom(_matrix4x4Array);

    once all the jobs complete that update the native matrix array then just set them back into the local.

    _nativeMatrixArray.CopyTo(_matrix4x4Array);

    You could also access the native array but I think that accessing the native array is super super slow compared with the standard array in the main thread and its actually overall faster to assign to a local array.

    This created a massive (relatively) speed improvement for me over doing it in the main thread after all the positions and rotations had been updated.
     
    LouskRad likes this.
  37. ytrewq

    ytrewq

    Joined:
    Aug 5, 2012
    Posts:
    42
    Is it possible to disable receiving shadows for GPUInstancer objects? We have a scene with many high quality meshes; in total it gives more than 50 million triangles in the scene. Sometimes it crashes the GPU with a not-responding timeout and closes the editor. I noticed that for normal GameObjects this can be fixed just by turning off shadows, both casting and receiving.

    But for GPUInstancer only turning shadow casting on/off is allowed, and our scene still crashes from time to time. Is it possible to disable receiving shadows also?
     
  38. LogaNRV

    LogaNRV

    Joined:
    Jun 3, 2017
    Posts:
    35
    Sorry if this has been asked before..

    I would like to know if there is special support for two (or more) cameras at the same time.
    I have a scene with split screen for two players and right now I have disabled the frustum and occlusion culling, so both players see every GPUI instance.

    I was thinking that maybe it is possible to set up two managers and assign a different camera to each one, but I don't know if that would be the optimal solution (to use frustum and occlusion culling, and from a performance point of view).

    Regards.
     
  39. LouskRad

    LouskRad

    Joined:
    Feb 18, 2014
    Posts:
    904
    Glad to hear it's working out for you. If your prefab variant has mesh or material differences to the original - or has additional/less renderers, then you should add it as a variant. GPUI works on same mesh/material combinations and this feature is there for ease of use.

    We haven't tested GPUI with World Streamer 2, so I can't comment on that.

    It might be that a shader in your scene is causing an error in the GPU - which might be interfering with GPUIs operations. If you are using custom shaders, I would suggest testing systematically by replacing them with the lit shader to determine if this is the case, and which shader is the culprit.

    Thank you for the feedback. We will add a prototype based shadow receiving option to the managers in the next update. For now, if you want to disable shadow receiving for all GPUI rendered instances, you can change the following Line in the GPUInstancerUtility.cs:

    Line 615: (GPUI v.1.4.4)

    runtimeData.prototype.isShadowCasting && !runtimeData.hasShadowCasterBuffer ? ShadowCastingMode.On : ShadowCastingMode.Off, true, rdRenderer.layer,

    into:

    runtimeData.prototype.isShadowCasting && !runtimeData.hasShadowCasterBuffer ? ShadowCastingMode.On : ShadowCastingMode.Off, false, rdRenderer.layer,

    Yes, you can do it like you are proposing. For more information, you can take a look at this wiki article on the subject.
     
    LogaNRV likes this.
  40. Black_Raptor

    Black_Raptor

    Joined:
    Nov 3, 2014
    Posts:
    181
    Hi !

    I got an issue with GPU Instancer: the occlusion culling seems to not work correctly:


    As you can see, my tree and bush behind the rock are rendered, but the other things which use Unity culling are not, because they are not visible to the camera, but GPU Instancer does not occlude them ...

    The expected result :

     
  41. bthanse

    bthanse

    Joined:
    Jun 24, 2019
    Posts:
    114
    I have been trying to add a higher drawing distance to the details in the demo scene, but it looks like it stops at 400-500 meters.
    What have I doing wrong ?
     
  42. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    Hi @LouskRad hope you can give me a little insight into the performance hit of having matrix buffers setup that are somewhat empty.

    So from your example. We can have a matrix of spheres setup with a max count of say 1000. Then we can add or remove "spheres" by setting

    _matrix4x4Array[i] = Matrix4x4.zero to remove

    or to add

    _matrix4x4Array[i] = Matrix4x4.TRS(Random.insideUnitSphere * 20, Quaternion.identity, Vector3.one * Random.Range(0.5f, 1.5f));

    So the question is what is the performance overhead of having a matrix made up of a lot of "empty" slots ie set to Matrix4x4.zero? Will check but would like some insight into this as to potential issues on the GPU memory etc. We have a lot of fish objects for instance that we will need to setup matrixes for but only a few different types are active at any given location as you move around the world. Do I just setup these all upfront but set them to empty or fill them as needed like above or do I need to limit this etc. Just any insights as I am running my own perf tests would be helpful.

    Thanks.


    private void RemoveMatrix4x4FromArray(int instanceCount)
    {
    int end = sphereCount;
    sphereCount -= instanceCount;
    for (int i = sphereCount; i < end; i++)
    {
    _matrix4x4Array[i] = Matrix4x4.zero;
    }
    }
     
  43. ngond002

    ngond002

    Joined:
    Feb 11, 2020
    Posts:
    2
    Still looking at shaders that might be causing the flickering. We were wondering if receiving shadows might be a problem. However, I tried the above method, and shadows from other objects can still be seen over the instanced foliage, although forcing "ShadowCastingMode.Off" disables casting shadows.

    @LouskRad Is there something else that would override that parameter?

    Also, I was curious what the following call to "DrawMeshInstancedIndirect" was for (on Line 624 (GPUI v1.4.4)).

    Much appreciated.
     
  44. Sisay

    Sisay

    Joined:
    Dec 6, 2012
    Posts:
    57
    In unity 2020.2 even in standard build (no hdrp, no urp) in the detail scene it goes faster natively than with gpu instancer ...
    I have 5900x and 1070
     
  45. bthanse

    bthanse

    Joined:
    Jun 24, 2019
    Posts:
    114
    Why is there such a big difference in FPS?

    upload_2021-1-29_8-4-6.png
     
  46. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    Quick question about performance. Reading up on Instanced Indirect, it appears the compute shader dispatch and update would only be required if the object needs to move?

    https://toqoz.fyi/thousands-of-meshes.html

    For static items do we need to be doing the overhead of the compute shader updates for each prototype during lateupdate? I am a noob at this so maybe that is a dumb question.

    Thanks
     
  47. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    OK, so I tested that out on its own, and so far it appears to work just fine, cutting out the overhead of the compute shader updates during the core game loop. So for non-moving static objects that appears to be a significant improvement in speed and scalability. You can add a lot of different material/mesh combos without hitting that upper limit for objects that don't need to be moved around. Granted, I am not sure what the cost hit would be if you need to remove or add objects dynamically, but even if that is not workable in this solution (i.e. no compute shader updates), you could still gain for those objects that are static. Not sure if this is something that makes sense to investigate for adding into GPUI for these types of prefabs or not (it would seem to, on the surface), or if this is something custom which I just need to handle myself.

    In our case we have a ton of various prefabs that are not just material color changes that do not move around. It starts impacting performance as I have said before because we start pushing into the 50+ (honestly going to end up with 100s, that could be mitigated a bit by having zones etc and unloading/loading ones that are near but still) and at that amount you start running into significant millisecond hits because of the updates to the computeshaders, talking 3ms plus and just keeps growing obviously.

    Thanks for reading and I guess give me your thoughts.
     
    ngond002 and Acissathar like this.
  48. Duende

    Duende

    Joined:
    Oct 11, 2014
    Posts:
    200
    Hi, I'm trying to do something simple: create a number of cubes in runtime, of different sizes. And then update some of those cubes randomly and change their sizes. But I don't want to change the sizes with the scale, I want to modify the meshes. (I know that the simple thing is to change the scale, but what I'm doing is an example to learn how to work with this asset, because I will need to change the mesh of a prototype.)

    I've been looking at the PrefabsWithoutGameObjects example scene but there it only changes the position and scale, not the mesh. To have different cubes with different meshes, do I have to create different prototypes? Or is it possible to instantiate objects using a prototype and then change the mesh?
    And if I have to create different prototypes, how do I add them using InitializeWithMatrix4x4Array?

    Thank you.
     
  49. holdingjason

    holdingjason

    Joined:
    Nov 14, 2012
    Posts:
    135
    Don't think you can modify the mesh since that would create a new mesh and have to be pumped into the GPU again.

    Here is an example if it helps btw of at runtime loading up the various prefabs. In this example finding all the GPU prefab objects in the scene and then destroying them.

    // Group every GPUInstancerPrefab found under this object by its prototype,
    // recording each instance's transform before the GameObject is destroyed.
    Dictionary<GPUInstancerPrefabPrototype, PrefabData> _prefabs = new Dictionary<GPUInstancerPrefabPrototype, PrefabData>();

    GPUInstancerPrefab[] objs = gameObject.GetComponentsInChildren<GPUInstancerPrefab>();

    for (int i = 0; i < objs.Length; i++)
    {
        // Index restored: the forum markup swallowed "[i]".
        GPUInstancerPrefab prefab = objs[i];

        // Single lookup instead of ContainsKey followed by the indexer.
        PrefabData data;
        if (!_prefabs.TryGetValue(prefab.prefabPrototype, out data))
        {
            data = new PrefabData();
            _prefabs.Add(prefab.prefabPrototype, data);
        }

        data.prefab = prefab;
        data.prefabPrototype = prefab.prefabPrototype;
        data.Positions.Add(prefab.transform.position);
        data.Rotations.Add(prefab.transform.rotation);
        data.Scales.Add(prefab.transform.localScale);

        GameObject.Destroy(prefab.gameObject);
    }

    // Elapsed time in milliseconds; "time" is captured before this snippet (not shown here).
    Debug.Log((Time.realtimeSinceStartup - time) * 1000);

    Debug.Log(_prefabs.Count);

    // Build one matrix array per prototype and hand it to GPU Instancer.
    foreach (PrefabData data in _prefabs.Values)
    {
        GPUInstancerPrefabPrototype prefabPrototype = data.prefabPrototype;

        prefabPrototype.isShadowCasting = false;

        var matrix4x4Array = new Matrix4x4[data.Positions.Count];

        for (int i = 0; i < data.Positions.Count; i++)
        {
            // Indices restored on both sides: one TRS matrix per recorded instance.
            matrix4x4Array[i] = Matrix4x4.TRS(data.Positions[i], data.Rotations[i], data.Scales[i]);
        }

        //GPUInstancerAPI.AddPrefabInstance(prefabManager, data.prefab);
        // initialize the buffers with array
        GPUInstancerAPI.InitializeWithMatrix4x4Array(prefabManager, prefabPrototype, matrix4x4Array);

        //GPUInstancerAPI.UpdateVisibilityBufferWithMatrix4x4Array(prefabManager, prefabPrototype, matrix4x4Array);
    }
    }
     
    Duende and ngond002 like this.
  50. Duende

    Duende

    Joined:
    Oct 11, 2014
    Posts:
    200
    Thanks for the help. :)

    Hmm, so if I create 100 cubes, each with its own different mesh, would I have to create 100 Matrix4x4 arrays of size 1 (Matrix4x4 [1])? (Because I only need 1 cube of that type, with that unique mesh)