
Wrong weighted order independent transparency

Discussion in 'Shaders' started by psomgeorg, Sep 7, 2019.

  1. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    I am trying to implement weighted blended OIT from this article: http://casual-effects.blogspot.com/2014/03/weighted-blended-order-independent.html
    and I am using as a helper an implementation example someone did in Unity and posted on GitHub:
    https://github.com/candycat1992/OIT_Lab (I want to implement the "OIT/WeightedBlended" variant).
    The difference is that I don't have any transparent GameObjects created in the editor; I create some smoke particles dynamically via script. For that reason I use DrawMeshInstancedIndirect as the draw call.
    I did get a result using the three shaders (accumulate, revealage and blend), however the result is wrong and I get some strange artifacts:
    upload_2019-9-7_11-35-33.png
    upload_2019-9-7_11-36-8.png
    Below is the C# code and the three shaders.
    Code (CSharp):
    void Start()
    {
        camera = GetComponent<Camera>();

        cameraEvent = CameraEvent.AfterEverything;
        argsBuffer = new ComputeBuffer(1, args.Length * sizeof(uint), ComputeBufferType.IndirectArguments);
        InitPosition();

        // Accumulation pass
        accumTex = RenderTexture.GetTemporary(Screen.width, Screen.height, 0, RenderTextureFormat.ARGBHalf, RenderTextureReadWrite.Linear);
        commandBuffer = new CommandBuffer()
        {
            name = "Testing"
        };
        commandBuffer.SetRenderTarget(accumTex);
        commandBuffer.ClearRenderTarget(false, true, new Vector4(0.0f, 0.0f, 0.0f, 0.0f), 1f);
        commandBuffer.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, accumMaterial, -1, argsBuffer);
        camera.AddCommandBuffer(cameraEvent, commandBuffer);
        material.SetTexture("_AccumTex", accumTex);

        // Revealage pass
        revealTex = RenderTexture.GetTemporary(Screen.width, Screen.height, 0, RenderTextureFormat.RHalf, RenderTextureReadWrite.Linear);
        CommandBuffer cm3 = new CommandBuffer();
        cm3.SetRenderTarget(revealTex);
        cm3.ClearRenderTarget(false, true, new Vector4(1.0f, 1.0f, 1.0f, 1.0f), 1f);
        cm3.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, revealMaterial, -1, argsBuffer);
        camera.AddCommandBuffer(cameraEvent, cm3);
        material.SetTexture("_RevealTex", revealTex);

        // Blend (composite) pass
        CommandBuffer cm2 = new CommandBuffer();
        cm2.SetRenderTarget(BuiltinRenderTextureType.CameraTarget);
        cm2.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, material, -1, argsBuffer);
        camera.AddCommandBuffer(cameraEvent, cm2);
    }
    The InitPosition function just initializes the position buffer and the argsBuffer, and binds the position buffer to the corresponding materials.
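    Roughly it does something like this (a trimmed sketch only; instanceCount, positionBuffer, the random spread and the exact args layout are placeholders):
    Code (CSharp):
    void InitPosition()
    {
        // Sketch: fill a per-instance position buffer and the indirect draw arguments.
        positionBuffer = new ComputeBuffer(instanceCount, 4 * sizeof(float));
        Vector4[] positions = new Vector4[instanceCount];
        for (int i = 0; i < instanceCount; i++)
            positions[i] = new Vector4(Random.Range(-1f, 1f), Random.Range(-1f, 1f), Random.Range(-1f, 1f), 1f);
        positionBuffer.SetData(positions);

        // the same buffer is bound to all three materials as _Position
        accumMaterial.SetBuffer("_Position", positionBuffer);
        revealMaterial.SetBuffer("_Position", positionBuffer);
        material.SetBuffer("_Position", positionBuffer);

        // indirect args: index count, instance count, start index, base vertex, start instance
        args[0] = (uint)instanceMesh.GetIndexCount(subMeshIndex);
        args[1] = (uint)instanceCount;
        args[2] = (uint)instanceMesh.GetIndexStart(subMeshIndex);
        args[3] = (uint)instanceMesh.GetBaseVertex(subMeshIndex);
        args[4] = 0;
        argsBuffer.SetData(args);
    }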
    And these are the 3 shaders
    Code (CSharp):
    Shader "Unlit/AccumulateShader"
    {
        Properties
        {
            _MainTex ("Texture", 2D) = "white" {}
        }
        SubShader
        {
            Tags { "Queue" = "Transparent" "RenderType" = "Transparent" "IgnoreProjector" = "True" }
            LOD 100
            ZWrite Off
            Blend One One
            Pass
            {
                CGPROGRAM
                #pragma vertex vert
                #pragma fragment frag
                #pragma target 4.5
                #include "UnityCG.cginc"

                struct v2f
                {
                    float2 uv : TEXCOORD0;
                    float4 vertex : SV_POSITION;
                    float z : TEXCOORD1;
                };

                sampler2D _MainTex;
                float4 _MainTex_ST;
                #if SHADER_TARGET >= 45
                    StructuredBuffer<float4> _Position;
                #endif

                v2f vert (appdata_full v, uint instanceID : SV_InstanceID)
                {
                    #if SHADER_TARGET >= 45
                        float4 data = _Position[instanceID];
                    #else
                        float4 data = 0;
                    #endif
                    v2f o;
                    // per-instance translation from the position buffer
                    float4x4 worldMatrix = unity_ObjectToWorld;
                    worldMatrix[0][3] = data.x;
                    worldMatrix[1][3] = data.y;
                    worldMatrix[2][3] = data.z;
                    float4 worldPos = mul(worldMatrix, v.vertex);
                    o.z = (mul(UNITY_MATRIX_V, worldPos)).z; // view-space depth for the weight function
                    o.vertex = mul(UNITY_MATRIX_VP, worldPos);
                    o.uv = TRANSFORM_TEX(v.texcoord, _MainTex);
                    return o;
                }

                // depth/alpha weight function from the weighted blended OIT article
                float weight(float z, float alpha)
                {
                    return alpha * max(1e-2, min(3 * 1e3, 10.0 / (1e-5 + pow(z / 5, 2) + pow(z / 200, 6))));
                }

                fixed4 frag (v2f i) : SV_Target
                {
                    fixed4 col = tex2D(_MainTex, i.uv);
                    // accumulate weighted premultiplied color and weighted alpha (additive blend)
                    col = float4(col.rgb * col.a, col.a) * weight(i.z, col.a);
                    return col;
                }
                ENDCG
            }
        }
    }
    Code (CSharp):
    Shader "Unlit/RevealShader"
    {
        Properties
        {
            _MainTex ("Texture", 2D) = "white" {}
        }
        SubShader
        {
            Tags { "Queue" = "Transparent" "RenderType" = "Transparent" "IgnoreProjector" = "True" }
            LOD 100
            ZWrite Off
            // multiplicative blend: destination revealage *= (1 - alpha)
            Blend Zero OneMinusSrcAlpha
            Pass
            {
                CGPROGRAM
                #pragma vertex vert
                #pragma fragment frag
                #pragma target 4.5
                #include "UnityCG.cginc"

                struct v2f
                {
                    float2 uv : TEXCOORD0;
                    float4 vertex : SV_POSITION;
                };

                sampler2D _MainTex;
                float4 _MainTex_ST;
                #if SHADER_TARGET >= 45
                    StructuredBuffer<float4> _Position;
                #endif

                v2f vert (appdata_full v, uint instanceID : SV_InstanceID)
                {
                    #if SHADER_TARGET >= 45
                        float4 data = _Position[instanceID];
                    #else
                        float4 data = 0;
                    #endif
                    v2f o;
                    float4x4 worldMatrix = unity_ObjectToWorld;
                    worldMatrix[0][3] = data.x;
                    worldMatrix[1][3] = data.y;
                    worldMatrix[2][3] = data.z;
                    float4 worldPos = mul(worldMatrix, v.vertex);
                    o.vertex = mul(UNITY_MATRIX_VP, worldPos);
                    o.uv = TRANSFORM_TEX(v.texcoord, _MainTex);
                    return o;
                }

                fixed4 frag (v2f i) : SV_Target
                {
                    fixed4 col = tex2D(_MainTex, i.uv);
                    // output alpha so the blend multiplies the destination by (1 - alpha)
                    col = col.aaaa;
                    return col;
                }
                ENDCG
            }
        }
    }
    Code (CSharp):
    Shader "Unlit/SmokeShader"
    {
        Properties
        {
            _MainTex ("Texture", 2D) = "white" {}
            _AccumTex ("Accumulate Texture", 2D) = "black" {}
            _RevealTex ("Revealage Texture", 2D) = "white" {}
        }
        SubShader
        {
            Tags { "Queue" = "Transparent" "RenderType" = "Transparent" "IgnoreProjector" = "True" }
            LOD 100
            ZWrite Off
            Blend SrcAlpha OneMinusSrcAlpha
            Pass
            {
                CGPROGRAM
                #pragma vertex vert
                #pragma fragment frag
                #pragma target 4.5
                #include "UnityCG.cginc"

                struct v2f
                {
                    float2 uv : TEXCOORD0;
                    float4 vertex : SV_POSITION;
                    float4 screenpos : TEXCOORD1;
                };

                sampler2D _MainTex;
                sampler2D _AccumTex;
                sampler2D _RevealTex;

                float4 _AccumTex_TexelSize;
                float4 _MainTex_ST;
                #if SHADER_TARGET >= 45
                    StructuredBuffer<float4> _Position;
                #endif

                v2f vert (appdata_full v, uint instanceID : SV_InstanceID)
                {
                    #if SHADER_TARGET >= 45
                        float4 data = _Position[instanceID];
                    #else
                        float4 data = 0;
                    #endif
                    v2f o;
                    float4x4 worldMatrix = unity_ObjectToWorld;
                    worldMatrix[0][3] = data.x;
                    worldMatrix[1][3] = data.y;
                    worldMatrix[2][3] = data.z;
                    float4 worldPos = mul(worldMatrix, v.vertex);
                    o.vertex = mul(UNITY_MATRIX_VP, worldPos);
                    o.uv = TRANSFORM_TEX(v.texcoord, _MainTex);
                    o.screenpos = ComputeScreenPos(o.vertex);
                    return o;
                }

                fixed4 frag (v2f i) : SV_Target
                {
                    /*float2 uv = (floor(i.uv * _AccumTex_TexelSize.zw) + 0.5) * _AccumTex_TexelSize.xy;
                    fixed4 col = tex2Dlod(_AccumTex, float4(uv, 0, 0));*/
                    float2 uv = i.screenpos.xy / i.screenpos.w;
                    fixed4 accum = tex2D(_AccumTex, uv);
                    float reveal = tex2D(_RevealTex, uv).r;
                    float4 background = tex2D(_MainTex, i.uv);
                    //fixed4 col = float4(reveal, reveal, reveal, reveal);
                    //fixed4 col = float4(accum.rgb / max(accum.a, 1e-5), reveal);
                    fixed4 col = float4(accum.rgb / clamp(accum.a, 1e-4, 5e4), reveal);
                    // composite: weighted average color mixed with the background by the revealage
                    return (1.0 - col.a) * col + col.a * background;
                }
                ENDCG
            }
        }
    }
     
  2. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Can someone give me an answer or some insight into what I am doing wrong?
     
  3. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    I don't know exactly where your bug is, but a few thoughts.

    1: You should be using MRT (rendering to multiple render targets in one pass) and not multiple passes of your particles. That'll be much faster to render; a rough sketch follows after these two points. See this project for an example:
    https://github.com/NegInfinity/WeightedBlendedTransparency

    2: When you render the final pass, it should be a full screen blit, not rendering the particles again. The accumulation and reveal textures are already all of the data you need as full screen textures. Rendering that data using the particle geometry itself just leads back to the original depth sorting problem. Read that first article you linked to again.
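    For point 1, something roughly like this on the command buffer side (untested sketch; oitMaterial stands for a single material whose shader writes the accumulation to SV_Target0 and the revealage to SV_Target1):
    Code (csharp):
    // one pass into two render targets instead of two separate draws
    RenderTargetIdentifier[] mrt = {
        new RenderTargetIdentifier(accumTex),
        new RenderTargetIdentifier(revealTex)
    };
    // color MRT, with the camera target bound as the depth buffer
    commandBuffer.SetRenderTarget(mrt, BuiltinRenderTextureType.CameraTarget);
    commandBuffer.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, oitMaterial, -1, argsBuffer);

    // note: you'd still need to clear accumTex to 0 and revealTex to 1 beforehand,
    // and the fragment function returns something like:
    // struct FragOutput { half4 accum : SV_Target0; half4 reveal : SV_Target1; };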
     
  4. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Thank you very much, I will check it out.
     
  5. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    The problem with this demo is that I don't see how he computes the render textures he uses in the final blending. They don't seem to be created at runtime. More likely he created them once and just drag-and-dropped them into the material he uses for the final pass.
     
  6. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    I tried this
    upload_2019-9-10_12-10-22.png
    and I get a black image (material is the blend material). I also do the two passes like I did above in the Start function, using command buffers.
     
  7. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    What confuses me about the demos and other projects on WOIT is that they all use camera.Render(), since they already have their transparent GameObjects in the scene. I have to use DrawMeshInstancedIndirect instead, and I don't understand how to use it correctly.
     
  8. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    Um, they're created in OnRenderImage?
    https://github.com/NegInfinity/Weig...Transparency/Scripts/TransparentRender.cs#L56
    Code (csharp):
    void OnRenderImage(RenderTexture src, RenderTexture dst){
        // some stuff, then
        baseColor = RenderTexture.GetTemporary(cam.pixelWidth, cam.pixelHeight, 16, RenderTextureFormat.ARGB32);
        mrtARGB = RenderTexture.GetTemporary(cam.pixelWidth, cam.pixelHeight, 0, RenderTextureFormat.ARGBFloat);
        mrtR = RenderTexture.GetTemporary(cam.pixelWidth, cam.pixelHeight, 0, RenderTextureFormat.ARGBFloat);
    You can also create temporary render targets in a command buffer, but the way to go about that is a little bit confusing, as you need to create a nameID using Shader.PropertyToID() for cmd.GetTemporaryRT(), and then a RenderTargetIdentifier for those render textures in cmd.SetRenderTarget().

    Using Render() is just a way to simplify the setup; there's no reason you need to do it that way. There's no reason you even need to use OnRenderImage. You can do it all in a single command buffer, doing the blit in the command buffer as the last step.
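    Something roughly like this, all in one command buffer (untested sketch; combineMaterial is whatever material uses your final composite shader, and the property names match the ones you're already using):
    Code (csharp):
    int accumID = Shader.PropertyToID("_AccumTex");
    int revealID = Shader.PropertyToID("_RevealTex");

    // -1/-1 = camera pixel width/height; the temporaries should also be visible to shaders under those names
    cmd.GetTemporaryRT(accumID, -1, -1, 0, FilterMode.Bilinear, RenderTextureFormat.ARGBHalf);
    cmd.GetTemporaryRT(revealID, -1, -1, 0, FilterMode.Bilinear, RenderTextureFormat.RHalf);

    cmd.SetRenderTarget(new RenderTargetIdentifier(accumID));
    cmd.ClearRenderTarget(false, true, Color.clear);
    cmd.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, accumMaterial, -1, argsBuffer);

    cmd.SetRenderTarget(new RenderTargetIdentifier(revealID));
    cmd.ClearRenderTarget(false, true, Color.white);
    cmd.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, revealMaterial, -1, argsBuffer);

    // last step: composite straight into the camera target
    cmd.Blit(null, BuiltinRenderTextureType.CameraTarget, combineMaterial);

    cmd.ReleaseTemporaryRT(accumID);
    cmd.ReleaseTemporaryRT(revealID);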
     
  9. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Yes, I see where he created them; I can see he sets those three render buffers as well, and then he just renders. I can't see how the accumulate and revealage shaders run. I don't see whether he has set a material with those shaders and renders with them. The smoke particles have the combineShader in the renderer, which just takes the already calculated textures and uses them to combine them. Also, I get some missing prefabs on the car, so I can't really tell which material/shader he uses, and he has a bunch of them. The script uses the combineShader, and the blended smoke particle has some built-in legacy alpha blended shader. So I can't seem to understand how that works.
    I have this procedure in mind, correct me if I am wrong: I call DrawMeshInstancedIndirect with the accumulate shader and save to a render target with the desired format; I do the same with the revealage shader; and last I have to combine those two render textures in a single shader. How should that be done then? In OnRenderImage? Because I get a black screen; it seems something is going on with the render textures. Maybe it first blits with the combine shader and then executes the command buffers that are responsible for producing the render textures. Remember I add the command buffers to the main camera at CameraEvent.AfterEverything.
     
  10. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    I tried to do that, but I didn't know what to use as arguments in the Blit. In OnRenderImage I was using source and destination, but using command buffers, what arguments should I use? I need the image that has been rendered so far by the main camera as the source and the final image as the destination.
     
  11. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    There appear to be two different smoke particle systems. One uses the built in shader, one uses the OIT shader. There’s a script to turn one off and turn the other on. The OIT shader writes out to both the accum and reveal textures. The combine shader should only be used in the script in the Blit.

    You were mostly doing that correctly already in terms of the C# side of things with your previous setup. You can keep that setup, you just need to use a shader that’s not doing all the extra particle related stuff and just reads the two textures using the UVs, does the math, and you’re done.

    cmd.Blit(null, BuiltinRenderTextureType.CameraTarget, combineMaterial);

    The first parameter is used to set the material’s _MainTex, which isn’t needed. Both of those example projects are using OnRenderImage and need to combine the camera’s normal output with the OIT passes. If you look at the example in the Casual Effects post Morgan doesn’t do that, he just outputs using a shader that’s using traditional alpha blending. Far more straightforward. That doesn’t require a copy of the camera’s main output, it just renders directly into the camera’s target using the accum and reveal texture alone.


    Doing two passes instead of a single pass into an MRT is much less efficient, but you can stick with that for now just to get the rest working.
     
  12. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Now I get something like this:
    upload_2019-9-11_18-42-1.png
     
  13. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    This is basically it:
    upload_2019-9-11_18-42-34.png

    You can check the shaders in my first post, I didn't change them.
     
    Last edited: Sep 11, 2019
  14. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    Check your composite shader’s blend mode. You have it using a traditional alpha blend, and that is not what it should be using.
     
  15. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    OK, without the blend this is the result:
    upload_2019-9-12_9-51-26.png
    The OIT might be working, but I still get the wrong background.
     
  16. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    There is also something crazy and weird going on. I placed more instances and changed the positions to see the transparency:
    upload_2019-9-12_9-59-14.png
    And I kind of get this:
    upload_2019-9-12_9-59-59.png
    If I change the positions a bit and add some randomness to x as well, for instance Random.Range(-1.0f, 1.0f), I get this:
    upload_2019-9-12_10-1-24.png
    or this, if I add the randomness to the y axis:
    upload_2019-9-12_10-2-34.png
    It's like the positions of the instances affect the camera's blit.
     
  17. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    I didn't say it shouldn't be using a blend, I said it shouldn't be using traditional alpha blending.

    This is the blend mode you were using for your composite:
    Blend SrcAlpha OneMinusSrcAlpha

    Take a closer look at what the blend mode described here is:
    upload_2019-9-12_11-3-28.png
    (hint: the order matters)

    You previously mentioned you didn't change the shaders at all from the original post... but you should have. You're still applying the particle offsets in the blit shader, which you should not be. It should be really, really dumb. Basically it should use the vert_img function that's in UnityCG.cginc, which does the absolute minimum work of passing the UVs along, and that's it. Then you should sample the two textures using those UVs.
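    Something along these lines (a minimal, untested sketch; the texture names match the ones you're already setting, and the Blend line is the one from the article):
    Code (csharp):
    Shader "Unlit/OITComposite"
    {
        SubShader
        {
            Cull Off ZWrite Off ZTest Always
            // note the order compared to what you had: result = src * (1 - src.a) + dst * src.a
            Blend OneMinusSrcAlpha SrcAlpha
            Pass
            {
                CGPROGRAM
                #pragma vertex vert_img
                #pragma fragment frag
                #include "UnityCG.cginc"

                sampler2D _AccumTex;
                sampler2D _RevealTex;

                fixed4 frag (v2f_img i) : SV_Target
                {
                    float4 accum = tex2D(_AccumTex, i.uv);
                    float reveal = tex2D(_RevealTex, i.uv).r;
                    // weighted average color, with the revealage in alpha for the blend above
                    return fixed4(accum.rgb / clamp(accum.a, 1e-4, 5e4), reveal);
                }
                ENDCG
            }
        }
    }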
     
  18. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    OK, after all this time it seems to finally work; however, they tend to get blurred. Is that normal?
    upload_2019-9-13_9-34-40.png

    On the left it's a single particle.
    upload_2019-9-13_9-35-21.png
    Anyway, I will now try to apply that in my project with flipbook billboarded smoke to see how it looks.
     


  19. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    If you're talking about the fact that the stacked particles look like a blur rather than a stack, then yes. Weighted Blended OIT is an approximation of order independent transparency, not actual order independent transparency. Really it's an additive blend that rescales the output based on the coverage so it doesn't blow out like a normal additive blend would.
     
  20. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    OK, this is how it looks; note that I haven't done any occlusion culling yet. However, it seems that some particles are more transparent than the others (lighter). Isn't that wrong? Shouldn't their alpha change dynamically when they don't have any smoke behind them?
     

    Attached Files:

  21. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Also, when I get inside one of them it affects all the other particles, and since it moves fast I get these flickering artifacts. In my application I have to be able to walk through the smoke and have it stay visually correct without those artifacts. Does that happen because of the algorithm? Because if that is the reason, maybe WOIT wasn't the best approach in the first place. I figured that with so many particles (you see 1k of them in the first 20 ms, and more are added every 20 ms, maybe on the order of 100k, 2.5 million in total in the whole room), it would not be possible to sort them fast enough via compute shaders. For that reason I went with the transparency approximation. I guess if I do sort, I have to cull them first to get as few as possible and then sort them.
     

    Attached Files:

  22. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    The biggest problem this approximation has is with near or fully opaque textures. Ideally you want everything to be relatively transparent. However that doesn’t fully explain the artifacts you’re seeing, which don’t look exactly like anything I’ve seen with this technique before. Could be a product of the texture you’re using, could be a bug with your shaders, no idea.

    One thing: that texture you're using looks like it's one of the free smoke textures Unity released. If it's the one I think it is, it's an explosion puff that fades out, not one designed to loop. That might explain the differences in opacity you're seeing? Not sure.
     
  23. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Yes, that's the exact texture. I animate it though, so it goes through all the frames, so that doesn't explain why some particles have low alpha while others are more dense, and it doesn't change over time. It's like some particles have their alpha reduced, like a multiplication. I am using the wispy smoke textures, which you are supposed to cycle through to animate.
     
    Last edited: Sep 13, 2019
  24. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Do you think I should try to deal with the transparency blending via a bitonic sort instead of OIT, or do you agree with me that I would lose performance?
     
  25. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    I know why some particles are more dense than the others. It has to do with my offline program, which generates the positions based on some data. Apparently it places many of them in the exact same position and gives them the same speed, since they get their speed from their position. So they basically move as a group, one inside the other. So I can also imagine that, since there are so many of them (it could be more than 10), when they get in front of my screen and in front of the others, they change the accumulated color and opacity a lot. I will fix my algorithm that generates the positions and see if my speculation was correct.
     
  26. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    The weighted blend is definitely cheaper (if you can render the accum and reveal in one pass instead of the two you're using now). At some point you'll probably be hitting overdraw limits regardless of which option you go with.
     
  27. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Even with occlusion and frustum culling?
     
  28. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    Frustum culling doesn't have any impact on overdraw. The stuff you're culling then is already off screen.

    Occlusion may help, but most of that will be auto-rejected by the GPU if your shader is using ZTest LEqual (the default if nothing is defined). Both will help with vertex count and vertex shader invocations, but do little to prevent overdraw / overshading.
     
  29. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    OK, you've got me confused. First of all, since I am drawing them manually, I need to cull them myself via a compute shader, right? And why is overdraw a problem? I mean, for transparency you want to render them all to achieve the effect, and the WOIT approximation needs that as well. I guess the result of overdraw is a performance cost, so let me back up a little, because in a previous post you suggested DrawMeshInstancedIndirect. I have, from some data, positions for tons of particles, about 2.5 million for every 20 ms. Basically, in the shaders, apart from the texture animation and billboarding, I want to move them with linear interpolation towards the positions obtained from the data. Previously I was instantiating them via a C# script and updating their positions in the shaders, but the problem was that Unity didn't know about the stuff I was doing in the shaders, so frustum culling was done according to Unity. For that reason you suggested DrawMeshInstancedIndirect and said I have to do everything by hand: blending (WOIT or sorting) and culling.

    So let me ask again: what's the best approach for this? Because now I have the feeling you are telling me I still have a problem. I think my problem is simple enough: I just have some positions and want to use instancing to draw some particles. In a graphics API it would be simple enough, but I need the VR part as well, and that's why I stick to Unity.
     
  30. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    Using DrawMeshInstancedIndirect is more CPU and GPU efficient for rendering a very large number of items than Unity's built-in instancing of renderer components, mainly because it doesn't have to deal with all of those game objects and components on top of rendering. Even with the hack you were using of overriding their positions, it's still trying to cull those individual renderer components (leading to the issues you had before with the particles disappearing). Of course, using DrawMeshInstancedIndirect is also faster since you skip individually culling your particles, which is both good and bad. Good because it's not wasting CPU time, and bad because it's potentially wasting GPU time. The question comes down to which takes less time.

    When rendering 2 million quads (or more importantly 8 million vertices) on modern GPUs, that's probably not a huge deal. Unity's VFX Graph can be used to simulate 8 million particles while still keeping above 60 fps on a mid-range modern GPU. But the expense of culling / occluding / sorting that many particles might be way more than just rendering them.

    So, my general suggestion is ignore individually culling the particles all together. The DrawMeshInstancedIndirect has a bounds, try to set that to something roughly in-line with where the particles are likely to be if you can, and leave it to that. That'll let Unity skip rendering all particles in a system if it's out of view (ie: frustum culled or occluded).
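    e.g. something along these lines (for reference, it's the Graphics.DrawMeshInstancedIndirect overload that takes the Bounds; the CommandBuffer version doesn't, so this is only a sketch of the idea with placeholder values):
    Code (csharp):
    // one fixed bounds that covers the whole smoke volume, so Unity can
    // frustum cull / occlude the entire batch at once (center and size are placeholders)
    Bounds smokeBounds = new Bounds(Vector3.zero, new Vector3(50f, 50f, 50f));
    Graphics.DrawMeshInstancedIndirect(instanceMesh, subMeshIndex, smokeMaterial, smokeBounds, argsBuffer);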

    That leaves sorting. Using opaque, additive, or weighted blended OIT particles lets you skip sorting while still getting consistent results. With opaque rendering, the sorting still matters for rendering performance as the GPU can use the depth buffer to skip rendering pixels, hence why opaque objects are sorted front to back. But for additive and WBOIT, the order doesn't really matter, unlike traditional alpha blending, which is why those two options are so useful (and why most GPU instanced particle demos use additive particles).


    Lastly I mentioned overdraw / overshading. There are two parts of this. Depending on who you talk to, those two terms mean the same thing or are subtly different. For opaque geometry a common case of overshading is two opaque objects getting rendered out of order, so the further object renders first, then another renders closer. All the pixels of the further object that got rendered but weren't visible in the final image are considered "overshading", as in the GPU did extra work to "shade" (render the fragment shader) pixels where it was unnecessary. When the rendering order is correct, any pixels the closer object rendered at can be skipped by the GPU when rendering further objects. For some this is also what overdraw means, but it can also just mean overlapping geometry even if the geometry was rendered in the correct order, as the GPU still does a little bit of work to determine that it can skip rendering.

    For transparent objects, multiple layers of transparencies always leads to overshading. If you have a bunch of overlapping transparent geometry, regardless of its blend mode, the GPU has to render all of that geometry for each pixel as it has no way of doing occlusion like it can for opaque geometry. That can get expensive when you have a lot of pixels rendering a lot of overlapping transparent geometry. There's also still that bit of cost to skip rendering of transparent objects that are behind opaque ones, but overall this is usually much less cost than actually rendering it (on most modern GPUs at least). So if you have 2 million particles, and they all cover a large portion of the screen, it doesn't matter if you have an Nvidia Titan RTX, your framerate is going to be ... bad. Most GPU particle demos use very small particles for this reason, as even though the instancing and GPU side simulation is making the cost of calculating that many vertices inconsequential, the cost of rendering that number of pixels is still something we haven't fully overcome.


    So, my suggestion is thus:
    Skip sorting and use WBOIT, and live with the weirdness. I would recommend using less detailed sprites to make the artifacts of WBOIT less obvious.
    Don't do any culling or occlusion.
    Think about reducing the size of particles and fading them out as the camera gets close to help hide the fact it's shrinking. Unity's built in particle system and shaders can actually do both of these, and even limits particles to 50% of the screen height by default.
     
  31. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Right now I can see particles through opaque geometry, even with ZTest Always or LEqual. If I don't need occlusion, what is it that I am missing?
    upload_2019-9-16_9-38-40.png
    Do you suggest shrinking them as they move further away, to reduce overshading, or as they get close?
    They are already small, by the way, like a 20 cm quad.
     
  32. psomgeorg

    psomgeorg

    Joined:
    Mar 16, 2019
    Posts:
    71
    Also, can I change the bounds dynamically, every frame or every n frames? I call DrawMeshInstancedIndirect via command buffers in Start. So do I somehow need to either update the command, or delete it and add a new one?
     
  33. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    You’re rendering without a depth buffer. Notice how both of the example projects use the main camera’s depth buffer (note: that’s not camera depth texture!) as the depth when setting the render targets. You can do that, or use the camera depth texture and do a depth compare in the shader, though the later doesn’t get you any of the nice rendering optimizations the GPU might be able to.

    When using the Graphics.SetRenderTarget you need to make sure the main camera is rendered using a render texture with a depth buffer, then use just the depth buffer from that. For CommandBuffer.SetRenderTarget ... honestly I’m not entirely sure how to do this.
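    For the Graphics.SetRenderTarget route, that looks something like this (untested; cameraRT is an assumed render texture with a depth buffer that the main camera renders into):
    Code (csharp):
    // main camera renders into a render texture that has a 24 bit depth buffer
    RenderTexture cameraRT = new RenderTexture(cam.pixelWidth, cam.pixelHeight, 24);
    cam.targetTexture = cameraRT;

    // when drawing the OIT passes, keep their own color target but share the camera's depth
    Graphics.SetRenderTarget(accumTex.colorBuffer, cameraRT.depthBuffer);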

    BTW, when I say occlusion I’m referring to CPU side occlusion, which like culling will completely remove an object from being rendered if it can’t be seen (as determined by the occlusion data, which isn’t exactly the same as the rendered geometry). On the GPU the “occlusion” is done via depth testing.

    Close. The idea is to reduce the max screen space size, and fade them out to hide them completely (shrink them down to zero) when they’re close enough that you’d expect them to fill the screen. Overshading is about pixel coverage. Small particles are fine because they don’t cover a lot of screen space, thus not a lot of pixels.
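    In shader terms that's roughly this in the particle vertex shader (a sketch only; the distances are made-up numbers, and "fade" would be a new interpolator you multiply into the alpha in the fragment shader):
    Code (csharp):
    // sketch: shrink and fade particles that get too close to the camera
    // (in vert, after reading data and before the worldPos mul)
    float distToCam = distance(_WorldSpaceCameraPos, data.xyz);
    float fade = saturate((distToCam - 0.5) / 1.5); // 0 within 0.5m of the camera, 1 beyond 2m
    v.vertex.xyz *= fade;   // scale the quad down to nothing as the camera approaches
    o.fade = fade;          // pass along and multiply into the particle alpha in the fragment shader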

    Though there are also problems with overshading caused by triangles that are too small ... let's not get into that quite yet.
     
  34. Demhaa

    Demhaa

    Joined:
    Jun 23, 2018
    Posts:
    21
    Hold on, does this "work" with HDRP? Or is directly programming shaders for it still unfeasible?
     
  35. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    7,160
    "Directly programmed" shaders for HDRP is totally possible. Many of the shaders that come with the HDRP SRP are exactly that. The only problem is HDRP shaders are generally significantly more complex than the built in rendering pipeline shaders, and apart from unlit shaders, are incompatible with the other pipelines / rendering paths.

    Now, will the above examples work with the HDRP? Maybe, but I suspect the "proper" solution is to fork the HDRP and build this into the SRP directly.