Search Unity

Performance drop with increasing billboard size

Discussion in 'Shaders' started by Kivak, Jul 20, 2018.

  1. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    Hello everyone,

    I am creating an application that needs to handle lots of billboards. Stupid amounts of billboards. I have elected to use geometry shaders for this and I can easily create millions of billboarded quads without any problems.... except when they start getting big.

    When I create 3.2 million points with a size of 1, I get about 70fps on my machine. However, when I slide that over to a size of 100, I go down to about 15fps with the same number of points.

    My question is: why? It's not generating any additional geometry. I am not using shadows... is there a way to mitigate this?

    I was thinking it had something to do with shadows, so I removed them. The only other answer I have thought of comes down to geometry intersection. The only way to mitigate that comes from making smaller billboards. But for the sake of argument, let's say that's not possible.

    I have attached a project that will demonstrate this. Just hit Play, it will generate 3.2 million quads, then you can change the material's QuadSize from 1 to 100 and you'll see the drop.

    Thank you all for any help!

    PS: Also, here is my shader code in case something pops out:
    Code (CSharp):
    1. Shader "Custom/Points" { // Expands every input point into a camera-facing quad in a geometry shader and draws it with its vertex color plus fog.
    2.     Properties {
    3.         _Opacity("Opacity", Range(0,1)) = 1.0 // Declared, but never read by vert/geom/frag below.
    4.         _QuadSize("Size", Range(0,100)) = 1.0 // Used as the quad HALF-extent in geom (see halfS).
    5.     }
    6.     SubShader {
    7.         Tags {"Queue"="Transparent" "IgnoreProjector"="True" "RenderType"="Transparent" }
    8.         Blend SrcAlpha OneMinusSrcAlpha // Alpha blending; no ZWrite/ZTest is specified anywhere in this shader.
    9.         LOD 200
    10.  
    11.         Pass {
    12.             Tags{ "LightMode" = "ForwardBase"}
    13.             CULL Back
    14.  
    15.             CGPROGRAM
    16.             #include "UnityCG.cginc"
    17.             #pragma vertex vert
    18.             #pragma fragment frag
    19.             #pragma geometry geom
    20.             #pragma multi_compile_fog
    21.             #pragma target 5.0
    22.  
    23.             struct v2g { // Vertex -> geometry payload: one entry per input point.
    24.                 float4 pos: SV_POSITION; // Filled with the untransformed v.vertex in vert (not a clip-space position).
    25.                 float3 norm: NORMAL;
    26.                 float2 uv: TEXCOORD0; // Written in vert, never read in geom.
    27.                 float4 color: COLOR;
    28.             };
    29.  
    30.             struct g2f { // Geometry -> fragment payload: one entry per emitted quad corner.
    31.                 float4 pos: SV_POSITION;
    32.                 float3 norm: NORMAL; // Written in geom, never read in frag.
    33.                 float4 diffuseColor : COLOR;
    34.                 UNITY_FOG_COORDS(2)
    35.             };
    36.  
    37.             half _QuadSize;
    38.             half _Opacity;
    39.  
    40.             inline float3 ObjSpaceViewDir( in float3 v ) { // Returns the (un-normalized) vector from v to the camera position transformed by unity_WorldToObject.
    41.                 float3 objSpaceCameraPos = mul(unity_WorldToObject, float4(_WorldSpaceCameraPos.xyz, 1)).xyz * 1.0; // The "* 1.0" has no effect.
    42.                 return objSpaceCameraPos - v;
    43.             }
    44.  
    45.             inline float3 objSpaceCameraPos() { // Returns the camera position transformed by unity_WorldToObject.
    46.                 float3 objSpaceCameraPos = mul(unity_WorldToObject, float4(_WorldSpaceCameraPos.xyz, 1)).xyz * 1.0; // Local shadows the function name; same expression as in ObjSpaceViewDir.
    47.                 return objSpaceCameraPos;
    48.             }
    49.            
    50.             v2g vert(appdata_full v) { // Pass-through vertex stage: copies the mesh attributes unchanged; projection happens later in geom.
    51.                 float3 v0 = v.vertex.xyz; // Unused local.
    52.  
    53.                 v2g OUT;
    54.                 OUT.pos = v.vertex;
    55.                 OUT.norm = v.normal;
    56.                 OUT.uv = v.texcoord;
    57.                 OUT.color = v.color;
    58.  
    59.                 return OUT;
    60.             }
    61.  
    62.             [maxvertexcount(4)]
    63.             void geom(point v2g IN[1], inout TriangleStream<g2f> triStream) { // Emits one 4-vertex triangle strip (a quad) per input point, oriented toward the camera.
    64.                
    65.                 float3 up = float3(0, 1, 0);
    66.                 float3 look = ObjSpaceViewDir( IN[0].pos ); // NOTE(review): IN[0].pos is float4 while the helper above takes float3; UnityCG.cginc is believed to declare its own ObjSpaceViewDir too - confirm which overload is used.
    67.                 float3 cameraPosObjectSpace = objSpaceCameraPos(); // Unused local.
    68.                
    69.                 look = normalize(look);
    70.                 float3 right = cross(up, look); // Not normalized: its length shrinks as look approaches +/-Y, so the quad shrinks with it.
    71.                 up = cross(look,right); // Perpendicular to look and right; same length as right because look is unit length.
    72.                                
    73.                 float halfS = _QuadSize; // Half-extent: corners sit at +/- halfS along right and up.
    74.                
    75.                 float4 v[4]; // Corners in strip order: +right-up, +right+up, -right-up, -right+up.
    76.                 v[0] = float4(IN[0].pos + halfS * right - halfS * up, 1.0f); // float4 + float3 relies on implicit truncation of IN[0].pos to xyz.
    77.                 v[1] = float4(IN[0].pos + halfS * right + halfS * up, 1.0f);
    78.                 v[2] = float4(IN[0].pos - halfS * right - halfS * up, 1.0f);
    79.                 v[3] = float4(IN[0].pos - halfS * right + halfS * up, 1.0f);
    80.                                
    81.                 float3 normal = -look;
    82.                 float4 color = IN[0].color; // Same color is written to all four corners.
    83.  
    84.                 g2f OUT;
    85.                 UNITY_INITIALIZE_OUTPUT(g2f, OUT);
    86.  
    87.                 OUT.pos = UnityObjectToClipPos(v[0]); // Each corner below repeats the same four steps: project, set normal/color, transfer fog, append.
    88.                 OUT.norm = normal;
    89.                 OUT.diffuseColor = color;
    90.                 UNITY_TRANSFER_FOG(OUT, OUT.pos);
    91.                 triStream.Append(OUT);
    92.  
    93.                 OUT.pos = UnityObjectToClipPos(v[1]);
    94.                 OUT.norm = normal;
    95.                 OUT.diffuseColor = color;
    96.                 UNITY_TRANSFER_FOG(OUT, OUT.pos);
    97.                 triStream.Append(OUT);
    98.  
    99.                 OUT.pos = UnityObjectToClipPos(v[2]);
    100.                 OUT.norm = normal;
    101.                 OUT.diffuseColor = color;
    102.                 UNITY_TRANSFER_FOG(OUT, OUT.pos);
    103.                 triStream.Append(OUT);
    104.  
    105.                 OUT.pos = UnityObjectToClipPos(v[3]);
    106.                 OUT.norm = normal;
    107.                 OUT.diffuseColor = color;
    108.                 UNITY_TRANSFER_FOG(OUT, OUT.pos);
    109.                 triStream.Append(OUT);
    110.                
    111.             }
    112.  
    113.             half4 frag(g2f IN) :COLOR{ // Outputs the interpolated point color with fog applied; _Opacity and IN.norm are not read.
    114.                 float4 col = IN.diffuseColor;
    115.                 UNITY_APPLY_FOG(IN.fogCoord, col); // apply fog
    116.                 return col;
    117.             }
    118.  
    119.            
    120.  
    121.             ENDCG
    122.         }
    123.     }
    124.  
    125. }
     

    Attached Files:

  2. LennartJohansen

    LennartJohansen

    Joined:
    Dec 1, 2014
    Posts:
    2,394
    As the billboards get bigger you have more overdraw: more pixels per billboard.
    That takes time in the pixel shader. The vertex shader work should be the same.
     
  3. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    If I am only using a single color, can I not have a fragment shader at all?
     
  4. LennartJohansen

    LennartJohansen

    Joined:
    Dec 1, 2014
    Posts:
    2,394
    Each pixel is still processed, just faster with a single unlit shader with one color.
     
  5. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    OK, so for someone who is still very new to shaders, how would I go about doing that? How would I optimize this shader?
     
  6. LennartJohansen

    LennartJohansen

    Joined:
    Dec 1, 2014
    Posts:
    2,394
    There is not much to do on the fragment shader, you only set the color and apply fog...
     
  7. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    So there is nothing that can be done to reduce the performance impact of this shader?
     
  8. LennartJohansen

    LennartJohansen

    Joined:
    Dec 1, 2014
    Posts:
    2,394
    Not really. Larger triangles will generate more pixels. If you had them sorted you could skip a lot of pixels with early z testing.
     
  9. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    The fragment shader is how GPUs output a color value. Without them you don't get any color values at all.

    Think about it this way. When a GPU renders a triangle it runs a fragment shader for every pixel that triangle covers. If you increase the size of the triangle, it covers more pixels, thus it has to run the fragment shader more times. If you have a screen resolution of 1920x1080 that's 2,073,600 pixels that can be rendered to, if your quad only covers an on screen area of 50x50 pixels, that's 2,500 pixels that need to be updated to render it, and thus 2,500 invocations of the fragment shader. If you have 10 quads that same size, that's 25,000 invocations of the fragment shader that are required, even if they're in an even grid on the screen and not overlapping, or all in one stack at the center of the screen!

    If you make the quads cover more of the screen, that's more times the fragment shader has to run. There really isn't a magic bullet to fixing this apart from don't render quads that size, or render using a lower screen / render target resolution. Even ignoring the execution time of fragment shaders themselves, which is quite likely nearly free for all intents in this case, the cost of modifying the color values in memory for that many times will eventually start to be noticeable on the framerate no matter how simple the fragment shader is.

    There are some minor optimizations you could do for this specific case, like not transfer the fog values, color, or normal to the fragment shader if they're not needed.
     
    Last edited: Jul 20, 2018
  10. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    OK, so I understand I need a fragment shader, as well as how it works in terms of rendering. But if the renderer understands z-depth, wouldn't it not re-render the same pixel? Why does it matter if I have a single 50x50 square or a trillion 50x50 squares overlapping each other? Shouldn't it just render the 50x50 pixel area of the front-most object and ignore the remainder?
     
  11. LennartJohansen

    LennartJohansen

    Joined:
    Dec 1, 2014
    Posts:
    2,394
    It still has to test the z-buffer for each pixel to see if it is in front. And the work you do when setting a color is really fast.
     
  12. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    Depends on the order they render in, and if your shader is writing to / testing against the depth buffer, and if the render target has one. In your case your shader is using the defaults, which writes to the depth buffer (ZWrite On) and tests against it for early z rejection (ZTest LEqual). However the GPU renders geometry in whatever order you tell it to. If your quad positions are sorted furthest to nearest, it'll be worse than not using depth at all as now the shader is writing to both depth and color (more data == more time) and no pixels will be rejected since the depth values are always further away than what's being drawn. If you have it sorted nearest to furthest then it can optimally do early z rejection and avoid unnecessary fragment shader invocations, but it still has to do the tests for each pixel. This is usually faster than running the fragment shader, but I'm not actually sure in the case of outputting a solid color.
     
  13. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    Basically it's re-rendering that 50x50 square a trillion times. With early z rejection it just might skip running the fragment shader for some of them, assuming they rendered in the optimal order.
     
  14. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    OK, so really the only way to increase performance is by reducing the number of billboards or reducing the size of them?
     
  15. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    Number (on screen), size, or screen resolution.
     
  16. bgolus

    bgolus

    Joined:
    Dec 7, 2012
    Posts:
    12,352
    One option you have is when the quads start to get large enough that they're overlapping, swap them out for a single quad.
     
  17. Kivak

    Kivak

    Joined:
    Jul 13, 2013
    Posts:
    140
    OK excellent. Not what I had hoped for, but at least I understand better about what's going on. Thank you both for all the help and explanation!
     
  18. aleksandrk

    aleksandrk

    Unity Technologies

    Joined:
    Jul 3, 2017
    Posts:
    3,025