Search Unity

  1. We are migrating the Unity Forums to Unity Discussions by the end of July. Read our announcement for more information and let us know if you have any questions.
    Dismiss Notice
  2. Dismiss Notice

DrawMeshInstancedIndirect Z Sort

Discussion in 'General Graphics' started by GilCat, Sep 27, 2019.

  1. GilCat


    Sep 21, 2013
    I'm having problems rendering objects in the correct Z order. I'm using DrawMeshInstancedIndirect, and it seems that the order in which the quads are rendered depends on the order in which the data is supplied to the compute buffer.
    However, if I change ZWrite to On, the order is no longer the one from the ComputeBuffer and I lose my transparency.
    I was wondering what the best option is for sorting the objects (based on distance to the camera) to obtain the correct behavior, or whether there is something else I can do regarding the shader's ZWrite. It seems to me that doing that sort on the CPU would be too heavy and would not scale well, so is sorting on the GPU with ComputeBuffers the best choice?

    Here is a screen of what is happening:

    Thanks in advance and sorry for some basic shader questions that might be here!

    Also here is my shader.

    // Instanced, alpha-blended sprite shader.
    // NOTE(review): the Properties / SubShader / Pass / CGPROGRAM wrappers and all
    // brace-only lines appear to have been lost when this snippet was pasted.
    Shader "Sprites/Instanced"
    // Material properties. The non-texture ones are used as per-material fallbacks
    // by the vertex shader when USE_COMPUTE is not defined.
    _BaseColor("Main Color", Color) = (1,1,1,1)
    _MainTex("Texture", 2D) = "white" {}
    _TransitionTex("Transition Texture", 2D) = "transparent" {}
    _Cutoff("Cutoff", Range(0, 1)) = 1
    _TileOffset("Tile Offset", Vector) = (1,1,0,0)
    _Scale("Mesh Scale", Vector) = (1,1,1)
    _Pivot("Pivot", Vector) = (0,0,0)
    // Rendered in the transparent queue.
    Tags { "Queue" = "Transparent" "IgnoreProjector" = "True" "RenderType" = "Transparent" }
    LOD 100
    // Alpha blending with depth writes disabled: overlapping sprites are not
    // depth-sorted per pixel, so the result depends on the order they are drawn in.
    Blend SrcAlpha OneMinusSrcAlpha
    ZWrite Off
    #pragma vertex vert
    #pragma fragment frag
    // Two variants: without USE_COMPUTE (material properties) and with it
    // (per-instance data read from structured buffers).
    #pragma multi_compile_local __ USE_COMPUTE
    #pragma multi_compile_instancing
    // Procedural instancing; names setup() below as the per-instance setup function.
    #pragma instancing_options procedural:setup
    #include "UnityCG.cginc"

    // Setup function named by the `procedural:setup` option above. Intentionally
    // empty: the vertex shader reads its per-instance data itself.
    void setup() {}

    // Per-vertex input to the vertex shader.
    struct appdata
    float4 vertex : POSITION; // vertex position as supplied by the mesh
    float2 uv : TEXCOORD0;    // first UV channel of the mesh

    // Vertex-to-fragment interpolators.
    struct v2f
    float2 uv1 : TEXCOORD0;     // UV used to sample _MainTex
    fixed2 uv2 : TEXCOORD1;     // UV used to sample _TransitionTex
    float4 vertex : SV_POSITION; // clip-space position
    fixed4 color : COLOR0;      // per-instance (or material) tint
    // NOTE(review): CUTOFF is a custom semantic name — confirm that every target
    // platform accepts it, or switch to a TEXCOORDn slot.
    float cutoff : CUTOFF;

    // Converts the low 16 bits of x, interpreted as a half-precision float, to a
    // 32-bit float using integer bit manipulation.
    // NOTE(review): HLSL has a built-in intrinsic with this same name — confirm
    // that this user-defined version does not clash with it on the target compiler.
    float f16tof32(uint x)
    // Half exponent mask (0x7c00) moved to where it sits after the <<13 below.
    const uint shifted_exp = (0x7c00 << 13);
    // Drop the sign bit and shift exponent+mantissa into float bit positions.
    uint uf = (x & 0x7fff) << 13;
    uint e = uf & shifted_exp;
    // Re-bias the exponent from half (bias 15) to float (bias 127).
    uf += (127 - 15) << 23;
    // All exponent bits set (Inf/NaN in half): add a further exponent adjustment
    // so the float exponent is all ones as well.
    uf += lerp(0, (128u - 16u) << 23, e == shifted_exp);
    // Zero exponent (zero/denormal in half): renormalise by bumping the exponent
    // and subtracting 6.10351563e-05 (2^-14).
    uf = lerp(uf, asuint(asfloat(uf + (1 << 23)) - 6.10351563e-05f), e == 0);
    // Restore the sign bit at the float sign position (bit 31).
    uf |= (x & 0x8000) << 16;
    return asfloat(uf);

    // Unpacks two halves from one uint: the low 16 bits become .x and the high
    // 16 bits become .y.
    float2 uintToFloat2(uint input) {
    return float2(f16tof32(input & 0x0000FFFF), f16tof32((input & 0xFFFF0000) >> 16));

    // Unpacks four halves from a uint2: input.x yields (x, y) and input.y yields
    // (z, w), each via uintToFloat2.
    float4 uint2ToFloat4(uint2 input) {
    float2 xy = uintToFloat2(input.x);
    float2 zw = uintToFloat2(input.y);
    return float4(xy.x, xy.y, zw.x, zw.y);

    // Unpacks a half-precision 4x4 matrix, stored as a uint4x2 with two halves per
    // uint, into a float4x4.
    //
    // Each pair of packed words is decoded into one float4 (c0..c3); those vectors
    // are then written down the columns of the result.
    // NOTE(review): the c0/c2/c1/c3 pairing mirrors the original code and depends on
    // how the CPU side packs the matrix — confirm against the code filling the buffer.
    float4x4 uint4x2ToFloat4x4(uint4x2 input) {
        // Keep the packed words as uint2. The original built float2(...) here, which
        // converts the raw bit patterns to floating-point *values* (rounding away low
        // bits that do not fit a 24-bit mantissa) before converting them back to
        // uint2, corrupting the packed halves.
        float4 c0 = uint2ToFloat4(uint2(input[0].x, input[1].x));
        float4 c2 = uint2ToFloat4(uint2(input[0].y, input[1].y));
        float4 c1 = uint2ToFloat4(uint2(input[2].x, input[3].x));
        float4 c3 = uint2ToFloat4(uint2(input[2].y, input[3].y));
        return float4x4(
            c0.x, c1.x, c2.x, c3.x,
            c0.y, c1.y, c2.y, c3.y,
            c0.z, c1.z, c2.z, c3.z,
            c0.w, c1.w, c2.w, c3.w);
    }

    // Textures and their tiling/offset vectors (the *_ST values are consumed by
    // TRANSFORM_TEX in the vertex shader).
    sampler2D _MainTex;
    float4 _MainTex_ST;
    float4 _TransitionTex_ST;
    sampler2D _TransitionTex;

    // Material-level values, used when USE_COMPUTE is not defined.
    float4 _TileOffset;
    float3 _Scale;
    float2 _Pivot;
    fixed4 _BaseColor;
    float _Cutoff;

    // Per-instance data, indexed by SV_InstanceID when USE_COMPUTE is defined.
    // uint is 32 bit and is filled with half2 (2 * 16 bit)
    StructuredBuffer<uint4x2> localToWorldBuffer; // half4x4
    StructuredBuffer<uint2> tileOffsetBuffer; // half4
    StructuredBuffer<uint2> scaleBuffer; // half4
    StructuredBuffer<uint> pivotBuffer; // half2
    StructuredBuffer<uint2> colorBuffer; // half4
    StructuredBuffer<uint> cutoffBuffer; // half

    // Vertex shader.
    //
    // With USE_COMPUTE, per-instance scale, pivot, tile/offset, transform, colour and
    // cutoff are unpacked from the half-precision structured buffers using
    // instanceID; otherwise the material properties are used with an identity
    // transform. The vertex is scaled, offset by the scaled pivot, transformed by
    // localToWorld and passed to UnityObjectToClipPos.
    //
    // v          - mesh vertex (position + UV)
    // instanceID - index of the instance being drawn (SV_InstanceID)
    // returns    - clip-space position, both UV sets, tint and cutoff
    v2f vert(appdata v, uint instanceID : SV_InstanceID)
    {
        v2f o;
    #ifdef USE_COMPUTE
        // scaleBuffer holds a packed half4; only xyz is used.
        float3 scale = uint2ToFloat4(scaleBuffer[instanceID]).xyz;
        float2 pivot = uintToFloat2(pivotBuffer[instanceID]);
        float4 tileOffset = uint2ToFloat4(tileOffsetBuffer[instanceID]);
        float4x4 localToWorld = uint4x2ToFloat4x4(localToWorldBuffer[instanceID]);
        float4 color = uint2ToFloat4(colorBuffer[instanceID]);
        // cutoffBuffer stores a half packed in a uint, so it must be decoded rather
        // than converted numerically.
        // NOTE(review): assumes the half is in the low 16 bits — confirm with the
        // code that fills the buffer.
        float cutoff = f16tof32(cutoffBuffer[instanceID]);
    #else
        float3 scale = _Scale;
        float2 pivot = _Pivot;
        float4 tileOffset = _TileOffset;
        float4x4 localToWorld = float4x4(
            1, 0, 0, 0,
            0, 1, 0, 0,
            0, 0, 1, 0,
            0, 0, 0, 1);
        float4 color = _BaseColor;
        float cutoff = _Cutoff;
    #endif
        float4x4 scaleMatrix = float4x4(
            scale.x, 0, 0, 0,
            0, scale.y, 0, 0,
            0, 0, scale.z, 0,
            0, 0, 0, 1);
        // The pivot is added as a direction (w = 0) so it is scaled like the vertex.
        float4 localVertexPos = mul(scaleMatrix, v.vertex) + mul(scaleMatrix, float4(pivot.x, pivot.y, 0, 0));
        float4 localTranslated = mul(localToWorld, localVertexPos);
        o.vertex = UnityObjectToClipPos(localTranslated);
        // tileOffset.xy scales the UV and tileOffset.zw offsets it. The expression is
        // parenthesised because TRANSFORM_TEX is a macro that applies `.xy` to its
        // first argument.
        o.uv1 = TRANSFORM_TEX((v.uv * tileOffset.xy + tileOffset.zw), _MainTex);
        o.uv2 = TRANSFORM_TEX(v.uv, _TransitionTex);
        o.color = color;
        o.cutoff = cutoff;
        return o;
    }

    // Fragment shader: samples the sprite, tints it, and gates its alpha with the
    // transition texture and the cutoff value.
    fixed4 frag(v2f i) : SV_Target
    float4 transit = tex2D(_TransitionTex, i.uv2);
    fixed4 col = tex2D(_MainTex, i.uv1) * i.color;
    // (transit.a < 1) is a bool used as 0/1, so alpha is 1 wherever the transition
    // texture is not fully opaque, and otherwise at least the cutoff.
    fixed alpha = max(transit.a < 1, i.cutoff);
    // step() yields 1 when alpha >= transit.b and 0 otherwise, so the sprite's own
    // alpha is either kept or zeroed.
    col.a = step(transit.b, alpha) * col.a;
    return col;
  2. richardkettlewell


    Unity Technologies

    Sep 9, 2015
    Yes that's correct, assuming you render sequentially from within that ComputeBuffer (i.e. your vertex shader for rendering pulls data out of the ComputeBuffer in linear order)

    That means it's down to you to sort the data yourself, which on the GPU is a complex topic. Depending on how many items you want to sort, you could look into GPU Bitonic Sorting - there should be some material on Google that you can use as a starting point, maybe even written for Unity. If you only have a small(ish) number of items to sort, you might even be able to do the sorting in a single Compute dispatch.

    Alternatively, if you could "get away with" enabling Z write and Alpha Test the edges (aka Cutout render mode, using "clip" in the pixel shader) you would get hardware per-pixel sorting, but you would need MSAA to get any kind of soft edge on the Sprites.
    Last edited: Sep 27, 2019
    GilCat likes this.
  3. GilCat


    Sep 21, 2013
    Thanks for your answer.

    Yes, that came up in my research about this. I just wanted to know if I was heading in the right direction.

    How is it that the default renderer handles sprites without needing Z write and Alpha Test enabled? Are they sorted somewhere deeper inside the rendering pipeline?

  4. richardkettlewell


    Unity Technologies

    Sep 9, 2015
    The default renderer knows all the object positions, and they are stored on the CPU, so we can perform a sort based on the Transparency Sort Mode defined on the Camera (usually distance to camera), and, once sorted, submit a draw call for each object in the sorted order. Sorting on the CPU is generally much easier than on the GPU :)

    But in your case, the object positions are in a ComputeBuffer, which is on the GPU. You're bypassing all the default Unity stuff by taking this path (presumably in order to get better performance).
    MINORLIFE and GilCat like this.
  5. BOBchasing


    Mar 3, 2020
    I have the same problem. Did you find a solution later?
  6. sewy


    Oct 11, 2015
    I'd like to know as well @GilCat .