Search Unity

Bad performance in specific device using Blit and Sobel Filter to make Cel Shading

Discussion in 'Universal Render Pipeline' started by JCorfer, Nov 20, 2020.

  1. JCorfer

    JCorfer

    Joined:
    Apr 22, 2016
    Posts:
    16
    Hi community!,

    I'm embarrassed because I'm getting low FPS in a simple scene on one specific device when I use a shader to do cel shading. For this I use URP with a Sobel shader and a Blit render feature. I set vSyncCount to 0 and the target frame rate to 120 from code.

    If I disable the cel-shading effect (by unchecking the checkbox in the URP Renderer Feature settings) I get 60 FPS, but if I enable it I get just over 30 FPS. I've tested on other devices (such as a ZTE Blade) and I get 60 FPS there with the cel-shading effect enabled.

    Unity version 2019.4.5f1

    Test Devices:
    • Huawei AGS-W09. Android 7 (Bad performance)
    • ZTE Blade A460. Android 5
    Graphics Api in Player Settings (In this order):
    • Vulkan
    • OpenGLES2
    What's happening here? Could it be caused by a specific GPU model?

    Thanks for the help!
    Greetings!

    Example Scene
    scene.png

    Huawei. Sobel filter active.

    Huawei-URP-sobel-profile.png

    Huawei. Sobel filter inactive.
    Huawei-URP-no-sobel-profile.png

    ZTE. Sobel filter active.

    ZTE-URP-sobel-profile.png

    Sobel Filter Shader
    Code (CSharp):
    1. Shader "Unlit/SobelFilter"
    2. {
    3.     Properties
    4.     {
    5.         [HideInInspector]_MainTex ("Base (RGB)", 2D) = "white" {}
    6.         _Delta ("Line Thickness", Range(0.0005, 0.0025)) = 0.001
    7.         [Toggle(RAW_OUTLINE)]_Raw ("Outline Only", Float) = 0
    8.         [Toggle(POSTERIZE)]_Poseterize ("Posterize", Float) = 0
    9.         _PosterizationCount ("Count", int) = 8
    10.     }
    11.     SubShader
    12.     {
    13.         Tags { "RenderType"="Opaque" }
    14.         LOD 200
    15.      
    16.         Pass
    17.         {
    18.             HLSLPROGRAM
    19.             #include "Packages/com.unity.render-pipelines.lightweight/ShaderLibrary/SurfaceInput.hlsl"
    20.             #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
    21.          
    22.             #pragma shader_feature RAW_OUTLINE
    23.             #pragma shader_feature POSTERIZE
    24.          
    25.             TEXTURE2D(_CameraDepthTexture);
    26.             SAMPLER(sampler_CameraDepthTexture);
    27.          
    28.             TEXTURE2D(_MainTex);
    29.             SAMPLER(sampler_MainTex);
    30.             float _Delta;
    31.             int _PosterizationCount;
    32.          
    33.             struct Attributes
    34.             {
    35.                 float4 positionOS       : POSITION;
    36.                 float2 uv               : TEXCOORD0;
    37.             };
    38.  
    39.             struct Varyings
    40.             {
    41.                 float2 uv        : TEXCOORD0;
    42.                 float4 vertex : SV_POSITION;
    43.                 UNITY_VERTEX_OUTPUT_STEREO
    44.             };
    45.          
    46.             float SampleDepth(float2 uv) // Reads _CameraDepthTexture at uv; takes the texture-array path, indexed by unity_StereoEyeIndex, when stereo instancing or multiview is enabled.
    47.             {
    48. #if defined(UNITY_STEREO_INSTANCING_ENABLED) || defined(UNITY_STEREO_MULTIVIEW_ENABLED)
    49.                 return SAMPLE_TEXTURE2D_ARRAY(_CameraDepthTexture, sampler_CameraDepthTexture, uv, unity_StereoEyeIndex).r;
    50. #else
    51.                 return SAMPLE_DEPTH_TEXTURE(_CameraDepthTexture, sampler_CameraDepthTexture, uv);
    52. #endif
    53.             }          
    54.  
    55.             float sobel (float2 uv) // Sobel gradient magnitude of the depth texture around uv; 0 when all sampled neighbours have equal depth.
    56.             {
    57.                 float2 delta = float2(_Delta, _Delta); // same UV offset on both axes, taken from the _Delta property
    58.              
    59.                 float hr = 0; // horizontal kernel sum: samples at u - delta minus samples at u + delta
    60.                 float vt = 0; // vertical kernel sum: samples at v - delta minus samples at v + delta
    61.              
    62.                 hr += SampleDepth(uv + float2(-1.0, -1.0) * delta) *  1.0;
    63.                 hr += SampleDepth(uv + float2( 1.0, -1.0) * delta) * -1.0;
    64.                 hr += SampleDepth(uv + float2(-1.0,  0.0) * delta) *  2.0; // centre row carries double weight
    65.                 hr += SampleDepth(uv + float2( 1.0,  0.0) * delta) * -2.0;
    66.                 hr += SampleDepth(uv + float2(-1.0,  1.0) * delta) *  1.0;
    67.                 hr += SampleDepth(uv + float2( 1.0,  1.0) * delta) * -1.0;
    68.              
    69.                 vt += SampleDepth(uv + float2(-1.0, -1.0) * delta) *  1.0; // NOTE(review): the four diagonal offsets are sampled a second time in this group (12 SampleDepth calls for 8 distinct offsets); caching them in locals would avoid relying on the compiler to merge the fetches
    70.                 vt += SampleDepth(uv + float2( 0.0, -1.0) * delta) *  2.0; // centre column carries double weight
    71.                 vt += SampleDepth(uv + float2( 1.0, -1.0) * delta) *  1.0;
    72.                 vt += SampleDepth(uv + float2(-1.0,  1.0) * delta) * -1.0;
    73.                 vt += SampleDepth(uv + float2( 0.0,  1.0) * delta) * -2.0;
    74.                 vt += SampleDepth(uv + float2( 1.0,  1.0) * delta) * -1.0;
    75.              
    76.                 return sqrt(hr * hr + vt * vt); // length of the (hr, vt) gradient vector
    77.             }
    78.          
    79.             Varyings vert(Attributes input) // Vertex stage: outputs the clip-space position and passes the UV through unchanged.
    80.             {
    81.                 Varyings output = (Varyings)0; // zero-initialise every output field
    82.                 UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(output);
    83.  
    84.                 VertexPositionInputs vertexInput = GetVertexPositionInputs(input.positionOS.xyz);
    85.                 output.vertex = vertexInput.positionCS; // only the clip-space position from the helper's result is used
    86.                 output.uv = input.uv;
    87.              
    88.                 return output;
    89.             }
    90.          
    91.             half4 frag (Varyings input) : SV_Target // Fragment stage: returns the edge factor alone (RAW_OUTLINE) or the optionally posterized _MainTex color multiplied by the edge factor.
    92.             {
    93.                 UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(input);
    94.              
    95.                 float s = pow(1 - saturate(sobel(input.uv)), 50); // 1 where no depth edge is detected; the exponent 50 makes it fall steeply toward 0 as the edge magnitude grows
    96. #ifdef RAW_OUTLINE
    97.                 return half4(s.xxx, 1); // grayscale edge factor with opaque alpha
    98. #else
    99.                 half4 col = SAMPLE_TEXTURE2D(_MainTex, sampler_MainTex, input.uv);
    100. #ifdef POSTERIZE
    101.                 col = pow(abs(col), 0.4545); // 0.4545 is approximately 1/2.2 and is undone by pow(col, 2.2) below; presumably a linear<->gamma round trip, confirm
    102.                 float3 c = RgbToHsv(col);
    103.                 c.z = round(c.z * _PosterizationCount) / _PosterizationCount; // quantize the HSV value channel to multiples of 1/_PosterizationCount; NOTE(review): a count of 0 divides by zero
    104.                 col = float4(HsvToRgb(c), col.a); // alpha is carried over from before the HSV conversion
    105.                 col = pow(col, 2.2);
    106. #endif
    107.                 return col * s; // scales all four channels (alpha included) by the edge factor
    108. #endif
    109.             }
    110.          
    111.             #pragma vertex vert
    112.             #pragma fragment frag
    113.          
    114.             ENDHLSL
    115.         }
    116.     }
    117.     FallBack "Diffuse"
    118. }
    119.  
    Blit.cs
    Code (CSharp):
    1. using System.Collections.Generic;
    2. using UnityEngine.Serialization;
    3.  
    4. namespace UnityEngine.Rendering.LWRP
    5. {
    6.     public class Blit : UnityEngine.Rendering.Universal.ScriptableRendererFeature // Renderer feature that enqueues a BlitPass drawing the camera color target through settings.blitMaterial.
    7.     {
    8.         [System.Serializable]
    9.         public class BlitSettings // Serialized configuration for the feature.
    10.         {
    11.             public UnityEngine.Rendering.Universal.RenderPassEvent Event = UnityEngine.Rendering.Universal.RenderPassEvent.AfterRenderingOpaques; // render pass event handed to the BlitPass constructor
    12.          
    13.             public Material blitMaterial = null; // material used for the blit; the pass is not enqueued while this is null
    14.             public int blitMaterialPassIndex = -1; // clamped to [-1, passCount - 1] in Create()
    15.             public Target destination = Target.Color; // Color: blit back to the camera target; Texture: blit into the texture named by textureId
    16.             public string textureId = "_BlitPassTexture"; // name used to initialise m_RenderTextureHandle
    17.         }
    18.      
    19.         public enum Target // Selects where the blit result is written.
    20.         {
    21.             Color,
    22.             Texture
    23.         }
    24.  
    25.         public BlitSettings settings = new BlitSettings();
    26.         UnityEngine.Rendering.Universal.RenderTargetHandle m_RenderTextureHandle; // destination handle used when settings.destination is Target.Texture
    27.  
    28.         BlitPass blitPass;
    29.  
    30.         public override void Create() // Clamps the configured pass index, builds the BlitPass and initialises the destination texture handle.
    31.         {
    32.             var passIndex = settings.blitMaterial != null ? settings.blitMaterial.passCount - 1 : 1; // upper bound for the pass index; falls back to 1 when no material is assigned
    33.             settings.blitMaterialPassIndex = Mathf.Clamp(settings.blitMaterialPassIndex, -1, passIndex);
    34.             blitPass = new BlitPass(settings.Event, settings.blitMaterial, settings.blitMaterialPassIndex, name); // the feature's name is passed as the pass's profiler tag
    35.             m_RenderTextureHandle.Init(settings.textureId);
    36.         }
    37.  
    38.         public override void AddRenderPasses(UnityEngine.Rendering.Universal.ScriptableRenderer renderer, ref UnityEngine.Rendering.Universal.RenderingData renderingData) // Configures the pass with source and destination and enqueues it; logs a warning and skips when no material is assigned.
    39.         {
    40.             var src = renderer.cameraColorTarget;
    41.             var dest = (settings.destination == Target.Color) ? UnityEngine.Rendering.Universal.RenderTargetHandle.CameraTarget : m_RenderTextureHandle;
    42.  
    43.             if (settings.blitMaterial == null)
    44.             {
    45.                 Debug.LogWarningFormat("Missing Blit Material. {0} blit pass will not execute. Check for missing reference in the assigned renderer.", GetType().Name);
    46.                 return; // nothing is enqueued without a material
    47.             }
    48.  
    49.             blitPass.Setup(src, dest);
    50.             renderer.EnqueuePass(blitPass);
    51.         }
    52.     }
    53. }
    54.  
    55.  
    BlitPass.cs
    Code (CSharp):
    1. namespace UnityEngine.Rendering.LWRP
    2. {
    3.     /// <summary>
    4.     /// Blits the source color buffer through <c>blitMaterial</c> into the destination.
    5.     ///
    6.     /// When the destination is the camera target, the result is first rendered
    7.     /// into a temporary color texture and then blitted back onto the source,
    8.     /// because the same target cannot be read and written at once.
    9.     /// </summary>
    10.     internal class BlitPass : UnityEngine.Rendering.Universal.ScriptableRenderPass
    11.     {
    12.         public enum RenderTarget // NOTE(review): not referenced anywhere in the visible code
    13.         {
    14.             Color,
    15.             RenderTexture,
    16.         }
    17.  
    18.         public Material blitMaterial = null; // material applied by the blit
    19.         public int blitShaderPassIndex = 0; // shader pass index forwarded to Blit
    20.         public FilterMode filterMode { get; set; } // filter mode of the temporary texture; not assigned anywhere in the visible code
    21.  
    22.         private RenderTargetIdentifier source { get; set; }
    23.         private UnityEngine.Rendering.Universal.RenderTargetHandle destination { get; set; }
    24.  
    25.         UnityEngine.Rendering.Universal.RenderTargetHandle m_TemporaryColorTexture; // intermediate target used when blitting back to the camera target
    26.         string m_ProfilerTag; // name given to the command buffer fetched in Execute
    27.  
    28.         /// <summary>
    29.         /// Create the BlitPass and initialise the handle of its temporary color texture.
    30.         /// </summary>
    31.         public BlitPass(UnityEngine.Rendering.Universal.RenderPassEvent renderPassEvent, Material blitMaterial, int blitShaderPassIndex, string tag)
    32.         {
    33.             this.renderPassEvent = renderPassEvent;
    34.             this.blitMaterial = blitMaterial;
    35.             this.blitShaderPassIndex = blitShaderPassIndex;
    36.             m_ProfilerTag = tag;
    37.             m_TemporaryColorTexture.Init("_TemporaryColorTexture");
    38.         }
    39.  
    40.         /// <summary>
    41.         /// Configure the pass with the source and destination to execute on.
    42.         /// </summary>
    43.         /// <param name="source">Source Render Target</param>
    44.         /// <param name="destination">Destination Render Target</param>
    45.         public void Setup(RenderTargetIdentifier source, UnityEngine.Rendering.Universal.RenderTargetHandle destination)
    46.         {
    47.             this.source = source;
    48.             this.destination = destination;
    49.         }
    50.  
    51.         /// <inheritdoc/>
    52.         public override void Execute(ScriptableRenderContext context, ref UnityEngine.Rendering.Universal.RenderingData renderingData)
    53.         {
    54.             CommandBuffer cmd = CommandBufferPool.Get(m_ProfilerTag);
    55.          
    56.             RenderTextureDescriptor opaqueDesc = renderingData.cameraData.cameraTargetDescriptor;
    57.             opaqueDesc.depthBufferBits = 0; // the temporary texture is allocated without a depth buffer
    58.  
    59.             // Can't read and write to same color target, create a temp render target to blit.
    60.             if (destination == UnityEngine.Rendering.Universal.RenderTargetHandle.CameraTarget)
    61.             {
    62.                 cmd.GetTemporaryRT(m_TemporaryColorTexture.id, opaqueDesc, filterMode);
    63.                 Blit(cmd, source, m_TemporaryColorTexture.Identifier(), blitMaterial, blitShaderPassIndex); // first blit: source -> temporary texture, through the material
    64.                 Blit(cmd, m_TemporaryColorTexture.Identifier(), source); // second blit: copy the result back onto the source, without a material
    65.             }
    66.             else
    67.             {
    68.                 Blit(cmd, source, destination.Identifier(), blitMaterial, blitShaderPassIndex); // NOTE(review): no allocation of the destination texture is visible in this pass; confirm it is created elsewhere
    69.             }
    70.          
    71.             context.ExecuteCommandBuffer(cmd);
    72.             CommandBufferPool.Release(cmd);
    73.         }
    74.  
    75.         /// <inheritdoc/>
    76.         public override void FrameCleanup(CommandBuffer cmd)
    77.         {
    78.             if (destination == UnityEngine.Rendering.Universal.RenderTargetHandle.CameraTarget) // the temporary texture is only requested in this case, so only release it here
    79.                 cmd.ReleaseTemporaryRT(m_TemporaryColorTexture.id);
    80.         }
    81.     }
    82. }
    83.  
     
    Last edited: Nov 20, 2020
  2. JCorfer

    JCorfer

    Joined:
    Apr 22, 2016
    Posts:
    16
    I've seen several threads on the forum about bugs in URP. Could my case be another URP bug?
     
  3. weiping-toh

    weiping-toh

    Joined:
    Sep 8, 2015
    Posts:
    192
    Nope, Blit is just simply expensive. I had bottlenecks using Blit too.
    There is the alternative method which is ~0.2ms faster than Blit, where you set the renderTarget to the dest texture and draw a full screen mesh/quad.

    Instead of
    Code (CSharp):
    1. command.Blit(src,dst,mat)
    you use

    Code (CSharp):
    1. command.SetRenderTarget(dst);
    2. command.SetGlobalTexture("_BlitTex", src);
    3. command.DrawMesh(fullscreen, identity,material);
    https://stackoverflow.com/a/23994979
     
    JCorfer and tmonestudio like this.
  4. weiping-toh

    weiping-toh

    Joined:
    Sep 8, 2015
    Posts:
    192
    The degradation in performance will be more significant if your resolution is high
     
  5. JCorfer

    JCorfer

    Joined:
    Apr 22, 2016
    Posts:
    16
    Thanks for the answer @weiping-toh

    I've been researching this, and I can indeed see the FPS drop when I increase the resolution in the Editor. I'm trying to implement your solution using the DrawMesh command, but I'm not sure I've understood it completely.

    Currently, every sprite or mesh that uses a Universal Render Pipeline shader and is seen through the camera is automatically drawn with an outline. To implement the better-performing solution, these are the changes I think I have to make:


    Matrix4x4 clipToView = GL.GetGPUProjectionMatrix(Camera.main.projectionMatrix, true).inverse;
    cmd.GetTemporaryRT(m_TemporaryColorTexture.id, opaqueDesc, filterMode);
    cmd.SetRenderTarget(destination.Identifier());
    cmd.SetGlobalTexture("_BlitTex", source);
    cmd.DrawMesh(mesh, clipToView, blitMaterial, 0);


    Is it correct that the 'mesh' should match the screen resolution and be attached to Camera.main?
     
    tmonestudio likes this.
  6. weiping-toh

    weiping-toh

    Joined:
    Sep 8, 2015
    Posts:
    192
    JCorfer and tmonestudio like this.
  7. JCorfer

    JCorfer

    Joined:
    Apr 22, 2016
    Posts:
    16
    Thanks again @weiping-toh !

    That URL uses a script on the Camera and a mesh assigned through the Inspector; I still don't understand how to change my code to improve performance and get the same effect.

    I've searched for other approaches, and it's incredible that I can't find an outline implementation that performs well on all devices, for a technique that dates back to the year 2000 (thanks to the game Jet Set Radio). I don't want to think about what will happen when I have to port the game to iOS...:eek:
     
  8. weiping-toh

    weiping-toh

    Joined:
    Sep 8, 2015
    Posts:
    192
    It is just a matter of abstraction at the GPU level. The blit command is like a magic wand for texture copying and rendering to screen, but what it abstracts away is a complicated set of instructions that has to consider the various use cases and do a lot of processing. The idea is that if you have a specific use case, like copying textures and rendering to memory, you do not need the steps for sending the memory to the framebuffer/screen and resolving AA, which are all part of the blit command. Blit is like the all-in-one command on the GPU, which makes it very cumbersome.
     
    tmonestudio and JCorfer like this.
  9. JCorfer

    JCorfer

    Joined:
    Apr 22, 2016
    Posts:
    16