Search Unity

  1. Welcome to the Unity Forums! Please take the time to read our Code of Conduct to familiarize yourself with the forum rules and how to post constructively.
  2. We have updated the language to the Editor Terms based on feedback from our employees and community. Learn more.
    Dismiss Notice

Resolved Code works fine until I turn up the resolution variable past a point

Discussion in 'Shaders' started by Technokid2000, Dec 28, 2021.

  1. Technokid2000

    Technokid2000

    Joined:
    Dec 18, 2016
    Posts:
    36
    Hello!

    This is the second time I've posted today, sorry about that. Just wrapping my head around a few GPU things as I learn. I have the following code that I've been writing for a custom built sphere where all vertices on the sphere are the same distance apart from each other. It seems to work just fine.... until I turn the "Resolution" past 5 (Where resolution is just the number of times the function iterates smoothing out the sphere). I am not sure what the problem could be, as the code works perfectly up until that point.

    C# code:
    Code (CSharp):
    1. using System;
    2. using System.Collections;
    3. using Unity.Collections.LowLevel.Unsafe;
    4. using UnityEditor;
    5. using UnityEngine;
    6.  
    7. public class PGSUserUI : MonoBehaviour {
    8.     public ComputeShader CS_FirstWorld;
    9.     public int resolution;
    10.     public Mesh planetMesh;
    11.     public bool doubleSidedShape;
    12.  
    13.     public void CreateNewWorld() {
    14.         for (int i = 0; i < 10; i++) {
    15.             StartCoroutine(GenerateSphere());
    16.         }
    17.     }
    18.  
    19.     public IEnumerator GenerateSphere() {
    20.         //Gonna create a texture and pass that to the GPU
    21.         Vector3[] initPositions = new[] {
    22.             //Normal object faces
    23.             new Vector3(0,1,0), new Vector3(0,0,1), new Vector3(1,0,0),
    24.             new Vector3(0,1,0), new Vector3(-1,0,0), new Vector3(0,0,1),
    25.             new Vector3(0,1,0), new Vector3(0,0,-1), new Vector3(-1,0,0),
    26.             new Vector3(0,1,0), new Vector3(1,0,0), new Vector3(0,0,-1),
    27.             new Vector3(0,-1,0), new Vector3(1,0,0), new Vector3(0,0,1),
    28.             new Vector3(0,-1,0), new Vector3(-1,0,0), new Vector3(0,0,-1),
    29.             new Vector3(0,-1,0), new Vector3(0,0,-1), new Vector3(1,0,0),
    30.             new Vector3(0,-1,0), new Vector3(0,0,1), new Vector3(-1,0,0),
    31.         };
    32.        
    33.         Vector3[] InputShape = initPositions;
    34.         for (int resIteration = 0; resIteration < resolution; resIteration++) {
    35.             DateTime before = DateTime.Now;
    36.            
    37.             int inputPoints = InputShape.Length;
    38.             int numOfPositions = 4 * inputPoints;
    39.  
    40.             Debug.Log("Inputs: " + inputPoints + "\t\tNumOfPos: " + numOfPositions + "\t\tNumTriangles: " + (numOfPositions / 3));
    41.  
    42.             ComputeBuffer inputBuffer = new ComputeBuffer(inputPoints, UnsafeUtility.SizeOf(typeof(Vector3)));
    43.             ComputeBuffer outputBuffer = new ComputeBuffer(numOfPositions, UnsafeUtility.SizeOf(typeof(Vector3)));
    44.  
    45.             inputBuffer.SetData(InputShape);
    46.            
    47.             CS_FirstWorld.SetFloat("size", 1.50f);
    48.             CS_FirstWorld.SetBuffer(0, "InputTrigData", inputBuffer);
    49.             CS_FirstWorld.SetBuffer(0, "OutputTrigData", outputBuffer);
    50.             CS_FirstWorld.Dispatch(0, 64, 64, 64);
    51.  
    52.             Vector3[] OutputTrigData = new Vector3[numOfPositions];
    53.             outputBuffer.GetData(OutputTrigData);
    54.             inputBuffer.Release();
    55.             outputBuffer.Release();
    56.  
    57.             Debug.Log("Milliseconds: " + DateTime.Now.Subtract(before).Milliseconds);
    58.  
    59.             //GPU has completed it's calculation! ---------------------------------------------------
    60.             InputShape = OutputTrigData;
    61.         }
    62.  
    63.         if (doubleSidedShape) {
    64.             Vector3[] result = new Vector3[InputShape.Length * 2];
    65.             for (int i = 0; i < InputShape.Length; i++) {
    66.                 result[i] = InputShape[i];
    67.             }
    68.             for (int i = 0; i < InputShape.Length / 3; i++) {
    69.                 result[InputShape.Length + 3*i + 0] = InputShape[3*i + 0];
    70.                 result[InputShape.Length + 3*i + 1] = InputShape[3*i + 2];
    71.                 result[InputShape.Length + 3*i + 2] = InputShape[3*i + 1];
    72.             }
    73.             InputShape = result;
    74.         }
    75.         Debug.Log("Vertices: " + InputShape.Length);
    76.         Debug.Log("Triangles: " + InputShape.Length/3);
    77.         int[] triangles = new int[InputShape.Length];
    78.         for(int i = 0; i < InputShape.Length; i++) {
    79.             triangles[i] = i;
    80.         }
    81.  
    82.         planetMesh = new Mesh {
    83.             vertices = InputShape,
    84.             triangles = triangles
    85.         };
    86.         planetMesh.RecalculateNormals();
    87.  
    88.         GameObject go = new GameObject();
    89.         go.AddComponent<MeshFilter>().mesh = planetMesh;
    90.         MeshRenderer mr = go.AddComponent<MeshRenderer>();
    91.         mr.material = new Material(Shader.Find("Standard"));
    92.  
    93.         yield return null;
    94.     }
    95.  
    96.     Vector3 ColorToVector3(Color color) {
    97.         return new Vector3(color.r,color.g,color.b);
    98.     }
    99.  
    100.     Texture2D ToTexture2D(RenderTexture rTex) {
    101.         Texture2D tex = new Texture2D(512, 512, TextureFormat.RGB24, false);
    102.  
    103.         // ReadPixels looks at the active RenderTexture.
    104.         RenderTexture.active = rTex;
    105.         tex.ReadPixels(new Rect(0, 0, rTex.width, rTex.height), 0, 0);
    106.         tex.Apply();
    107.         return tex;
    108.     }
    109. }
    110.  
    111. [CustomEditor(typeof(PGSUserUI))]
    112. public class PGSUserUI_Inspector : Editor
    113. {
    114.     PGSUserUI baseScript;
    115.  
    116.     public override void OnInspectorGUI()
    117.     {
    118.         //base.OnInspectorGUI();
    119.  
    120.         EditorGUI.BeginChangeCheck();
    121.  
    122.         if (baseScript == default) baseScript = (PGSUserUI)target;
    123.        
    124.         baseScript.CS_FirstWorld = (ComputeShader)EditorGUILayout.ObjectField("Compute Shader", baseScript.CS_FirstWorld, typeof(ComputeShader), false);
    125.         baseScript.resolution = EditorGUILayout.IntField("Sphere Resolution", baseScript.resolution);
    126.         baseScript.doubleSidedShape = EditorGUILayout.BeginToggleGroup("Shape has internal visuals", baseScript.doubleSidedShape);
    127.         EditorGUILayout.EndToggleGroup();
    128.  
    129.         if (GUILayout.Button("Generate new Planet"))
    130.         {
    131.             baseScript.CreateNewWorld();
    132.         }
    133.  
    134.         if (EditorGUI.EndChangeCheck())
    135.         {
    136.             EditorUtility.SetDirty(baseScript);
    137.         }
    138.     }
    139. }
    140.  
    And the HLSL code:

    Code (CSharp):
    1. #pragma kernel SubdivideMesh
    2.  
    3. float size;
    4. RWStructuredBuffer<float3> InputTrigData;
    5. RWStructuredBuffer<float3> OutputTrigData;
    6.  
    7. float3 NormalizeLength(float3 vect) {
    8.     //NOTE: This function ignores W component while calculating length!
    9.     float startLen = sqrt(pow(vect[0],2) + pow(vect[1],2) + pow(vect[2],2));
    10.     if (startLen == 1) { return vect; }
    11.  
    12.     float3 result = {
    13.         vect[0] / startLen,
    14.         vect[1] / startLen,
    15.         vect[2] / startLen
    16.     };
    17.  
    18.     return result;
    19. }
    20.  
    21. float3 GetMiddleVector(float3 in1, float3 in2) {
    22.     float3 result = {
    23.         (in1[0] + in2[0]) / 2,
    24.         (in1[1] + in2[1]) / 2,
    25.         (in1[2] + in2[2]) / 2
    26.     };
    27.     return result;
    28. }
    29.  
    30. float3 GetNormalizedMiddleVector(float3 in1, float3 in2) {
    31.     return NormalizeLength(GetMiddleVector(in1, in2));
    32. }
    33.  
    34. float3 RescaleVector(float3 in1, float len) {
    35.     float3 result = {
    36.         NormalizeLength(in1)[0] * len,
    37.         NormalizeLength(in1)[1] * len,
    38.         NormalizeLength(in1)[2] * len
    39.     };
    40.     return result;
    41. }
    42.  
    43. [numthreads(8, 8, 8)]
    44. void SubdivideMesh(uint3 id : SV_DispatchThreadID) {
    45.     //Triangle is given by x,y,z coordinates in clockwise direction
    46.  
    47.     //Get the triangle position. Gotta figure out how to change index based on id.x or id.y
    48.     int iX = id.x;
    49.     int iY = id.y;
    50.     int iZ = id.z;
    51.  
    52.     int maxX = 8;
    53.     int maxY = 8;
    54.  
    55.     //Define in clockwise direction
    56.     float3 origPos1 = InputTrigData[3 * (iX + maxX * (iY + iZ * maxY)) + 0];
    57.     float3 origPos2 = InputTrigData[3 * (iX + maxX * (iY + iZ * maxY)) + 1];
    58.     float3 origPos3 = InputTrigData[3 * (iX + maxX * (iY + iZ * maxY)) + 2];
    59.     float3 newEdgePosA = GetNormalizedMiddleVector(origPos1, origPos2);
    60.     float3 newEdgePosB = GetNormalizedMiddleVector(origPos2, origPos3);
    61.     float3 newEdgePosC = GetNormalizedMiddleVector(origPos3, origPos1);
    62.  
    63.     //Now that we have all the points, append to the texture
    64.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 0] =  RescaleVector(origPos1,        size);
    65.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 1] =  RescaleVector(newEdgePosA,    size);
    66.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 2] =  RescaleVector(newEdgePosC,    size);
    67.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 3] =  RescaleVector(newEdgePosA,    size);
    68.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 4] =  RescaleVector(origPos2,        size);
    69.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 5] =  RescaleVector(newEdgePosB,    size);
    70.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 6] =  RescaleVector(newEdgePosA,    size);
    71.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 7] =  RescaleVector(newEdgePosB,    size);
    72.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 8] =  RescaleVector(newEdgePosC,    size);
    73.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 9] =  RescaleVector(newEdgePosC,    size);
    74.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 10] = RescaleVector(newEdgePosB,    size);
    75.     OutputTrigData[12 * (iX + maxX * (iY + iZ * maxY)) + 11] = RescaleVector(origPos3,        size);
    76. }
    To run it, create an empty gameobject in your world, place the C# script on it. Lastly, point the "Compute Shader" to the HLSL code and set the resolution to any value. 5 and below works perfectly. 6 and above seems to completely break, and I can't see why or how.

    I thought it might be a memory issue, the GPU running out of memory somewhere, but at 32,000 triangles on a GTX1070, I shouldn't be getting even remotely close enough to cause issues.

    Any pointers, or if anyone has any clues as to what part might be breaking, I would be incredibly greatful!!
     
  2. grizzly

    grizzly

    Joined:
    Dec 5, 2012
    Posts:
    356
    What exactly is "breaking"? Is it crashing your GPU?

    Your compute workload is rather excessive. 8x8x8 = 512 threads per group and you're dispatching 64 groups per dimension, and then iterating that 5+ times.

    Lower num_threads to [4,4,4] for 64 threads max and then calculate the correct minimum required number of thread groups to dispatch.
    Code (CSharp):
    1. var numThreadGroups = Mathf.CeilToInt(numPointsPerDimension/4f);
    2. Dispatch(numThreadGroups, numThreadGroups, numThreadGroups);

    Edit: 3D indices are really best suited to working with 3D textures. In this case usage of a single dimension will suffice.

    Lower num_threads to [64,1,1] for 64 threads max and then calculate the correct minimum required number of thread groups to dispatch.
    Code (CSharp):
    1. var numThreadGroupsX = Mathf.CeilToInt(inputPoints/64f);
    2. Dispatch(0, numThreadGroupsX, 1, 1);
    And adjust the indices accordingly;
    Code (CSharp):
    1. InputTrigData[3 * id.x + N]
    2. OutputTrigData[12 * id.x + N]
    Another limitation here will be the Mesh index format. It's limited to 16 bit by default, and you'll need 32 bit for mesh resolutions beyond 65,535 vertices. See here.
     
    Last edited: Dec 29, 2021
    Technokid2000 likes this.
  3. Technokid2000

    Technokid2000

    Joined:
    Dec 18, 2016
    Posts:
    36
    Oh, sorry, I should have elaborated. The mesh that is being generated looks like a sphere until you increase the "Resolution" variable past a value of 5, where it "breaks" and the mesh no longer is a sphere.

    I tried your solution for decreasing GPU workload, but it doesn't seem to have changed anything. Still takes the same amount of time to execute
     
  4. grizzly

    grizzly

    Joined:
    Dec 5, 2012
    Posts:
    356
    Group sizes of 32/64 threads better utilize the GPU due to how warps/wavefronts work. But that's another topic tbf. There's a good read here if you're interested :)
     
  5. Technokid2000

    Technokid2000

    Joined:
    Dec 18, 2016
    Posts:
    36
    Ok, so I got it working. Grizzly, you were right on both accouunts.

    Firstly, I didn't realise that meshes had a default max size limit of 65,535 vertices since I wasn't aware it was held in uint16. Changing it to uint32 seems to have solved it (Makes sense since a resolution of 5 gives 32,000 vertices, and res 6 would have been about 120,000 vertices).

    Secondly, I don't really understand how GPU clustering and threadgroups work properly yet (I really must read that link you sent, looks super useful), but changing it to numThreads[64,1,1] for some reason has sped up the resolution 6 from about 15 minutes of computing to about half a second. Idk why or how, but it works!!

    Thankyou so much! I never would have figured it out on my own. Have a lovely day/night!
     
    Last edited: Dec 29, 2021
    grizzly and Sound-Master like this.