Hi, I am running marching cubes on compute shaders and then was constructing meshes on the CPU which works fine. I have now tried to use Code (CSharp): Graphics.DrawProceduralIndirect to draw the meshes straight from the GPU however nothing is rendering with this new approach. I was wondering if anyone could figure out what is wrong? The compute shader: Code (CSharp): // Each #kernel tells which function to compile; you can have many kernels #pragma kernel March #include "MarchTables.compute" float isoLevel = 0; int width = 16; float resolution = 1; struct Triangle { float3 v[3]; }; float3 interpolateVertices(float3 v1, float3 v2, float value1, float value2) { float mu = (isoLevel - value1) / (value2 - value1); return float3(v1.xyz + mu * (v2.xyz - v1.xyz)); } int flattenedIndex(int x, int y, int z) { return (x * (width + 1) * (width + 1)) + (y * (width + 1)) + z; } AppendStructuredBuffer<Triangle> triangleBuffer; RWStructuredBuffer<float> densityBuffer; //int height; //int width; //float isoLevel; [numthreads(8, 8, 8)] void March(int3 id : SV_DispatchThreadID) { if (id.x >= (width) || id.y >= (width) || id.z >= (width)) { return; } float cubeCorners[8] = { densityBuffer[flattenedIndex(id.x, id.y, id.z + 1)], densityBuffer[flattenedIndex(id.x + 1, id.y, id.z + 1)], densityBuffer[flattenedIndex(id.x + 1, id.y, id.z)], densityBuffer[flattenedIndex(id.x, id.y, id.z)], densityBuffer[flattenedIndex(id.x, id.y + 1, id.z + 1)], densityBuffer[flattenedIndex(id.x + 1, id.y + 1, id.z + 1)], densityBuffer[flattenedIndex(id.x + 1, id.y + 1, id.z)], densityBuffer[flattenedIndex(id.x, id.y + 1, id.z)], }; int cubeIndex = 0; if (cubeCorners[0] > isoLevel) cubeIndex |= 1; if (cubeCorners[1] > isoLevel) cubeIndex |= 2; if (cubeCorners[2] > isoLevel) cubeIndex |= 4; if (cubeCorners[3] > isoLevel) cubeIndex |= 8; if (cubeCorners[4] > isoLevel) cubeIndex |= 16; if (cubeCorners[5] > isoLevel) cubeIndex |= 32; if (cubeCorners[6] > isoLevel) cubeIndex |= 64; if (cubeCorners[7] > 
isoLevel) cubeIndex |= 128; if (cubeIndex == 0 || cubeIndex == 255) { return; } // Create triangles for current cube configuration for (int i = 0; triangulation[cubeIndex][i] != -1; i += 3) { // Get indices of corner points A and B for each of the three edges // of the cube that need to be joined to form the triangle. int a0 = cornerIndexAFromEdge[triangulation[cubeIndex][i]]; int b0 = cornerIndexBFromEdge[triangulation[cubeIndex][i]]; int a1 = cornerIndexAFromEdge[triangulation[cubeIndex][i + 1]]; int b1 = cornerIndexBFromEdge[triangulation[cubeIndex][i + 1]]; int a2 = cornerIndexAFromEdge[triangulation[cubeIndex][i + 2]]; int b2 = cornerIndexBFromEdge[triangulation[cubeIndex][i + 2]]; Triangle tri; tri.v[0] = interpolateVertices(float3(((id * resolution) + (vertexPositions[a0] * resolution))), float3(((id * resolution) + (vertexPositions[b0] * resolution))), cubeCorners[a0], cubeCorners[b0]); tri.v[1] = interpolateVertices(float3(((id * resolution) + (vertexPositions[a1] * resolution))), float3(((id * resolution) + (vertexPositions[b1] * resolution))), cubeCorners[a1], cubeCorners[b1]); tri.v[2] = interpolateVertices(float3(((id * resolution) + (vertexPositions[a2] * resolution))), float3(((id * resolution) + (vertexPositions[b2] * resolution))), cubeCorners[a2], cubeCorners[b2]); triangleBuffer.Append(tri); } } The shader: Code (CSharp): // Upgrade NOTE: replaced 'mul(UNITY_MATRIX_MVP,*)' with 'UnityObjectToClipPos(*)' Shader "Unlit/TerrainShader" { Properties { } SubShader { Tags { "RenderType" = "Opaque" } LOD 100 Pass { CGPROGRAM #pragma enable_d3d11_debug_symbols #pragma target 5.0 #pragma vertex vert #pragma fragment frag // make fog work #pragma multi_compile_fog #include "UnityCG.cginc" struct Triangle { float3 v[3]; }; uniform StructuredBuffer<Triangle> triangles; uniform float4x4 model; struct appdata { float4 vertex : POSITION; float2 uv : TEXCOORD0; }; struct v2f { float4 pos: SV_POSITION; }; v2f vert (uint id : SV_VertexID) { uint pid = id / 3; uint 
vid = id % 3; float3 pos = triangles[pid].v[vid]; v2f o; o.pos = mul(UNITY_MATRIX_VP,mul(model,pos)); return o; } float4 frag(v2f i) : SV_Target { return float4(1,0.5,0,1); } ENDCG } } } The C# code: Code (CSharp): public void GeneratePlanetOnGPU() { argBuffer = new ComputeBuffer(4, sizeof(int), ComputeBufferType.IndirectArguments); int[] args = new int[] { 0, 1, 0, 0 }; argBuffer.SetData(args); int threadGroups = (width / 8); int kernelHandle = shader.FindKernel("March"); if (!useComputeDensity) { densityMap = new float[(width + 1) * (width + 1) * (width + 1)]; densityBuffer = new ComputeBuffer((width + 1) * (width + 1) * (width + 1), sizeof(float)); densityBuffer.SetData(densityMap); shader.SetBuffer(kernelHandle, "densityBuffer", densityBuffer); } else { ComputeBuffer db = dg.Generate(radius, width, resolution, new float[] { planetCentre.x, planetCentre.y, planetCentre.z }, new float[] { worldPos.x, worldPos.y, worldPos.z }, octaves, minHeight, strength); shader.SetBuffer(kernelHandle, "densityBuffer", db); } triangleCountBuffer = new ComputeBuffer(1, sizeof(int), ComputeBufferType.Raw); triangleBuffer = new ComputeBuffer((width) * (width) * (width) * 5, sizeof(float) * 3 * 3, ComputeBufferType.Append); triangleBuffer.SetCounterValue(0); shader.SetBuffer(kernelHandle, "triangleBuffer", triangleBuffer); shader.SetInt("width", width); shader.SetFloat("resolution", resolution); shader.Dispatch(kernelHandle, threadGroups, threadGroups, threadGroups); ComputeBuffer.CopyCount(triangleBuffer, argBuffer, 0); argBuffer.GetData(args); int tris = args[0]; args[0] *= 3; argBuffer.SetData(args); float size = 16 * resolution; Vector3 centre = worldPos + (Vector3.one * (size / 2)); b = new Bounds(centre, new Vector3(size, size, size)); setup = true; } private void OnRenderObject() { if (setup) { mat.SetPass(0); mat.SetBuffer("triangles", triangleBuffer); mat.SetMatrix("model", transform.localToWorldMatrix); 
Graphics.DrawProceduralIndirect(mat,b,MeshTopology.Triangles,argBuffer); } } The only difference between this and the CPU method is how the meshes are constructed. The gameobject the script is running on has a mesh filter and mesh renderer but I am unclear if it needs this. I assign the material in the inspector. Also, the gameobject is a prefab that is instantiated at runtime. There are lots of these prefabs each running this C# script.
I'm going through the same struggle right now. The problem is that all the steps to get it working are horribly documented, and almost all the examples I found are centered around DrawMeshInstancedIndirect, with very few using DrawProceduralIndirect. My case is different because I'm using surface shaders (I had to force EnableKeyword("PROCEDURAL_INSTANCING_ON") on the material itself because Unity was refusing to enable the instancing code generation), but what truly helped me find all my bugs was using RenderDoc to capture the frame, examine the draw calls, view the buffer contents, and debug the vertex shaders. My last bug was using the wrong winding order in my index buffers (clockwise instead of counter-clockwise), so everything was being backface culled.
Sounds like we're in the exact same boat, lol. And I have this bad feeling that once we finally get there we'll discover it confers terrible performance
Yes, this. Any luck? I implemented it another way first, but then realized I can't have unique meshes. So now I'm doing it this way to reduce the amount of data sent to the GPU. But I'm worried that it might still have overhead.
I'm looking for exactly the same thing, and if it's not too late, maybe this can help you out: https://www.ronja-tutorials.com/post/051-draw-procedural/