I followed two tutorials on making a grass shader using a geometry shader (this one, and this one, which builds on the first). Although the second article improved performance a bit, it wasn't enough. So I learned about GPU instancing, but I didn't find anything on the subject in the Unity documentation. So my question is: is it possible to use GPU instancing with this shader, and if so, how? Code - Grass.shader: Code (CSharp): Shader "Roystan/Grass" { Properties { [Header(Shading)] _TopColor("Top Color", Color) = (1,1,1,1) _BottomColor("Bottom Color", Color) = (1,1,1,1) _TranslucentGain("Translucent Gain", Range(0,1)) = 0.5 _BendRotationRandom("Bend Rotation Random", Range(0, 1)) = 0.2 _BladeWidth("Blade Width", Float) = 0.05 _BladeWidthRandom("Blade Width Random", Float) = 0.02 _BladeHeight("Blade Height", Float) = 0.5 _BladeHeightRandom("Blade Height Random", Float) = 0.3 _TessellationUniform("Tessellation Uniform", Range(1, 64)) = 1 _WindDistortionMap("Wind Distortion Map", 2D) = "white" {} _WindFrequency("Wind Frequency", Vector) = (0.05, 0.05, 0, 0) _WindStrength("Wind Strength", Float) = 1 _BladeForward("Blade Forward Amount", Float) = 0.38 _BladeCurve("Blade Curvature Amount", Range(1, 4)) = 2 _minDist("Minimum Distance", Float) = 10 _maxDist("Maximum Distance", Float) = 25 _SlopeLimit("Slope Limit", Float) = 0 _BeachLimit("Beach Limit", Float) = 0 _HeightLimit("Height Limit", Float) = 0 } CGINCLUDE #include "Shaders/CustomTessellation.cginc" #include "UnityCG.cginc" #include "Autolight.cginc" #define BLADE_SEGMENTS 2 float _BendRotationRandom; float _BladeHeight; float _BladeHeightRandom; float _BladeWidth; float _BladeWidthRandom; sampler2D _WindDistortionMap; float4 _WindDistortionMap_ST; float2 _WindFrequency; float _WindStrength; float _BladeForward; float _BladeCurve; float _minDistance; float _maxDistance; float _SlopeLimit; float _BeachLimit; float _HeightLimit; float rand(float3 co) { return frac(sin(dot(co.xyz, float3(12.9898, 78.233, 53.539))) * 43758.5453); } //
// Construct a rotation matrix that rotates around the provided axis, sourced from:
// https://gist.github.com/keijiro/ee439d5e7388f3aafc5296005c8c3f33
//
// angle: rotation angle in radians (it is fed straight to sincos).
// axis:  rotation axis; NOTE(review): the formula assumes a unit-length axis.
float3x3 AngleAxis3x3(float angle, float3 axis)
{
    float c, s;
    sincos(angle, s, c);

    float t = 1 - c;
    float x = axis.x;
    float y = axis.y;
    float z = axis.z;

    return float3x3(
        t * x * x + c,     t * x * y - s * z, t * x * z + s * y,
        t * x * y + s * z, t * y * y + c,     t * y * z - s * x,
        t * x * z - s * y, t * y * z + s * x, t * z * z + c
    );
}

// Per-vertex data emitted by the geometry shader and read by both fragment shaders.
struct geometryOutput
{
    float4 pos : SV_POSITION;
    float2 uv : TEXCOORD0;
    unityShadowCoord4 _ShadowCoord : TEXCOORD1;
    float3 normal : NORMAL;
};

// Builds one output vertex from an object-space position, a uv and an
// object-space normal: the position goes to clip space, the normal to world
// space, and the shadow coordinate is derived from the clip-space position.
geometryOutput VertexOutput(float3 pos, float2 uv, float3 normal)
{
    geometryOutput o;
    o.pos = UnityObjectToClipPos(pos);
    o.uv = uv;
    o._ShadowCoord = ComputeScreenPos(o.pos);
    o.normal = UnityObjectToWorldNormal(normal);
#if UNITY_PASS_SHADOWCASTER
    // Only the shadow caster pass applies the linear shadow bias.
    o.pos = UnityApplyLinearShadowBias(o.pos);
#endif
    return o;
}

// Creates one blade vertex. (width, forward, height) is an offset expressed in
// the blade's tangent frame; transformMatrix takes it into object space, where
// it is added to vertexPosition (the blade root).
geometryOutput GenerateGrassVertex(float3 vertexPosition, float width, float height, float forward, float2 uv, float3x3 transformMatrix)
{
    float3 tangentPoint = float3(width, forward, height);

    float3 tangentNormal = normalize(float3(0, -1, forward));
    float3 localNormal = mul(transformMatrix, tangentNormal);

    float3 localPosition = vertexPosition + mul(transformMatrix, tangentPoint);
    return VertexOutput(localPosition, uv, localNormal);
}

// Geometry program: takes one (tessellated) triangle and emits at most one
// blade of grass, rooted at the triangle's first vertex and built from
// BLADE_SEGMENTS quads plus a tip vertex.
//
// A blade is emitted only when ALL of the following hold:
//   - world-space normal y   >  _SlopeLimit
//   - distance to the camera <  _maxDist (declared in CustomTessellation.cginc)
//   - object-space y         >  _BeachLimit
//   - object-space y         <  _HeightLimit
// The test runs first so that rejected triangles skip the texture fetch and
// all of the matrix construction.
[maxvertexcount(BLADE_SEGMENTS * 2 + 1)]
void geo(triangle vertexOutput IN[3], inout TriangleStream<geometryOutput> triStream)
{
    // The domain program passes positions through untransformed, so this is
    // still an object-space position.
    float3 pos = IN[0].vertex.xyz;
    float3 vNormal = IN[0].normal;
    float4 vTangent = IN[0].tangent;

    // --- Culling -----------------------------------------------------------
    float3 worldPos = mul(unity_ObjectToWorld, IN[0].vertex).xyz;
    // Normalized world normal, so the slope test does not depend on the
    // object's scale or on the length of the interpolated normal.
    float slope = UnityObjectToWorldNormal(vNormal).y;
    float dist = distance(_WorldSpaceCameraPos, worldPos);

    bool shouldCreateGrass =
        slope > _SlopeLimit &&
        dist < _maxDist &&
        pos.y > _BeachLimit &&
        pos.y < _HeightLimit;

    if (!shouldCreateGrass)
    {
        return;
    }

    // --- Tangent frame -----------------------------------------------------
    float3 vBinormal = cross(vNormal, vTangent.xyz) * vTangent.w;

    float3x3 tangentToLocal = float3x3(
        vTangent.x, vBinormal.x, vNormal.x,
        vTangent.y, vBinormal.y, vNormal.y,
        vTangent.z, vBinormal.z, vNormal.z
    );

    // Random facing around the blade's up axis and a random forward bend,
    // both seeded by the root position so they are stable from frame to frame.
    float3x3 facingRotationMatrix = AngleAxis3x3(rand(pos) * UNITY_TWO_PI, float3(0, 0, 1));
    float3x3 bendRotationMatrix = AngleAxis3x3(rand(pos.zzx) * _BendRotationRandom * UNITY_PI * 0.5, float3(-1, 0, 0));

    // --- Wind --------------------------------------------------------------
    float2 uv = pos.xz * _WindDistortionMap_ST.xy + _WindDistortionMap_ST.zw + _WindFrequency * _Time.y;
    float2 windSample = (tex2Dlod(_WindDistortionMap, float4(uv, 0, 0)).xy * 2 - 1) * _WindStrength;

    // Guarded normalization: a zero sample (e.g. _WindStrength == 0) must not
    // produce a NaN axis. With a zero axis and a zero angle AngleAxis3x3
    // returns the identity matrix, i.e. "no wind".
    float3 wind = float3(windSample / max(length(windSample), 1e-5), 0);
    // The angle is driven by the sample's x component; this is what the
    // previous implicit float2 -> float truncation evaluated to.
    float3x3 windRotation = AngleAxis3x3(UNITY_PI * windSample.x, wind);

    // Full transform for the upper part of the blade; the base only gets the
    // facing rotation so it stays attached to the surface.
    float3x3 transformationMatrix = mul(mul(mul(tangentToLocal, windRotation), facingRotationMatrix), bendRotationMatrix);
    float3x3 transformationMatrixFacing = mul(tangentToLocal, facingRotationMatrix);

    // --- Blade dimensions ----------------------------------------------------
    float height = (rand(pos.zyx) * 2 - 1) * _BladeHeightRandom + _BladeHeight;
    float forward = rand(pos.yyz) * _BladeForward;
    float width = (rand(pos.xzy) * 2 - 1) * _BladeWidthRandom + _BladeWidth;

    // --- Emit the strip ------------------------------------------------------
    for (int i = 0; i < BLADE_SEGMENTS; i++)
    {
        float t = i / (float)BLADE_SEGMENTS;

        float segmentHeight = height * t;
        float segmentWidth = width * (1 - t);
        float segmentForward = pow(t, _BladeCurve) * forward;

        float3x3 transformMatrix = i == 0 ? transformationMatrixFacing : transformationMatrix;

        triStream.Append(GenerateGrassVertex(pos, segmentWidth, segmentHeight, segmentForward, float2(0, t), transformMatrix));
        triStream.Append(GenerateGrassVertex(pos, -segmentWidth, segmentHeight, segmentForward, float2(1, t), transformMatrix));
    }

    // Tip of the blade.
    triStream.Append(GenerateGrassVertex(pos, 0, height, forward, float2(0.5, 1), transformationMatrix));
}
ENDCG

    SubShader
    {
        Cull Off

        // Lit pass.
        Pass
        {
            Tags
            {
                "RenderType" = "Opaque"
                "LightMode" = "ForwardBase"
            }

            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag
            #pragma geometry geo
            #pragma target 4.6
            #pragma multi_compile_fwdbase
            #pragma hull hull
            #pragma domain domain

            #include "Lighting.cginc"

            float4 _TopColor;
            float4 _BottomColor;
            float _TranslucentGain;

            // Blends from _BottomColor at the root (uv.y == 0) to the lit
            // _TopColor at the tip (uv.y == 1). Back faces use the flipped
            // normal because the pass renders with Cull Off.
            float4 frag(geometryOutput i, fixed facing : VFACE) : SV_Target
            {
                float3 normal = facing > 0 ? i.normal : -i.normal;

                float shadow = SHADOW_ATTENUATION(i);
                float NdotL = saturate(saturate(dot(normal, _WorldSpaceLightPos0)) + _TranslucentGain) * shadow;

                float3 ambient = ShadeSH9(float4(normal, 1));
                float4 lightIntensity = NdotL * _LightColor0 + float4(ambient, 1);
                float4 col = lerp(_BottomColor, _TopColor * lightIntensity, i.uv.y);

                return col;
            }
            ENDCG
        }

        // Shadow caster pass: same vertex/hull/domain/geometry programs.
        Pass
        {
            Tags
            {
                "LightMode" = "ShadowCaster"
            }

            CGPROGRAM
            #pragma vertex vert
            #pragma geometry geo
            #pragma fragment frag
            #pragma hull hull
            #pragma domain domain
            #pragma target 4.6
            #pragma multi_compile_shadowcaster

            float4 frag(geometryOutput i) : SV_Target
            {
                SHADOW_CASTER_FRAGMENT(i);
            }
            ENDCG
        }
    }
}

CustomTessellation.cginc: Code (CSharp):

// Tessellation programs based on this article by Catlike Coding:
// https://catlikecoding.com/unity/tutorials/advanced-rendering/tessellation/

// Mesh data as read from the vertex buffer; also the control-point type.
struct vertexInput
{
    float4 vertex : POSITION;
    float3 normal : NORMAL;
    float4 tangent : TANGENT;
};

// Data handed from the domain program to the geometry program.
struct vertexOutput
{
    float4 vertex : SV_POSITION;
    float3 normal : NORMAL;
    float4 tangent : TANGENT;
};

struct TessellationFactors
{
    float edge[3] : SV_TessFactor;
    float inside : SV_InsideTessFactor;
};

// Vertex program: passes the control point through unchanged.
vertexInput vert(vertexInput v)
{
    return v;
}

// Converts an interpolated control point into the geometry program's input.
vertexOutput tessVert(vertexInput v)
{
    vertexOutput o;
    // Note that the vertex is NOT transformed to clip
    // space here; this is done in the grass geometry shader.
    o.vertex = v.vertex;
    o.normal = v.normal;
    o.tangent = v.tangent;
    return o;
}

float _TessellationUniform;
float _minDist;
float _maxDist;

// Tessellation factor for the edge cp0-cp1, based on the distance from the
// camera to the edge's world-space midpoint: _TessellationUniform at or
// inside _minDist, falling linearly to 0.01 * _TessellationUniform at or
// beyond _maxDist.
float TessellationEdgeFactor(vertexInput cp0, vertexInput cp1)
{
    float3 p0 = mul(unity_ObjectToWorld, float4(cp0.vertex.xyz, 1)).xyz;
    float3 p1 = mul(unity_ObjectToWorld, float4(cp1.vertex.xyz, 1)).xyz;

    float3 edgeCenter = (p0 + p1) * 0.5;
    float viewDistance = distance(edgeCenter, _WorldSpaceCameraPos);

    // Guard against _maxDist == _minDist, which would divide by zero.
    float fadeRange = max(_maxDist - _minDist, 1e-5);

    return clamp(1.0 - (viewDistance - _minDist) / fadeRange, 0.01, 1.0) * _TessellationUniform;
}

// Patch constant function: one factor per edge, and their average inside.
TessellationFactors patchConstantFunction(InputPatch<vertexInput, 3> patch)
{
    TessellationFactors f;
    f.edge[0] = TessellationEdgeFactor(patch[1], patch[2]);
    f.edge[1] = TessellationEdgeFactor(patch[2], patch[0]);
    f.edge[2] = TessellationEdgeFactor(patch[0], patch[1]);
    // NOTE(review): the edge factors are recomputed here instead of reusing
    // f.edge[0..2]. This looks deliberate; the Catlike Coding article this
    // file is based on appears to do the same to sidestep a shader compiler
    // problem on some graphics APIs. Confirm before "simplifying".
    f.inside =
        (TessellationEdgeFactor(patch[1], patch[2]) +
         TessellationEdgeFactor(patch[2], patch[0]) +
         TessellationEdgeFactor(patch[0], patch[1])) * (1 / 3.0);

    // Uniform (distance-independent) alternative:
    // f.edge[0] = _TessellationUniform;
    // f.edge[1] = _TessellationUniform;
    // f.edge[2] = _TessellationUniform;
    // f.inside = _TessellationUniform;
    return f;
}

// Hull program: forwards each control point unchanged.
[UNITY_domain("tri")]
[UNITY_outputcontrolpoints(3)]
[UNITY_outputtopology("triangle_cw")]
[UNITY_partitioning("integer")]
[UNITY_patchconstantfunc("patchConstantFunction")]
vertexInput hull(InputPatch<vertexInput, 3> patch, uint id : SV_OutputControlPointID)
{
    return patch[id];
}

// Domain program: barycentric interpolation of position, normal and tangent
// for every vertex generated by the tessellator.
[UNITY_domain("tri")]
vertexOutput domain(TessellationFactors factors, OutputPatch<vertexInput, 3> patch, float3 barycentricCoordinates : SV_DomainLocation)
{
    vertexInput v;

    #define MY_DOMAIN_PROGRAM_INTERPOLATE(fieldName) v.fieldName = \
        patch[0].fieldName * barycentricCoordinates.x + \
        patch[1].fieldName * barycentricCoordinates.y + \
        patch[2].fieldName * barycentricCoordinates.z;

    MY_DOMAIN_PROGRAM_INTERPOLATE(vertex)
    MY_DOMAIN_PROGRAM_INTERPOLATE(normal)
    MY_DOMAIN_PROGRAM_INTERPOLATE(tangent)

    return tessVert(v);
}