WebGPU 中是否有任何方法可以测量多通道管道中特定计算通道的执行时间?
我期望找到客观的方法来对各种计算通道进行基准测试,以便找出哪个步骤减慢了管道速度。
我不知道“多通道管道”在WebGPU中意味着什么。
WebGPU 中有两种类型的管道:渲染管道和计算管道。两者都没有“多通道”的概念。
如果您检查并启用了 'timestamp-query'(时间戳查询)功能,那么您可以在通道(pass)的开头和结尾写入时间戳。
步骤是:
1. 创建一个 GPUQuerySet,它是一个查询数组。
2. 在渲染通道或计算通道的描述符中添加 timestampWrites 部分,指定要使用的查询集,以及把开始和结束时间戳写入查询集中的哪些索引。
3. 通道结束后,调用 resolveQuerySet 解析查询集中的查询。这会将查询结果复制到缓冲区。
示例:
/* Pull in the lesson stylesheet hosted on webgpufundamentals.org. */
@import url(https://webgpufundamentals.org/webgpu/resources/webgpu-lesson.css);
/* Make the page itself full-height with no margin so the canvas can fill it. */
html, body {
margin: 0; /* remove the default margin */
height: 100%; /* make the html,body fill the page */
}
/* The WebGPU canvas stretches over its whole container. */
canvas {
display: block; /* make the canvas act like a block */
width: 100%; /* make the canvas fill its container */
height: 100%;
}
/* Text overlay pinned to the top-left corner: white text on a mostly opaque black box. */
#info {
position: absolute;
top: 0;
left: 0;
margin: 0;
padding: 0.5em;
background-color: rgba(0, 0, 0, 0.8);
color: white;
}
<canvas></canvas>
<pre id="info"></pre>
<script type="module">
// WebGPU Timing - Step 1 - Animated
// from https://webgpufundamentals.org/webgpu/webgpu-timing-with-timestamp-w-average.html
import GUI from 'https://webgpufundamentals.org/3rdparty/muigui-0.x.module.js';
/**
 * Returns a pseudo-random number from a half-open range.
 *
 * - `rand()`         -> [0, 1)
 * - `rand(max)`      -> [0, max)  (a single argument is the upper bound)
 * - `rand(min, max)` -> [min, max)
 *
 * @param {number} [min] Lower bound, or the upper bound when it is the only argument.
 * @param {number} [max] Upper bound (exclusive).
 * @returns {number} A value produced from Math.random() scaled into the range.
 */
const rand = (min, max) => {
  // Normalise the three call shapes into one [low, high) pair.
  const [low, high] =
    min === undefined ? [0, 1]
    : max === undefined ? [0, min]
    : [min, max];
  return low + Math.random() * (high - low);
};
/**
 * Average over the most recent `numSamples` values.
 *
 * Samples are kept in a ring of slots; a running total is adjusted on every
 * insert so `get()` does not have to re-sum the window.
 */
class RollingAverage {
  #total = 0;      // sum of the samples currently stored
  #samples = [];   // ring of stored samples; grows until it holds #numSamples entries
  #cursor = 0;     // slot the next sample is written to
  #numSamples;     // window size

  /**
   * @param {number} [numSamples=30] How many recent samples the average covers.
   */
  constructor(numSamples = 30) {
    this.#numSamples = numSamples;
  }

  /**
   * Adds a sample, replacing the oldest one once the window is full.
   *
   * Non-finite values (NaN, +/-Infinity) are ignored: storing one would turn
   * the running total into Infinity and then NaN once that slot is evicted,
   * and the average could never recover.
   *
   * @param {number} v The sample to add.
   */
  addSample(v) {
    if (!Number.isFinite(v)) {
      return;
    }
    // Swap the value being overwritten (if any) for the new one in the total.
    this.#total += v - (this.#samples[this.#cursor] ?? 0);
    this.#samples[this.#cursor] = v;
    this.#cursor = (this.#cursor + 1) % this.#numSamples;
  }

  /**
   * @returns {number} The mean of the stored samples, or 0 when no sample has
   *     been added yet (instead of the NaN that 0 / 0 would produce).
   */
  get() {
    const count = this.#samples.length;
    return count === 0 ? 0 : this.#total / count;
  }
}
// Rolling averages behind the on-screen readout; each uses RollingAverage's default window.
const fpsAverage = new RollingAverage(); // fed 1 / deltaTime every frame (frames per second)
const jsAverage = new RollingAverage(); // fed the per-frame JavaScript time measured with performance.now()
const gpuAverage = new RollingAverage(); // fed the render pass timestamp delta divided by 1000
/**
 * Builds interleaved vertex data for a ring (or a disc when innerRadius is 0),
 * made of `numSubdivisions` wedges of two triangles each.
 *
 * Each vertex occupies three 32-bit slots: x, y, and one slot whose first
 * three bytes hold an 8-bit r, g, b color (the fourth byte is left at 0).
 * Vertices on the outer edge are dark gray, those on the inner edge white.
 *
 * @param {object} [options]
 * @param {number} [options.radius=1] Outer radius.
 * @param {number} [options.numSubdivisions=24] Number of wedges around the arc.
 * @param {number} [options.innerRadius=0] Inner radius.
 * @param {number} [options.startAngle=0] Arc start, in radians.
 * @param {number} [options.endAngle=Math.PI * 2] Arc end, in radians.
 * @returns {{vertexData: Float32Array, numVertices: number}} The interleaved
 *     buffer contents and the number of vertices they describe.
 */
function createCircleVertices({
  radius = 1,
  numSubdivisions = 24,
  innerRadius = 0,
  startAngle = 0,
  endAngle = Math.PI * 2,
} = {}) {
  const kFloatsPerVertex = 2 + 1;              // x, y + one packed color slot
  const kBytesPerVertex = kFloatsPerVertex * 4;
  const kColorByteOffset = 2 * 4;              // color bytes follow the two position floats

  // Two triangles per wedge, three vertices per triangle.
  const numVertices = numSubdivisions * 3 * 2;
  const vertexData = new Float32Array(numVertices * kFloatsPerVertex);
  // Byte view over the same memory, used to write the packed colors.
  const colorData = new Uint8Array(vertexData.buffer);

  let vertexCount = 0;
  const writeVertex = ([x, y], [r, g, b]) => {
    const floatBase = vertexCount * kFloatsPerVertex;
    const byteBase = vertexCount * kBytesPerVertex + kColorByteOffset;
    vertexData[floatBase] = x;
    vertexData[floatBase + 1] = y;
    colorData[byteBase] = r * 255;
    colorData[byteBase + 1] = g * 255;
    colorData[byteBase + 2] = b * 255;
    vertexCount += 1;
  };

  const innerColor = [1, 1, 1];
  const outerColor = [0.1, 0.1, 0.1];
  const sweep = endAngle - startAngle;
  const angleAt = (step) => startAngle + step * sweep / numSubdivisions;

  // Each wedge is a quad split into two triangles:
  //
  //   outerA--outerB      innerA  outerB
  //      |   /               |   / |
  //   innerA              innerA--innerB
  for (let wedge = 0; wedge < numSubdivisions; ++wedge) {
    const angleA = angleAt(wedge);
    const angleB = angleAt(wedge + 1);
    const cosA = Math.cos(angleA);
    const sinA = Math.sin(angleA);
    const cosB = Math.cos(angleB);
    const sinB = Math.sin(angleB);

    const outerA = [cosA * radius, sinA * radius];
    const outerB = [cosB * radius, sinB * radius];
    const innerA = [cosA * innerRadius, sinA * innerRadius];
    const innerB = [cosB * innerRadius, sinB * innerRadius];

    // first triangle
    writeVertex(outerA, outerColor);
    writeVertex(outerB, outerColor);
    writeVertex(innerA, innerColor);
    // second triangle
    writeVertex(innerA, innerColor);
    writeVertex(outerB, outerColor);
    writeVertex(innerB, innerColor);
  }

  return { vertexData, numVertices };
}
/**
 * Sets up WebGPU, builds a scene of instanced rings and starts the render loop.
 *
 * When the adapter exposes the 'timestamp-query' feature, the render pass is
 * timed on the GPU: the pass writes a begin and an end timestamp into a query
 * set, the queries are resolved into a buffer, copied to a mappable buffer and
 * read back asynchronously. The fps / JavaScript / GPU timings are shown in
 * the #info element as rolling averages.
 *
 * If no WebGPU device can be obtained, `fail()` is called and the function
 * returns without rendering.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const adapter = await navigator.gpu?.requestAdapter();
  // The adapter is missing when WebGPU is unavailable, so the feature lookup
  // has to be guarded too; otherwise it throws before the failure message
  // below can be shown.
  const canTimestamp = adapter?.features.has('timestamp-query') ?? false;
  const device = await adapter?.requestDevice({
    requiredFeatures: [
      ...(canTimestamp ? ['timestamp-query'] : []),
    ],
  });
  if (!device) {
    fail('need a browser that supports WebGPU');
    return;
  }

  // Get a WebGPU context from the canvas and configure it
  const canvas = document.querySelector('canvas');
  const context = canvas.getContext('webgpu');
  const presentationFormat = navigator.gpu.getPreferredCanvasFormat();
  context.configure({
    device,
    format: presentationFormat,
  });

  const module = device.createShaderModule({
    code: `
struct Vertex {
@location(0) position: vec2f,
@location(1) color: vec4f,
@location(2) offset: vec2f,
@location(3) scale: vec2f,
@location(4) perVertexColor: vec3f,
};
struct VSOutput {
@builtin(position) position: vec4f,
@location(0) color: vec4f,
};
@vertex fn vs(
vert: Vertex,
) -> VSOutput {
var vsOut: VSOutput;
vsOut.position = vec4f(
vert.position * vert.scale + vert.offset, 0.0, 1.0);
vsOut.color = vert.color * vec4f(vert.perVertexColor, 1);
return vsOut;
}
@fragment fn fs(vsOut: VSOutput) -> @location(0) vec4f {
return vsOut.color;
}
`,
  });

  // Three vertex buffers: per-vertex geometry, per-instance color, and
  // per-instance offset + scale.
  const pipeline = device.createRenderPipeline({
    label: 'per vertex color',
    layout: 'auto',
    vertex: {
      module,
      entryPoint: 'vs',
      buffers: [
        {
          arrayStride: 2 * 4 + 4, // 2 floats, 4 bytes each + 4 bytes
          attributes: [
            {shaderLocation: 0, offset: 0, format: 'float32x2'},  // position
            {shaderLocation: 4, offset: 8, format: 'unorm8x4'},   // perVertexColor
          ],
        },
        {
          arrayStride: 4, // 4 bytes
          stepMode: 'instance',
          attributes: [
            {shaderLocation: 1, offset: 0, format: 'unorm8x4'},   // color
          ],
        },
        {
          arrayStride: 4 * 4, // 4 floats, 4 bytes each
          stepMode: 'instance',
          attributes: [
            {shaderLocation: 2, offset: 0, format: 'float32x2'},  // offset
            {shaderLocation: 3, offset: 8, format: 'float32x2'},  // scale
          ],
        },
      ],
    },
    fragment: {
      module,
      entryPoint: 'fs',
      targets: [{ format: presentationFormat }],
    },
  });

  const kNumObjects = 10000;
  const objectInfos = [];

  // create 2 per-instance vertex buffers
  const staticUnitSize =
    4;     // color is 4 bytes
  const changingUnitSize =
    2 * 4 + // offset is 2 32bit floats (4bytes each)
    2 * 4;  // scale is 2 32bit floats (4bytes each)
  const staticVertexBufferSize = staticUnitSize * kNumObjects;
  const changingVertexBufferSize = changingUnitSize * kNumObjects;

  const staticVertexBuffer = device.createBuffer({
    label: 'static vertex for objects',
    size: staticVertexBufferSize,
    usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
  });
  const changingVertexBuffer = device.createBuffer({
    label: 'changing storage for objects',
    size: changingVertexBufferSize,
    usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
  });

  // Offsets of each value inside one instance's record: kColorOffset indexes
  // the byte array below; kOffsetOffset / kScaleOffset index the Float32Array.
  const kColorOffset = 0;
  const kOffsetOffset = 0;
  const kScaleOffset = 2;

  {
    const staticVertexValuesU8 = new Uint8Array(staticVertexBufferSize);
    for (let i = 0; i < kNumObjects; ++i) {
      const staticOffsetU8 = i * staticUnitSize;

      // These are only set once so set them now
      staticVertexValuesU8.set(        // set the color
        [rand() * 255, rand() * 255, rand() * 255, 255],
        staticOffsetU8 + kColorOffset);

      // CPU-side state for the values that are re-uploaded every frame.
      objectInfos.push({
        scale: rand(0.2, 0.5),
        offset: [rand(-0.9, 0.9), rand(-0.9, 0.9)],
        velocity: [rand(-0.1, 0.1), rand(-0.1, 0.1)],
      });
    }
    device.queue.writeBuffer(staticVertexBuffer, 0, staticVertexValuesU8);
  }

  // a typed array we can use to update the changingVertexBuffer
  const vertexValues = new Float32Array(changingVertexBufferSize / 4);

  const { vertexData, numVertices } = createCircleVertices({
    radius: 0.5,
    innerRadius: 0.25,
  });
  const vertexBuffer = device.createBuffer({
    label: 'vertex buffer vertices',
    size: vertexData.byteLength,
    usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
  });
  device.queue.writeBuffer(vertexBuffer, 0, vertexData);

  // Timestamp plumbing (all undefined when the feature is unavailable):
  //   querySet      - two timestamp queries: start and end of the pass
  //   resolveBuffer - receives the resolved 8-byte query values on the GPU
  //   resultBuffer  - mappable copy that JavaScript reads back
  const { querySet, resolveBuffer, resultBuffer } = (() => {
    if (!canTimestamp) {
      return {};
    }

    const querySet = device.createQuerySet({
      type: 'timestamp',
      count: 2,
    });
    const resolveBuffer = device.createBuffer({
      size: querySet.count * 8,
      usage: GPUBufferUsage.QUERY_RESOLVE | GPUBufferUsage.COPY_SRC,
    });
    const resultBuffer = device.createBuffer({
      size: resolveBuffer.size,
      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
    });
    return {querySet, resolveBuffer, resultBuffer };
  })();

  const renderPassDescriptor = {
    label: 'our basic canvas renderPass with timing',
    colorAttachments: [
      {
        // view: <- to be filled out when we render
        clearValue: [0.3, 0.3, 0.3, 1],
        loadOp: 'clear',
        storeOp: 'store',
      },
    ],
    // Only ask for timestamps when the feature was enabled on the device.
    ...(canTimestamp && {
      timestampWrites: {
        querySet,
        beginningOfPassWriteIndex: 0,
        endOfPassWriteIndex: 1,
      },
    }),
  };

  const infoElem = document.querySelector('#info');
  let gpuTime = 0;

  const settings = {
    numObjects: 100,
  };

  const gui = new GUI();
  gui.add(settings, 'numObjects', 0, kNumObjects, 1);

  // Modulo that stays non-negative for negative x (used to wrap positions).
  const euclideanModulo = (x, a) => x - a * Math.floor(x / a);

  let then = 0;

  /**
   * Renders one frame, updates the timing readout and schedules the next frame.
   * @param {number} now Time passed in by requestAnimationFrame, in milliseconds.
   */
  function render(now) {
    now *= 0.001;  // convert to seconds
    const deltaTime = now - then;
    then = now;

    const startTime = performance.now();

    // Get the current texture from the canvas context and
    // set it as the texture to render to.
    renderPassDescriptor.colorAttachments[0].view =
        context.getCurrentTexture().createView();

    const encoder = device.createCommandEncoder();
    const pass = encoder.beginRenderPass(renderPassDescriptor);
    pass.setPipeline(pipeline);
    pass.setVertexBuffer(0, vertexBuffer);
    pass.setVertexBuffer(1, staticVertexBuffer);
    pass.setVertexBuffer(2, changingVertexBuffer);

    // Compute the per-instance values in our JavaScript side Float32Array
    const aspect = canvas.width / canvas.height;

    // set the scale and offset for each object
    for (let ndx = 0; ndx < settings.numObjects; ++ndx) {
      const {scale, offset, velocity} = objectInfos[ndx];
      // -1.5 to 1.5
      offset[0] = euclideanModulo(offset[0] + velocity[0] * deltaTime + 1.5, 3) - 1.5;
      offset[1] = euclideanModulo(offset[1] + velocity[1] * deltaTime + 1.5, 3) - 1.5;

      const off = ndx * (changingUnitSize / 4);
      vertexValues.set(offset, off + kOffsetOffset);
      vertexValues.set([scale / aspect, scale], off + kScaleOffset);
    }

    // upload all offsets and scales at once
    device.queue.writeBuffer(
        changingVertexBuffer, 0,
        vertexValues, 0, settings.numObjects * changingUnitSize / 4);

    pass.draw(numVertices, settings.numObjects);
    pass.end();

    if (canTimestamp) {
      encoder.resolveQuerySet(querySet, 0, querySet.count, resolveBuffer, 0);
      // The result buffer cannot be a copy target while a previous read-back
      // still has it mapped, so that frame's measurement is simply skipped.
      if (resultBuffer.mapState === 'unmapped') {
        encoder.copyBufferToBuffer(resolveBuffer, 0, resultBuffer, 0, resultBuffer.size);
      }
    }

    const commandBuffer = encoder.finish();
    device.queue.submit([commandBuffer]);

    if (canTimestamp && resultBuffer.mapState === 'unmapped') {
      resultBuffer.mapAsync(GPUMapMode.READ).then(() => {
        const times = new BigInt64Array(resultBuffer.getMappedRange());
        gpuTime = Number(times[1] - times[0]);
        // The readout below labels this value as µs.
        gpuAverage.addSample(gpuTime / 1000);
        resultBuffer.unmap();
      });
    }

    const jsTime = performance.now() - startTime;

    fpsAverage.addSample(1 / deltaTime);
    jsAverage.addSample(jsTime);

    infoElem.textContent = `\
fps: ${fpsAverage.get().toFixed(1)}
js: ${jsAverage.get().toFixed(1)}ms
gpu: ${canTimestamp ? `${gpuAverage.get().toFixed(1)}µs` : 'N/A'}
`;

    requestAnimationFrame(render);
  }
  requestAnimationFrame(render);

  // Keep the canvas's drawing size in step with its displayed size, clamped
  // to at least 1 and at most the device's 2D texture dimension limit.
  const observer = new ResizeObserver(entries => {
    for (const entry of entries) {
      const canvas = entry.target;
      const width = entry.contentBoxSize[0].inlineSize;
      const height = entry.contentBoxSize[0].blockSize;
      canvas.width = Math.max(1, Math.min(width, device.limits.maxTextureDimension2D));
      canvas.height = Math.max(1, Math.min(height, device.limits.maxTextureDimension2D));
    }
  });
  observer.observe(canvas);
}
/**
 * Reports a fatal setup problem to the user in an alert dialog.
 * @param {string} msg The message to show.
 */
function fail(msg) {
  globalThis.alert(msg);
}
// Start the demo. main() is async; the promise it returns is not awaited here.
main();
</script>
注意:您可能需要在 Chrome 的 about:flags 中启用“WebGPU Developer Features”(WebGPU 开发人员功能),或者等待 Chrome v121 或 v122。