我以 https://vulkan-tutorial.com/ 的深度缓冲(Depth buffering)章节代码为基础,并做了一些修改,使命令缓冲区在每一帧都会更新。
我用一种比较粗略的方法来测量 fps,不确定它有多准确,但目前就是用下面这段代码来检查 fps 的。
// Rough FPS probe: counts how many times this code runs during the first
// second after it is first reached. `counter` is declared outside this snippet.
using Clock = std::chrono::high_resolution_clock;

// Captured once, the first time execution reaches this point.
static const Clock::time_point firstCall = Clock::now();

// Seconds elapsed since the first call, as a float.
const std::chrono::duration<float> elapsed = Clock::now() - firstCall;

if (elapsed.count() >= 1.0f)
{
    // The first second is over: `counter` now holds the frame count.
    int a = 34; // breakpoint anchor: inspect `counter` here to read the fps.
}
else
{
    ++counter;
}
在不每帧更新纹理的情况下(命令缓冲区仍然每帧更新),fps 约为 3500。如果我尝试每帧更新纹理,fps 会降到 350 左右。
这只是使用空白纹理的测试代码,不过这就是我首次上传纹理以及之后更新纹理所用的流程。
void createTextureImage()
{
int Width = 1024;
int Height = 1024;
VkDeviceSize imageSize = Width * Height * sizeof(Pixel);
PixelImage.resize(Width * Height, Pixel(0xFF, 0x00, 0x00));
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory);
void* data;
vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data);
memcpy(data, PixelImage.data(), static_cast<size_t>(imageSize));
vkUnmapMemory(device, stagingBufferMemory);
createImage(Width, Height, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, textureImage, textureImageMemory);
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copyBufferToImage(stagingBuffer, textureImage, static_cast<uint32_t>(Width), static_cast<uint32_t>(Height));
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingBufferMemory, nullptr);
}
void UpdateTexture()
{
VkDeviceSize imageSize = 1024 * 1024 * sizeof(Pixel);
memset(&PixelImage[0], 0xFF, imageSize);
VkBuffer stagingBuffer;
VkDeviceMemory stagingBufferMemory;
createBuffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, stagingBuffer, stagingBufferMemory);
void* data;
vkMapMemory(device, stagingBufferMemory, 0, imageSize, 0, &data);
memcpy(data, PixelImage.data(), static_cast<size_t>(imageSize));
vkUnmapMemory(device, stagingBufferMemory);
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
copyBufferToImage(stagingBuffer, textureImage, static_cast<uint32_t>(1024), static_cast<uint32_t>(1024));
transitionImageLayout(textureImage, VK_FORMAT_R8G8B8A8_SRGB, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
vkDestroyBuffer(device, stagingBuffer, nullptr);
vkFreeMemory(device, stagingBufferMemory, nullptr);
vkDestroyImageView(device, textureImageView, nullptr);
CreateImageView();
}
我反复试验后发现,真正拖慢速度的似乎是每次都把全部数据写入缓冲区,以及多次转换图像布局。
作为补充背景,下面是更新纹理流程的其余部分。
UpdateTexture();
for (size_t i = 0; i < vulkanFrame.size(); i++)
{
VkDescriptorBufferInfo bufferInfo = {};
bufferInfo.buffer = uniformBuffers[i];
bufferInfo.offset = 0;
bufferInfo.range = sizeof(UniformBufferObject);
VkDescriptorImageInfo imageInfo = {};
imageInfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
imageInfo.imageView = textureImageView;
imageInfo.sampler = textureSampler;
std::array<VkWriteDescriptorSet, 2> descriptorWrites = {};
descriptorWrites[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[0].dstSet = descriptorSets[i];
descriptorWrites[0].dstBinding = 0;
descriptorWrites[0].dstArrayElement = 0;
descriptorWrites[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
descriptorWrites[0].descriptorCount = 1;
descriptorWrites[0].pBufferInfo = &bufferInfo;
descriptorWrites[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
descriptorWrites[1].dstSet = descriptorSets[i];
descriptorWrites[1].dstBinding = 1;
descriptorWrites[1].dstArrayElement = 0;
descriptorWrites[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptorWrites[1].descriptorCount = 1;
descriptorWrites[1].pImageInfo = &imageInfo;
vkUpdateDescriptorSets(device, static_cast<uint32_t>(descriptorWrites.size()), descriptorWrites.data(), 0, nullptr);
}
此外,对于一个只是空白刷新屏幕的 2D 游戏来说,多少的基础 fps 才算不错?我也用 Vulkan 做 3D,但同样想用它做一些复古风格的 2D 内容。
您每帧将从CPU发送4MB数据到GPU。在350 fps下,数据传输速度约为1.4GB /秒。考虑到所有因素,这相当不错。
暂存缓冲区(staging buffer)并不是真正的问题所在。一旦决定要从 CPU 向 GPU 发送数据,就必然要损失一些性能。
如果您确实坚持要避免使用暂存缓冲区,可以检查您的 Vulkan 实现是否允许着色器直接对线性平铺(linear tiling)的纹理进行采样。如果允许,您就可以把数据直接写入纹理的内存。不过,您需要对纹理做双重缓冲,以免写入 GPU 当前正在使用的纹理。但无论是否使用暂存缓冲区,这一点您都必须做到。
您可以做的更有效的事情,是停止做那些毫无意义的事情。您需要停止在 transitionImageLayout 和 copyBufferToImage 中不仅构建命令缓冲区(CB),而且还立即提交它。这会严重损害性能(如果 transitionImageLayout 也提交工作,则尤其如此)。
所有这些做法都会损害代码的 CPU 性能。它们不会改变 GPU 传输本身所需的时间,但会让触发该传输的代码运行得慢得多。
相比 Vulkan,我更熟悉 DirectX。不过,如非必要,最好不要通过暂存缓冲区拷贝的方式来更新纹理。如果您只是要在每一帧清除纹理,应该把它作为渲染通道(render pass)的附件,并将 loadOp 设置为 VK_ATTACHMENT_LOAD_OP_CLEAR。您还可以使用 vkCmdClearAttachments、vkCmdDrawIndexed 等渲染命令来更新纹理。