优化 OpenGL 计算着色器

问题描述 投票:0回答:0

我一直在尝试制作自己的游戏,目前正在研究 PBR 泛光(Bloom)效果。我写了一个简单的 Bloom 测试程序,并试图对它进行优化,因此有几个问题想请教。

这是我的代码。注意:为了让泛光(Bloom)效果运行得更快,我选择使用计算着色器,而不是离屏渲染。

#include "main.h"
#include "FileIO.h"
#include "stb_image.h"
#include "glm/gtc/type_ptr.hpp"


/*
 * Entry point of the bloom test application.
 *
 * Sets up the GL state and GPU resources (uniform buffer, a full-screen quad,
 * the mesh loaded from file, an off-screen framebuffer with two colour
 * attachments, and a chain of five half-resolution bloom textures), then runs
 * the frame loop:
 *   1. render the mesh into the off-screen framebuffer,
 *   2. run the down-sampling compute passes through the bloom chain,
 *   3. run the up-sampling compute passes back up into `tex`,
 *   4. draw a full-screen quad to the default framebuffer with the final program.
 *
 * Returns -1 if InitGLFW() fails, 0 on normal exit.
 */
int main(void)
{
    if (!InitGLFW())
        return -1;
    glEnable(GL_DEPTH_TEST);
    //glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
    glEnable(GL_CULL_FACE);
    glEnable(GL_FRAMEBUFFER_SRGB);

    Meshes::MeshProperties prop;
    Meshes::loadMeshFromFile("resource\\mesh\\StandartCube.mesh", &prop);

    glm::mat4 projM = glm::perspective(glm::radians(45.0f), (float)SCREEN_WIDTH / (float)SCREEN_HEIGHT, 0.1f, 25.0f);
    glm::mat4 viewM;

    // Uniform buffer holding two mat4: projection at offset 0 (written once here),
    // view at offset sizeof(mat4) (rewritten every frame in the loop below).
    unsigned int uniformBuffer;
    glCreateBuffers(1, &uniformBuffer);
    glNamedBufferData(uniformBuffer, 2 * sizeof(glm::mat4), nullptr, GL_DYNAMIC_DRAW);
    glNamedBufferSubData(uniformBuffer, 0, sizeof(glm::mat4), &projM);
    glBindBufferBase(GL_UNIFORM_BUFFER, 0, uniformBuffer);

    // Full-screen quad: 4 vertices of 5 floats each (3 floats for attribute 0,
    // 2 floats for attribute 1), drawn as two indexed triangles.
    float screenBox[20] = 
    {
        -1.0f, -1.0f, 0.0f, 0.0f, 0.0f,
         1.0f, -1.0f, 0.0f, 1.0f, 0.0f,
         1.0f,  1.0f, 0.0f, 1.0f, 1.0f,
        -1.0f,  1.0f, 0.0f, 0.0f, 1.0f,
    };

    unsigned int boxIndices[6] = 
    {
        0,1,2,
        2,3,0
    };


    unsigned int boxVertexArrayID, boxVertexBufferID, boxIndexBufferID;
    glCreateVertexArrays(1, &boxVertexArrayID);
    glCreateBuffers(1, &boxVertexBufferID);
    glCreateBuffers(1, &boxIndexBufferID);

    glNamedBufferData(boxVertexBufferID, sizeof(screenBox), screenBox, GL_STATIC_DRAW);
    glNamedBufferData(boxIndexBufferID, sizeof(boxIndices), boxIndices, GL_STATIC_DRAW);

    // Attribute 0: 3 floats at byte offset 0.
    glEnableVertexArrayAttrib(boxVertexArrayID, 0);
    glVertexArrayAttribBinding(boxVertexArrayID, 0, 0);
    glVertexArrayAttribFormat(boxVertexArrayID, 0, 3, GL_FLOAT, GL_FALSE, 0);

    // Attribute 1: 2 floats at byte offset 12.
    glEnableVertexArrayAttrib(boxVertexArrayID, 1);
    glVertexArrayAttribBinding(boxVertexArrayID, 1, 0);
    glVertexArrayAttribFormat(boxVertexArrayID, 1, 2, GL_FLOAT, GL_FALSE, 12);

    // Stride of 20 bytes = 5 floats per vertex.
    glVertexArrayVertexBuffer(boxVertexArrayID, 0, boxVertexBufferID, 0, 20);
    glVertexArrayElementBuffer(boxVertexArrayID, boxIndexBufferID);
    
    // Vertex array for the mesh loaded from file.
    unsigned int vertexArrayID, vertexBufferID, indexBufferID;
    glCreateVertexArrays(1, &vertexArrayID);
    glCreateBuffers(1, &vertexBufferID);
    glCreateBuffers(1, &indexBufferID);

    glNamedBufferData(vertexBufferID, prop.getVertexSize(), prop.vertexData, GL_STATIC_DRAW);
    glNamedBufferData(indexBufferID, prop.getIndexSize(), prop.indexData, GL_STATIC_DRAW);

    // Attribute 0: 3 floats at byte offset 0.
    glEnableVertexArrayAttrib(vertexArrayID, 0);
    glVertexArrayAttribBinding(vertexArrayID, 0, 0);
    glVertexArrayAttribFormat(vertexArrayID, 0, 3, GL_FLOAT, GL_FALSE, 0);
    
    // Attribute 1: 2 floats at byte offset 12.
    glEnableVertexArrayAttrib(vertexArrayID, 1);
    glVertexArrayAttribBinding(vertexArrayID, 1, 0);
    glVertexArrayAttribFormat(vertexArrayID, 1, 2, GL_FLOAT, GL_FALSE, 12);

    // Attribute 2: 3 floats at byte offset 20.
    glEnableVertexArrayAttrib(vertexArrayID, 2);
    glVertexArrayAttribBinding(vertexArrayID, 2, 0);
    glVertexArrayAttribFormat(vertexArrayID, 2, 3, GL_FLOAT, GL_FALSE, 20);

    // Stride of 32 bytes = 8 floats per vertex.
    glVertexArrayVertexBuffer(vertexArrayID, 0, vertexBufferID, 0, 32);
    glVertexArrayElementBuffer(vertexArrayID, indexBufferID);
    
    // --------------------------------------------

    // Off-screen render target: two colour textures plus a depth/stencil renderbuffer.
    // `tex` is bound to texture unit 0 and `finalTexture` to texture unit 1.
    unsigned int FBOCAPTURE, RBO, tex, finalTexture;

    glCreateTextures(GL_TEXTURE_2D, 1, &tex);
    glTextureParameteri(tex, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTextureParameteri(tex, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTextureParameteri(tex, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTextureParameteri(tex, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTextureStorage2D(tex, 1, GL_R11F_G11F_B10F, FBO_SIZE, FBO_SIZE);
    glBindTextureUnit(0, tex);

    glCreateTextures(GL_TEXTURE_2D, 1, &finalTexture);
    glTextureParameteri(finalTexture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTextureParameteri(finalTexture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTextureParameteri(finalTexture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTextureParameteri(finalTexture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTextureStorage2D(finalTexture, 1, GL_R11F_G11F_B10F, FBO_SIZE, FBO_SIZE);
    glBindTextureUnit(1, finalTexture);

    glCreateRenderbuffers(1, &RBO);
    glNamedRenderbufferStorage(RBO, GL_DEPTH24_STENCIL8, FBO_SIZE, FBO_SIZE);
    
    glCreateFramebuffers(1, &FBOCAPTURE);
    glNamedFramebufferTexture(FBOCAPTURE, GL_COLOR_ATTACHMENT0, tex, 0);
    glNamedFramebufferTexture(FBOCAPTURE, GL_COLOR_ATTACHMENT1, finalTexture, 0);
    unsigned int drawBuffers[2] = {GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1};
    glNamedFramebufferDrawBuffers(FBOCAPTURE, 2, drawBuffers);
    glNamedFramebufferRenderbuffer(FBOCAPTURE, GL_DEPTH_STENCIL_ATTACHMENT, GL_RENDERBUFFER, RBO);


    // Bloom chain: level i has size FBO_SIZE / 2^(i+1) in each dimension.
    const unsigned int bloomLevels = 5;
    unsigned int bloomTextures[bloomLevels];
    glCreateTextures(GL_TEXTURE_2D, bloomLevels, bloomTextures);
    unsigned int size[2] = { FBO_SIZE, FBO_SIZE};
    for (unsigned int i = 0; i < bloomLevels; i++)
    {
        size[0] /= 2;
        size[1] /= 2;

        glTextureParameteri(bloomTextures[i], GL_TEXTURE_MIN_FILTER, GL_LINEAR);
        glTextureParameteri(bloomTextures[i], GL_TEXTURE_MAG_FILTER, GL_LINEAR);
        glTextureParameteri(bloomTextures[i], GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
        glTextureParameteri(bloomTextures[i], GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        glTextureStorage2D(bloomTextures[i], 1, GL_R11F_G11F_B10F, size[0], size[1]);
    }


    // Shader programs: mesh rendering, the two compute passes, and the final composite.
    unsigned int cubeProgram = glCreateProgram();
    glAttachShader(cubeProgram, Shaders::loadShaderfromFile("resource\\cubeVertex.shader", GL_VERTEX_SHADER));
    glAttachShader(cubeProgram, Shaders::loadShaderfromFile("resource\\cubeFragment.shader", GL_FRAGMENT_SHADER));
    glLinkProgram(cubeProgram);
    glValidateProgram(cubeProgram);

    unsigned int downSampler = glCreateProgram();
    glAttachShader(downSampler, Shaders::loadShaderfromFile("resource\\downSampler.shader", GL_COMPUTE_SHADER));
    glLinkProgram(downSampler);

    unsigned int upSampler = glCreateProgram();
    glAttachShader(upSampler, Shaders::loadShaderfromFile("resource\\upSampler.shader", GL_COMPUTE_SHADER));
    glLinkProgram(upSampler);

    unsigned int finalProgram = glCreateProgram();
    glAttachShader(finalProgram, Shaders::loadShaderfromFile("resource\\samplerVertex.shader", GL_VERTEX_SHADER));
    glAttachShader(finalProgram, Shaders::loadShaderfromFile("resource\\finalFragment.shader", GL_FRAGMENT_SHADER));
    glLinkProgram(finalProgram);
    glValidateProgram(finalProgram);
    glUseProgram(finalProgram);

    // "texBloom" reads unit 0 (tex) and "texHDR" reads unit 1 (finalTexture).
    glUniform1i(glGetUniformLocation(finalProgram, "texBloom"), 0);
    glUniform1i(glGetUniformLocation(finalProgram, "texHDR"), 1);
    glUniform1f(glGetUniformLocation(finalProgram, "exposure"), 1.0f);
    glUniform1f(glGetUniformLocation(finalProgram, "bloomStrength"), 0.4f);

    

    // Drain and print any GL errors raised during setup.
    while (GLenum error = glGetError())
    {
        std::cout << error << std::endl;
    }
  
    while (!glfwWindowShouldClose(window))
    {
        //Input process
        currentFrame = (float)glfwGetTime();
        deltaFrame = currentFrame - lastFrame;
        lastFrame = currentFrame;
        KeyboardInputs(2.0f);
        viewM = glm::lookAt(cameraPos, cameraPos + cameraFront, cameraUp);
        glNamedBufferSubData(uniformBuffer, sizeof(glm::mat4), sizeof(glm::mat4), &viewM);

        //Render
        glViewport(0, 0, FBO_SIZE, FBO_SIZE);
        glBindFramebuffer(GL_FRAMEBUFFER, FBOCAPTURE);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
        glUseProgram(cubeProgram);
        glBindVertexArray(vertexArrayID);
        glDrawElements(GL_TRIANGLES, 36, GL_UNSIGNED_INT, nullptr);
        // Framebuffer names are integers; 0 (not the pointer macro NULL) selects the default framebuffer.
        glBindFramebuffer(GL_FRAMEBUFFER, 0);

        //DownSampling: tex -> bloomTextures[0] -> ... -> bloomTextures[4]
        // Work-group counts halve with every level, starting at (FBO_SIZE / 16, FBO_SIZE / 8).
        // NOTE(review): these counts presumably assume an 8x4 local size in the compute
        // shaders - confirm against downSampler.shader / upSampler.shader.
        glUseProgram(downSampler);
        for (unsigned int i = 0; i < bloomLevels; i++)
        {
            const unsigned int source = (i == 0) ? tex : bloomTextures[i - 1];
            glBindImageTexture(0, source, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R11F_G11F_B10F);
            glBindImageTexture(1, bloomTextures[i], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R11F_G11F_B10F);
            glDispatchCompute(FBO_SIZE / (16 << i), FBO_SIZE / (8 << i), 1);
            // The next pass reads this pass's output through image access.
            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
        }

        //UpSampling: bloomTextures[4] -> ... -> bloomTextures[0] -> tex
        // Work-group counts double with every step, ending at (FBO_SIZE / 8, FBO_SIZE / 4).
        glUseProgram(upSampler);
        for (unsigned int i = bloomLevels; i-- > 0;)
        {
            const unsigned int target = (i == 0) ? tex : bloomTextures[i - 1];
            glBindImageTexture(0, bloomTextures[i], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R11F_G11F_B10F);
            glBindImageTexture(1, target, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R11F_G11F_B10F);
            glDispatchCompute(FBO_SIZE / (8 << i), FBO_SIZE / (4 << i), 1);
            glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
        }

        // `tex` was last written with image stores. The image-access barrier alone does not
        // order those stores against the texture fetches of the final draw (unit 0), nor
        // against the next frame's clear/render into `tex` as a framebuffer attachment.
        glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);

        // Composite to the default framebuffer with a full-screen quad.
        glViewport(0, 0, SCREEN_WIDTH, SCREEN_HEIGHT);
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
        glUseProgram(finalProgram);
        glBindVertexArray(boxVertexArrayID);
        glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_INT, nullptr);



        glfwSwapBuffers(window);
        glfwPollEvents();
    }
    glfwTerminate();
    return 0;
}

// Keyboard callback: on a key-press event for Escape, flags the window
// to close. Every other key or action is ignored. `scancode` and `mods` are unused.
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mods)
{
    // Only press events are handled.
    if (action != GLFW_PRESS)
        return;

    if (key == GLFW_KEY_ESCAPE)
        glfwSetWindowShouldClose(window, true);
}

我的问题是,

  1. 无论是否使用 glMemoryBarrier,为什么我都无法让异步计算(async compute)处于并行执行(in flight)状态?
  2. PS Warp Can't Launch部分有一个峰值,请问有什么办法可以避免,还是正常的?
  3. 在我的计算着色器中,我使用的调用数是 (Usage1) x = 4, y = 4, z = 1。我是否应该改用 (Usage2) x = 8, y = 4, z = 1,以便用满整个 warp?
  4. 还有一个我觉得很奇怪的现象:当我使用 Usage1 这样的调用数时,GPU 使用率(与 Usage2 相比)有所下降,但 GPU 显存时钟频率却在 800 MHz 到 6000 MHz 之间不断上下波动。我不知道这是为什么。
c++ optimization glsl compute-shader bloom
© www.soinside.com 2019 - 2024. All rights reserved.