我正在寻找一种优化 alpha 混合的方法,但是对于带有 alpha 的两种颜色(与问题如何快速 alpha 混合 RGBA 无符号字节颜色?)
最初我使用了带有浮点数的解决方案(RGB范围从
0.0f
到255.0f
,A范围从0.0f
到1.0f
):
inline void alphaBlend(Color& baseColor, Color targetColor)
{
float newAlpha = (1 - targetColor.A) * baseColor.A + targetColor.A;
baseColor.R = ((1 - targetColor.A) * baseColor.A * baseColor.R + targetColor.A * targetColor.R) / newAlpha;
baseColor.G = ((1 - targetColor.A) * baseColor.A * baseColor.G + targetColor.A * targetColor.G) / newAlpha;
baseColor.B = ((1 - targetColor.A) * baseColor.A * baseColor.B + targetColor.A * targetColor.B) / newAlpha;
}
我更改了算法以处理
unsigned int
RGBA 颜色。我将每个对 alpha 的引用替换为 (alpha / 255),然后更正公式,使值仍在正确的范围内。
baseColor.R = ((1 - targetColor.A) * baseColor.A * baseColor.R + targetColor.A * targetColor.R) / newAlpha;
Shorthand (targetColor.A -> tA etc.):
R = ((1 - tA) * bA * bR + tA * tR) / newAlpha
(introducing 255-based alpha requires replacing all A instances with A/255)
= ((1 - (tA / 255)) * (bA / 255) * bR + (tA / 255) * tR) / (newAlpha / 255)
(remove 255 from the denominator's denominator)
= (((1 - (tA / 255)) * (bA / 255) * bR + (tA / 255) * tR) * 255) / newAlpha
(get rid of direct alpha divisions by 255 by multiplying parethesis by 255/255)
= (( ((255 - tA) * bA * bR) / 255^2 + (tA * tR) / 255) * 255) / newAlpha
(multiplying by the last 255 causes denominators to reduce)
= ( ((255 - tA) * bA * bR) / 255 + (tA * tR * 255) / 255 ) / newAlpha
(Pushing numerator's denominator (255) to the denominator)
= ( (255 - tA) * bA * bR) + (tA * tR * 255) ) / (255 * newAlpha)
(Expanding first multiplication in numerator)
= ( 255 * bA * bR - tA * bA * bR + tA * tR * 255) / (255 * newAlpha)
^^^^^^^^^^^^ ^^^^^^^^^^^^^
(reordering not to fall below 0 during calculations)
= ( 255 * bA * bR + tA * tR * 255 - tA * bA * bR ) / (255 * newAlpha)
(grouping to minimize multiplications)
= ( (ba * bR + tA * tR) * 255 - tA * bA * bR ) / (255 * newAlpha)
(introducing bit shifting - losing precision, but in an acceptable range)
~= ( ((ba * bR + tA * tR) << 8) - tA * bA * bR) / (newAlpha << 8)
我设法编写了以下代码:
inline void alphaBlend(IntColor& baseColor, IntColor targetColor)
{
unsigned int a = (((baseColor.A + targetColor.A) << 8) - targetColor.A * baseColor.A) >> 8;
if (a > 0)
{
unsigned int divisor = a << 8;
unsigned int baseAR = baseColor.A * baseColor.R;
baseColor.R = (((targetColor.A * targetColor.R + baseAR) << 8) - (baseAR * targetColor.A)) / divisor;
unsigned int baseAG = baseColor.A * baseColor.G;
baseColor.G = (((targetColor.A * targetColor.G + baseAG) << 8) - (baseAG * targetColor.A)) / divisor;
unsigned int baseAB = baseColor.A * baseColor.B;
baseColor.B = (((targetColor.A * targetColor.B + baseAB) << 8) - (baseAB * targetColor.A)) / divisor;
baseColor.A = a;
}
else
{
baseColor.R = 0;
baseColor.G = 0;
baseColor.B = 0;
baseColor.A = 0;
}
}
此更改将示例数据的渲染时间从 27559 毫秒减少到 17751 毫秒。由于 alpha 混合似乎是渲染工作流程中最常见的操作,我很好奇是否有办法进一步优化它。
我想过同时对R和B进行计算,但不幸的是在某些情况下计算会超过两个字节(例如如果bA = bR = tA = tR = 255,则减法的左边部分将等于33162750 = 0x1faa05fe).
我可以应用任何其他优化来使此代码更快吗?
编辑:回复评论:
我将这个答案留给那些也在寻找基于整数计算的两种颜色与 alpha 的 alpha 混合(即允许“背景”或“基础”颜色也为半透明)的人。它不是非常快,但肯定比它的浮点等价物更快。
不幸的是,我的问题中提供的代码存在缺陷,有时会给出 256 的结果,这在某些情况下会导致丑陋的黑色像素 (
(unsigned char)256 == 0
)。
下面的代码提供了解决方案,也可以作为正确性检查。它验证了:
[0..255]
信息丰富的是,通常留在 [0.0f..1.0f] 范围内的浮点 alpha 被标准化为 [0.0f..255.0f],以便我可以将其与对应的 int 进行比较。
验证代码/解决方案如下。
#include <iostream>
int main()
{
uint64_t diffs = 0;
for (unsigned int baseAlpha = 0; baseAlpha < 256; baseAlpha++)
{
printf("Processing a1 = %d\n", baseAlpha);
for (unsigned int baseColor = 0; baseColor < 256; baseColor++)
for (unsigned int targetAlpha = 0; targetAlpha < 256; targetAlpha++)
for (unsigned int targetColor = 0; targetColor < 256; targetColor++)
{
// Evaluate float result (FLOAT ALPHA BLENDING)
// R, G, B in [0.0f,255.0f];
// A in [0.0f, 1.0f]
float floatBaseAlpha = baseAlpha / 255.0f;
float floatTargetAlpha = targetAlpha / 255.0f;
float floatResultAlpha = (1 - floatTargetAlpha) * floatBaseAlpha + floatTargetAlpha;
float floatResultColor = 0.0f;
if (floatResultAlpha >= 1.0f / 255.0f)
{
floatResultColor = ((1 - floatTargetAlpha) * floatBaseAlpha * baseColor + floatTargetAlpha * targetColor) / floatResultAlpha;
}
else
{
floatResultColor = 0.0f;
}
floatResultAlpha *= 255.0f;
// Evaluate int result (INT ALPHA BLENDING)
// R, G, B, A in [0, 255]
int intResultAlpha = (((baseAlpha + targetAlpha) * 255) - targetAlpha * baseAlpha);
int intResultColor;
if (intResultAlpha > 0)
{
unsigned int divisor = intResultAlpha;
unsigned int baseAR = baseAlpha * baseColor;
intResultColor = (((targetAlpha * targetColor + baseAR) * 255) - (baseAR * targetAlpha)) / divisor;
}
else
{
intResultColor = 0;
}
intResultAlpha = intResultAlpha / 255;
// Compare
int alphaFromFloat = (int)floatResultAlpha;
int colorFromFloat = (int)floatResultColor;
int alphaFromInt = (int)intResultAlpha;
int colorFromInt = (int)intResultColor;
int aDiff = std::abs(alphaFromFloat - alphaFromInt);
int cDiff = std::abs(colorFromFloat - colorFromInt);
if (colorFromInt > 255 || colorFromInt < 0)
{
printf("Int color outside range!");
}
if (aDiff > 1 || cDiff > 1)
{
printf("Critical difference: bA: %u, bC: %u, tA: %u, tC: %u\n", baseAlpha, baseColor, targetAlpha, targetColor);
printf("Float result: A: %d, C: %d\n", alphaFromFloat, colorFromFloat);
printf("Int result: A: %d, C: %d\n", alphaFromInt, colorFromInt);
printf("Alpha difference: %d\n", aDiff);
printf("Color difference: %d\n", cDiff);
}
if (aDiff > 0 || cDiff > 0)
diffs++;
}
}
printf("Total differences: %lld (%lld%%)\n", diffs, (100Ui64 * diffs / (1Ui64 << 32)));
getchar();
}
结果出奇的好 - 应用程序测试了所有可能的颜色和 Alpha 组合,与浮点计算(我认为有效)不同的数量低于 1%(组合总数为 4 294 967 296 ):
Total differences: 4959508 (0%)
流行的 Alpha 混合优化是通过分别用 (
* 255, / 255
) 替换 (<< 8, >> 8
) 操作来实现的,它等于 (* 256, / 256
)。由于我们需要乘以和除以 255 而不是 256,因此优化的代价是精度下降。坏消息是错误结果的数量急剧增加,但好消息是错误仍然没有超过 alpha 和颜色的单位值:
int intResultAlpha = (((baseAlpha + targetAlpha) << 8) - targetAlpha * baseAlpha);
int intResultColor;
if (intResultAlpha > 0)
{
unsigned int divisor = intResultAlpha;
unsigned int baseAR = baseAlpha * baseColor;
intResultColor = (((targetAlpha * targetColor + baseAR) << 8) - (baseAR * targetAlpha)) / divisor;
}
else
{
intResultColor = 0;
}
intResultAlpha = intResultAlpha >> 8;
Total differences: 1218912093 (28%)
所以现在:
如果您决定选择最后一个解决方案,请不要忘记在这里发布另一个答案 - 我相信每个人都会从快速的 alpha 混合算法中受益。