我需要对48位变量进行一些操作,所以我有两个选择:
因为操作不会溢出48位,所以我认为使用64位变量是过大的,所以我创建了基本结构
#ifdef __GNUC__
#define PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
#endif
#ifdef _MSC_VER
#define PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
#endif
PACK(struct uint48 {
unsigned long long v : 48;
});
并创建了一些代码来检查操作的速度
#include <stdio.h>
#include <time.h>
#ifdef __GNUC__
#define PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
#endif
#ifdef _MSC_VER
#define PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
#endif
PACK(struct uint48 {
unsigned long long v : 48;
});
void TestProductLong();
void TestProductLong02();
void TestProductPackedStruct();
void TestProductPackedStruct02();
clock_t start, end;
double cpu_time_used;
int cycleNumber = 100000;
int main(void)
{
TestProductLong();
TestProductLong02();
TestProductPackedStruct();
TestProductPackedStruct02();
return 0;
}
void TestProductLong() {
start = clock();
for (int i = 0; i < cycleNumber;i++) {
unsigned long long varlong01 = 155782;
unsigned long long varlong02 = 15519994;
unsigned long long product01 = varlong01 * varlong02;
unsigned long long varlong03 = 155782;
unsigned long long varlong04 = 15519994;
unsigned long long product02 = varlong03 * varlong04;
unsigned long long addition = product01 + product02;
}
end = clock();
cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("TestProductLong() took %f seconds to execute \n", cpu_time_used);
}
void TestProductLong02() {
start = clock();
unsigned long long varlong01;
unsigned long long varlong02;
unsigned long long product01;
unsigned long long varlong03;
unsigned long long varlong04;
unsigned long long product02;
unsigned long long addition;
for (int i = 0; i < cycleNumber;i++) {
varlong01 = 155782;
varlong02 = 15519994;
product01 = varlong01 * varlong02;
varlong03 = 155782;
varlong04 = 15519994;
product02 = varlong03 * varlong04;
addition = product01 + product02;
}
end = clock();
cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("TestProductLong02() took %f seconds to execute \n", cpu_time_used);
}
void TestProductPackedStruct() {
start = clock();
for (int i = 0; i < cycleNumber; i++) {
struct uint48 x01;
struct uint48 x02;
struct uint48 x03;
x01.v = 155782;
x02.v = 15519994;
x03.v = x01.v * x02.v;
struct uint48 x04;
struct uint48 x05;
struct uint48 x06;
x04.v = 155782;
x05.v = 15519994;
x06.v = x04.v * x05.v;
struct uint48 x07;
x07.v = x03.v + x06.v;
}
end = clock();
cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("TestProductPackedStruct() took %f seconds to execute \n", cpu_time_used);
}
void TestProductPackedStruct02() {
start = clock();
struct uint48 x01;
struct uint48 x02;
struct uint48 x03;
struct uint48 x04;
struct uint48 x05;
struct uint48 x06;
struct uint48 x07;
for (int i = 0; i < cycleNumber; i++) {
x01.v = 155782;
x02.v = 15519994;
x03.v = x01.v * x02.v;
x04.v = 155782;
x05.v = 15519994;
x06.v = x04.v * x05.v;
x07.v = x03.v + x06.v;
}
end = clock();
cpu_time_used = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("TestProductPackedStruct02() took %f seconds to execute \n", cpu_time_used);
}
但是我得到了以下结果
TestProductLong() took 0.000188 seconds to execute
TestProductLong02() took 0.000198 seconds to execute
TestProductPackedStruct() took 0.001231 seconds to execute
TestProductPackedStruct02() took 0.001231 seconds to execute
因此,使用unsigned long long的操作比使用压缩结构的操作花费的时间更少。
由于我现在正在展开循环,因此使用正确的数据结构可能会严重影响应用程序的性能。
谢谢。
您的测试程序错误。为什么?
当您将此代码排序为可优化时(由于您不使用该值,因此它们必须是全局的或至少是静态的。)>
不同之处在于将结果修整为48位。
如果打包了该结构(这里不需要),则强制编译器按字节访问变量-因为只有字节总是对齐的:https://godbolt.org/z/2iV7vq
您还可以使用混合方法-不可移植,因为它依赖于字节序和位域实现https://godbolt.org/z/J3-it_
尽管you