Union 与 void 指针性能

问题描述 投票:0回答:1

TL;DR 联合体在性能方面比 void 指针更好

当我搜索 union 与 void 指针的性能对比时,我仔细阅读了《Union 与 void 指针》这个问题。许多人建议使用 union,但理由都不是性能。我的问题是:由于需要一次又一次地对 void 指针进行类型转换,它是否会比 union 花费更多时间?

我写了下面的代码来测试性能,发现union要好得多。

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Forward declarations: let the callback typedefs below name the two
 * container structs before their full definitions appear. */
typedef struct union_test union_test;
typedef struct void_test void_test;
/* Callback types: each takes a pointer to its own container type and
 * returns nothing. */
typedef void (*PrintUnionFunction)(union_test *);
typedef void (*PrintVoidFunction)(void_test *);

/* Tag stored in each container's elemType field to name the element type. */
typedef enum ELEM_TYPE
{
    INT_TYPE,
    STRING_TYPE,
    FLOAT_TYPE
} ELEM_TYPE;

/* Container that keeps its array of element pointers in a typed,
 * anonymous union. */
struct union_test
{
    /* Element-type tag; presumably selects the active union member
     * (nothing in the struct itself enforces that). */
    ELEM_TYPE elemType;
    /* Anonymous union: the three array pointers share the same storage and
     * are accessed directly as members of the enclosing struct. */
    union {
        int **intElems;
        float **floatElems;
        char **stringElems;
    };
    /* Number of entries in the pointer array; used as the loop bound. */
    size_t elemCount;
    /* Callback that takes a pointer to a union_test. */
    PrintUnionFunction printFunction;
};

/* Container that keeps its array of element pointers as untyped void *. */
struct void_test
{
    /* Element-type tag; presumably tells the reader of elems which type to
     * convert each entry to (not enforced by the struct itself). */
    ELEM_TYPE elemType;
    /* Array of untyped element pointers. */
    void **elems;
    /* Number of entries in elems; used as the loop bound. */
    size_t elemCount;
    /* Callback that takes a pointer to a void_test. */
    PrintVoidFunction printFunction;
};

/*
 * Adds one to every int reachable through fpTest->intElems.
 *
 * Despite its name the function prints nothing; the increment is the
 * workload timed by the benchmark.
 *
 * fpTest: container whose intElems array holds elemCount int pointers.
 */
void printUnionInt(union_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        /* The union member already has type int **, so no conversion is needed. */
        int *cell = fpTest->intElems[idx];

        *cell += 1;
        ++idx;
    }
}

/*
 * Adds one to every int reachable through fpTest->elems, treating each
 * void * entry as a pointer to int.
 *
 * Despite its name the function prints nothing; the increment is the
 * workload timed by the benchmark.
 *
 * fpTest: container whose elems array holds elemCount pointers to int.
 */
void printVoidPointerInt(void_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        /* void * converts to int * implicitly in C; no cast is required. */
        int *cell = fpTest->elems[idx];

        *cell += 1;
        ++idx;
    }
}

/*
 * Benchmark driver.
 *
 * Builds two containers of elemCount individually heap-allocated ints, one
 * reached through the typed union member and one through void **, then times
 * a single increment pass over each with clock() and prints both durations.
 *
 * Returns EXIT_SUCCESS after printing the timings, or EXIT_FAILURE if any
 * allocation fails. All memory is released on every path.
 */
int main(void)
{
    /* 2^30 elements; with 8-byte pointers each pointer array alone is 8 GiB. */
    const size_t elemCount = (size_t)1024 * 1024 * 1024;
    clock_t start_time_union, end_time_union;
    clock_t start_time_void, end_time_void;
    union_test *fp = NULL;
    void_test *void_fp = NULL;
    /* Number of element slots already allocated, so cleanup frees exactly those. */
    size_t unionFilled = 0;
    size_t voidFilled = 0;
    int status = EXIT_FAILURE;

    fp = malloc(sizeof *fp);
    if (fp == NULL)
    {
        goto cleanup;
    }
    fp->elemCount = elemCount;
    fp->elemType = INT_TYPE;
    fp->printFunction = printUnionInt;
    /* calloc rejects a count * size overflow instead of silently wrapping. */
    fp->intElems = calloc(fp->elemCount, sizeof *fp->intElems);
    if (fp->intElems == NULL)
    {
        goto cleanup;
    }

    for (; unionFilled < fp->elemCount; unionFilled++)
    {
        int *cell = malloc(sizeof *cell);

        if (cell == NULL)
        {
            goto cleanup;
        }
        /* Assign the value directly: copying the first sizeof(int) bytes of
         * a size_t would depend on the platform's byte order. */
        *cell = (int)unionFilled;
        fp->intElems[unionFilled] = cell;
    }

    /* sizeof *void_fp ties the allocation size to the object's real type. */
    void_fp = malloc(sizeof *void_fp);
    if (void_fp == NULL)
    {
        goto cleanup;
    }
    void_fp->elemCount = elemCount;
    void_fp->elemType = INT_TYPE;
    void_fp->printFunction = printVoidPointerInt;
    void_fp->elems = calloc(void_fp->elemCount, sizeof *void_fp->elems);
    if (void_fp->elems == NULL)
    {
        goto cleanup;
    }

    for (; voidFilled < void_fp->elemCount; voidFilled++)
    {
        int *cell = malloc(sizeof *cell);

        if (cell == NULL)
        {
            goto cleanup;
        }
        *cell = (int)voidFilled;
        void_fp->elems[voidFilled] = cell;
    }

    /* Only the two passes below are timed; setup and teardown are excluded. */
    start_time_union = clock();
    fp->printFunction(fp);
    end_time_union = clock();

    start_time_void = clock();
    void_fp->printFunction(void_fp);
    end_time_void = clock();

    printf("\n\nunion execution time: %f seconds\n", (double)(end_time_union - start_time_union) / CLOCKS_PER_SEC);
    printf("void pointer execution time: %f seconds\n", (double)(end_time_void - start_time_void) / CLOCKS_PER_SEC);

    status = EXIT_SUCCESS;

cleanup:
    if (status != EXIT_SUCCESS)
    {
        fprintf(stderr, "memory allocation failed\n");
    }

    if (void_fp != NULL)
    {
        for (size_t i = 0; i < voidFilled; i++)
        {
            free(void_fp->elems[i]);
        }
        free(void_fp->elems);
    }
    free(void_fp);

    if (fp != NULL)
    {
        for (size_t i = 0; i < unionFilled; i++)
        {
            free(fp->intElems[i]);
        }
        free(fp->intElems);
    }
    free(fp);

    return status;
}

我得到了以下结果。

union execution time: 8.237730 seconds
void pointer execution time: 8.647505 seconds

我在一些地方看到,使用 -O3 会让编译器不再关心指针类型,而把所有内容都当作内存字节来处理;但即使加上 -O3 标志,我也没有看到任何性能提升。

注意:我只关心性能而不是可读性。我知道,当我们要处理的类型数量有限时,使用 union 是提高可读性的好方法。

c union void-pointers
1个回答
0
投票

以下代码:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Forward declarations: let the callback typedefs below name the two
 * container structs before their full definitions appear. */
typedef struct union_test union_test;
typedef struct void_test void_test;
/* Callback types: each takes a pointer to its own container type and
 * returns nothing. */
typedef void (*PrintUnionFunction)(union_test *);
typedef void (*PrintVoidFunction)(void_test *);

/* Tag stored in each container's elemType field to name the element type. */
typedef enum ELEM_TYPE
{
    INT_TYPE,
    STRING_TYPE,
    FLOAT_TYPE
} ELEM_TYPE;

/* Container that keeps its array of element pointers in a typed,
 * anonymous union. */
struct union_test
{
    /* Element-type tag; presumably selects the active union member
     * (nothing in the struct itself enforces that). */
    ELEM_TYPE elemType;
    /* Anonymous union: the three array pointers share the same storage and
     * are accessed directly as members of the enclosing struct. */
    union {
        int **intElems;
        float **floatElems;
        char **stringElems;
    };
    /* Number of entries in the pointer array; used as the loop bound. */
    size_t elemCount;
    /* Callback that takes a pointer to a union_test. */
    PrintUnionFunction printFunction;
};

/* Container that keeps its array of element pointers as untyped void *. */
struct void_test
{
    /* Element-type tag; presumably tells the reader of elems which type to
     * convert each entry to (not enforced by the struct itself). */
    ELEM_TYPE elemType;
    /* Array of untyped element pointers. */
    void **elems;
    /* Number of entries in elems; used as the loop bound. */
    size_t elemCount;
    /* Callback that takes a pointer to a void_test. */
    PrintVoidFunction printFunction;
};

/*
 * Adds one to every int reachable through fpTest->intElems.
 * Despite its name the function prints nothing.
 *
 * fpTest: container whose intElems array holds elemCount int pointers.
 */
void printUnionInt(union_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        /* The union member already has type int **, so no conversion is needed. */
        int *cell = fpTest->intElems[idx];

        *cell += 1;
        ++idx;
    }
}

/*
 * Adds one to every int reachable through fpTest->elems, treating each
 * void * entry as a pointer to int. Despite its name the function prints
 * nothing.
 *
 * fpTest: container whose elems array holds elemCount pointers to int.
 */
void printVoidPointerInt(void_test *fpTest)
{
    size_t idx = 0;

    while (idx < fpTest->elemCount)
    {
        /* void * converts to int * implicitly in C; no cast is required. */
        int *cell = fpTest->elems[idx];

        *cell += 1;
        ++idx;
    }
}

使用 gcc 13.2 和 -O3 在 godbolt 上编译,生成如下汇编:

# printUnionInt(union_test *fpTest): rdi holds fpTest.
printUnionInt:
        # rdx = fpTest->elemCount (offset 16); return at once when it is zero
        mov     rdx, QWORD PTR [rdi+16]
        test    rdx, rdx
        je      .L1
        # rax = start of the pointer array (offset 8), rcx = one past its last entry
        mov     rax, QWORD PTR [rdi+8]
        lea     rcx, [rax+rdx*8]
.L3:
        # load the next element pointer, advance the cursor, increment the int it points to
        mov     rdx, QWORD PTR [rax]
        add     rax, 8
        add     DWORD PTR [rdx], 1
        cmp     rcx, rax
        jne     .L3
.L1:
        ret
# printVoidPointerInt(void_test *fpTest): rdi holds fpTest.
printVoidPointerInt:
        # rdx = fpTest->elemCount (offset 16); return at once when it is zero
        mov     rdx, QWORD PTR [rdi+16]
        test    rdx, rdx
        je      .L9
        # rax = start of the pointer array (offset 8), rcx = one past its last entry
        mov     rax, QWORD PTR [rdi+8]
        lea     rcx, [rax+rdx*8]
.L11:
        # load the next element pointer, advance the cursor, increment the int it points to
        mov     rdx, QWORD PTR [rax]
        add     rax, 8
        add     DWORD PTR [rdx], 1
        cmp     rcx, rax
        jne     .L11
.L9:
        ret

我拿了一把尺子,量了每个函数的长度,完全一样。没有任何区别。

© www.soinside.com 2019 - 2024. All rights reserved.