C缓存性能练习

问题描述 投票:0回答:1

我正在通过测量缓存未命中来进行有关缓存性能的练习。将针对创建链接列表并从列表中选择值的练习对性能进行测试。练习如下:

实现将包含类型元素的链表

typedef struct node {
   int size;
   int *val;
   struct node * next;
} node_t;

val变量指向包含«size»元素的数组。现在用100万个大小为100的数组填充您的链接列表。

  1. 首先我们将为i = 0至499和j = 0至99来选择500 000个值(i,j)>
  2. 现在,我们在链接列表中随机选择500 000个值i和j并检索它们。
  3. 现在用2000000大小的数组填充您的链接列表,随机选择1和200,然后选择500 000个连续或随机值。

[我实现了以下代码,并且还增加了从终端进行锻炼的可能性(0->正常选择,1->随机选择,2->连续和全部3)。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>


struct node {
    int size;
    int* val;
    struct node* next;
};

clock_t start, end;
double cpuTimeUsed;


//initialize head in NULL for empty linked list
struct node* head;

//push new node at beginning
void push(int* val, int size) {
    struct node* newNode = (struct node*) malloc(sizeof(struct node));
    newNode->val = val;
    newNode->size = size;

    newNode->next = head;
    head = newNode;
}

//does not check if head is null
//assumes i and j are positive integers >= 0

int retrieve(int i, int j) {
    int indexNode = 0;
    int indexElement = 0;
    struct node* current = head;
    while (indexNode < i) {
        current = current->next;
        indexNode++;
    }

    int size = current->size;
    int* values = current->val;

    while (indexElement < j && indexElement < size) {
        indexElement++;
        values++;
    }
    return *values;
}

void deleteList() {
    struct node* current = head;
    while (current != NULL) {
        struct node* next = current->next;
        free(current);
        current = next;
    }
    head = NULL;
}

struct node* getNode(int index) {
    struct node* current = head;
    int i;
    for (i = 0; i < index; i++) {
        current = current->next;
    }
    return current;
}

void initialize(int numberNodes, int numberVals) {
    int i;

    for (i = 0; i < numberNodes; i++) {
        int array[numberVals];
        int* values = array;
        push(values, numberVals);
    }
}

void initializeRandomSize(int numberNodes, int maxValues) {
    int i;
    int randomSize;
    for (i = 0; i < numberNodes; i++) {
        randomSize = (rand() % 200) + 1; //return a number between 1 and 200
        int array[randomSize];
        int* values = array;
        push(values, randomSize);

    }

}

void performNormalSelect() {
    //selecting 500,000 values from 0 to 499 in i and j in 0 to 99
    int value;
    int i;
    int j;
    for (i = 0; i < 500; i++) {
        for (j = 0; j < 100; j++) {
            value = retrieve(i, j);
        }
    }
}

void performRandomSelect(int numberNodes, int numberVals) {
    //randomly select 500,000 values
    int i;
    int j;
    int n;
    int value;
    for (n = 0; n < 500000; n++) {
        i = rand() % numberNodes;
        j = rand() % numberVals;
        value = retrieve(i, j);
    }
}

void performContiguousSelect(int numberNodes) {
        int valuesSelected = 0;
        int nodeNumber;
        int i;
        int value;
        struct node* currentNode;

        while (valuesSelected < 500000) {
            //get a random node 
            int nodeNumber = rand() % numberNodes;
            currentNode = getNode(nodeNumber);
            int size = currentNode->size;
            //get all values in nodes val
            for (i = 0; i < size; i++) {
                value = retrieve(nodeNumber, i);
                valuesSelected++;
            }
        }
    }


int main(int argc, char *argv[]) {
    srand(time(NULL)); //used for random numbers

    if (argc != 2) {
        printf("Oops, only accept a value for option number\n");
        return 0;
    }

    char option = argv[1][0];

    switch (option) {
        case '0' :
            initialize(1000000, 100);
            //select values from i in range(0, 499) and j in range(0,99)
            start = clock();
            performNormalSelect();
            end = clock();
            cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC;
            printf("Time for normal select: %f\n", cpuTimeUsed);
            break;

        case '1' :
            initialize(1000000, 100);
            //select randomly values
            start = clock();
            performRandomSelect(1000000, 100);
            end = clock();
            cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC;
            printf("Time for random select: %f\n", cpuTimeUsed);
            break;

        case '2':
            //fill list with 2000000 of random size 1 and 200 and select 500000 continuos
            initializeRandomSize(2000000, 200);
            start = clock();
            performContiguousSelect(2000000);
            end = clock();
            cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC;
            printf("Time for contiguous select: %f\n", cpuTimeUsed);
            break;

        case '3':
            //perform all of the above
            initialize(1000000, 100);
            start = clock();
            performNormalSelect();
            end = clock();
            double cpuNormal = ((double) (end - start)) / CLOCKS_PER_SEC;

            start = clock();
            performRandomSelect(1000000, 100);
            end = clock();
            double cpuRandom = ((double) (end - start)) / CLOCKS_PER_SEC;

            deleteList();
            initializeRandomSize(2000000, 200);
             start = clock();
            performContiguousSelect(2000000);
            end = clock();
            double cpuCont = ((double) (end - start)) / CLOCKS_PER_SEC;

            printf("Time normal select: %f\n", cpuNormal);
            printf("Time random select: %f\n", cpuRandom);
            printf("Time contiguous select: %f\n", cpuCont);
            break;

        default :
            cpuTimeUsed = 0;
            printf("Time: %f\n", cpuTimeUsed);
    }







    return 0;
}

然后我使用valgrind来检查缓存未命中

valgrind --tool=callgrind --simulate-cache=yes ./program program-arguments

当我尝试使用随机选择运行探查器时,就会出现问题。探查器花费了8个多小时,直到那时仍未完成。我知道在不使用分析器的情况下尝试该程序时,随机选择的确会产生结果。我在具有以下配置的Ubuntu VM上运行程序:

  • OS:18.04
  • 处理器数量:2
  • 基本内存:4096 MB
  • 初始硬盘内存10GB
  • 代码是否有问题,还是应该让探查器运行更多时间?任何反馈表示赞赏,谢谢!

(编辑)还有一个优化问题,但是我很难理解如何将其实现到先前的代码中。优化-现在我们将尝试优化此链接列表。为此,我们将分配1亿个字节的大内存空间,并将指针替换为如此大的存储空间中的索引。这意味着节点结构现在是

typedef struct node {
int size;
int val;
struct node nextInd;
} node_t;

其中val现在是我们已分配的内存中的索引,而下一个节点是由索引标识。

我正在通过测量缓存未命中来进行有关缓存性能的练习。将针对创建链接列表并从列表中选择值的练习对性能进行测试。 ...

c
1个回答
0
投票
© www.soinside.com 2019 - 2024. All rights reserved.