我正在通过测量缓存未命中来进行有关缓存性能的练习。将针对创建链接列表并从列表中选择值的练习对性能进行测试。练习如下:
实现将包含类型元素的链表
typedef struct node {
int size;
int *val;
struct node * next;
} node_t;
val变量指向包含«size»元素的数组。现在用100万个大小为100的数组填充您的链接列表。
[我实现了以下代码,并且还增加了从终端进行锻炼的可能性(0->正常选择,1->随机选择,2->连续和全部3)。
#include <stdio.h> #include <string.h> #include <stdlib.h> #include <time.h> struct node { int size; int* val; struct node* next; }; clock_t start, end; double cpuTimeUsed; //initialize head in NULL for empty linked list struct node* head; //push new node at beginning void push(int* val, int size) { struct node* newNode = (struct node*) malloc(sizeof(struct node)); newNode->val = val; newNode->size = size; newNode->next = head; head = newNode; } //does not check if head is null //assumes i and j are positive integers >= 0 int retrieve(int i, int j) { int indexNode = 0; int indexElement = 0; struct node* current = head; while (indexNode < i) { current = current->next; indexNode++; } int size = current->size; int* values = current->val; while (indexElement < j && indexElement < size) { indexElement++; values++; } return *values; } void deleteList() { struct node* current = head; while (current != NULL) { struct node* next = current->next; free(current); current = next; } head = NULL; } struct node* getNode(int index) { struct node* current = head; int i; for (i = 0; i < index; i++) { current = current->next; } return current; } void initialize(int numberNodes, int numberVals) { int i; for (i = 0; i < numberNodes; i++) { int array[numberVals]; int* values = array; push(values, numberVals); } } void initializeRandomSize(int numberNodes, int maxValues) { int i; int randomSize; for (i = 0; i < numberNodes; i++) { randomSize = (rand() % 200) + 1; //return a number between 1 and 200 int array[randomSize]; int* values = array; push(values, randomSize); } } void performNormalSelect() { //selecting 500,000 values from 0 to 499 in i and j in 0 to 99 int value; int i; int j; for (i = 0; i < 500; i++) { for (j = 0; j < 100; j++) { value = retrieve(i, j); } } } void performRandomSelect(int numberNodes, int numberVals) { //randomly select 500,000 values int i; int j; int n; int value; for (n = 0; n < 500000; n++) { i = rand() % numberNodes; j = rand() % numberVals; value = retrieve(i, j); } } void performContiguousSelect(int numberNodes) { int valuesSelected = 0; int nodeNumber; int i; int value; struct node* currentNode; while (valuesSelected < 500000) { //get a random node int nodeNumber = rand() % numberNodes; currentNode = getNode(nodeNumber); int size = currentNode->size; //get all values in nodes val for (i = 0; i < size; i++) { value = retrieve(nodeNumber, i); valuesSelected++; } } } int main(int argc, char *argv[]) { srand(time(NULL)); //used for random numbers if (argc != 2) { printf("Oops, only accept a value for option number\n"); return 0; } char option = argv[1][0]; switch (option) { case '0' : initialize(1000000, 100); //select values from i in range(0, 499) and j in range(0,99) start = clock(); performNormalSelect(); end = clock(); cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC; printf("Time for normal select: %f\n", cpuTimeUsed); break; case '1' : initialize(1000000, 100); //select randomly values start = clock(); performRandomSelect(1000000, 100); end = clock(); cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC; printf("Time for random select: %f\n", cpuTimeUsed); break; case '2': //fill list with 2000000 of random size 1 and 200 and select 500000 continuos initializeRandomSize(2000000, 200); start = clock(); performContiguousSelect(2000000); end = clock(); cpuTimeUsed = ((double) (end - start)) / CLOCKS_PER_SEC; printf("Time for contiguous select: %f\n", cpuTimeUsed); break; case '3': //perform all of the above initialize(1000000, 100); start = clock(); performNormalSelect(); end = clock(); double cpuNormal = ((double) (end - start)) / CLOCKS_PER_SEC; start = clock(); performRandomSelect(1000000, 100); end = clock(); double cpuRandom = ((double) (end - start)) / CLOCKS_PER_SEC; deleteList(); initializeRandomSize(2000000, 200); start = clock(); performContiguousSelect(2000000); end = clock(); double cpuCont = ((double) (end - start)) / CLOCKS_PER_SEC; printf("Time normal select: %f\n", cpuNormal); printf("Time random select: %f\n", cpuRandom); printf("Time contiguous select: %f\n", cpuCont); break; default : cpuTimeUsed = 0; printf("Time: %f\n", cpuTimeUsed); } return 0; }
然后我使用valgrind来检查缓存未命中
valgrind --tool=callgrind --simulate-cache=yes ./program program-arguments
当我尝试使用随机选择运行探查器时,就会出现问题。探查器花费了8个多小时,直到那时仍未完成。我知道在不使用分析器的情况下尝试该程序时,随机选择的确会产生结果。我在具有以下配置的Ubuntu VM上运行程序:
代码是否有问题,还是应该让探查器运行更多时间?任何反馈表示赞赏,谢谢!
(编辑)还有一个优化问题,但是我很难理解如何将其实现到先前的代码中。优化-现在我们将尝试优化此链接列表。为此,我们将分配1亿个字节的大内存空间,并将指针替换为如此大的存储空间中的索引。这意味着节点结构现在是
typedef struct node { int size; int val; struct node nextInd; } node_t;
其中val现在是我们已分配的内存中的索引,而下一个节点是由索引标识。
我正在通过测量缓存未命中来进行有关缓存性能的练习。将针对创建链接列表并从列表中选择值的练习对性能进行测试。 ...