我尝试用 C 实现霍夫曼代码。我有两个数组,一个存储所有节点,另一个包含指向某些节点的指针。第二个包含我想要组合的未完成的树。我必须对树进行排序,以将两者以最低频率组合起来,但 qsort() 因段错误而失败。在那里,我尝试比较树结构的频率成员,但失败了。当我尝试访问比较函数之外的频率成员时,它可以工作。 (相关部分是huffman.c中的compareFrequeny函数和最后一个while循环)
main.c:
#include "huffman.h"
#include <stdio.h>
int main(int argc, char* argv[])
{
FILE* inputP = fopen(argv[1], "rb");
FILE* outputP = fopen(argv[2], "w");
if (!(inputP && outputP)) return 1;
encode(inputP, outputP);
fclose(inputP);
fclose(outputP);
return 0;
}
霍夫曼.h:
#ifndef HUFFMAN_H
#define HUFFMAN_H
#include <stdio.h>
typedef struct node node;
// static int compareFrequency(const void* a, const void* b);
void encode(FILE* input, FILE* output);
#endif
霍夫曼.c:
#include "huffman.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct node {
node* left;
node* right;
unsigned int frequency;
char* bytes;
};
static int compareFrequency(const void* a, const void* b)
{
// Sort by frequency (higher first) or if equal by tree size (bigger first, only educated guess)
if (((node*)a)->frequency < ((node*)b)->frequency) return 1;
if (((node*)a)->frequency > ((node*)b)->frequency) return -1;
if (strlen(((node*)a)->bytes) < strlen(((node*)b)->bytes)) return 1;
if (strlen(((node*)a)->bytes) > strlen(((node*)b)->bytes)) return -1;
return 0;
}
void encode(FILE* input, FILE* output)
{
unsigned int possibleBytes[256] = { 0 }, differentTrees = 0, i;
char currChar;
// Count number of occurence of each individual byte
while ((currChar = fgetc(input)) != EOF) {
possibleBytes[currChar]++;
}
// Count number of different bytes
for (i = 0; i < 256; i++) {
if (possibleBytes[i]) differentTrees++;
}
// Make array filled with used bytes and their absolute frequency
node* nodes = calloc(differentTrees * 2 - 1, sizeof(node));
node* nodesP = nodes;
for (i = 0; i < 256; i++) {
if (possibleBytes[i]) {
nodesP->bytes = calloc(2, sizeof(char));
*nodesP->bytes = i;
*(nodesP->bytes + 1) = '\0';
nodesP->frequency = possibleBytes[i];
nodesP++;
}
}
// Fill trees array with nodes
node** trees = calloc(differentTrees, sizeof(node*));
// for (i = 0; i < differentTrees; i++) {
// printf("%c: %d\n", *(nodes + i)->bytes, (nodes + i)->frequency);
// }
node** treesP = trees;
nodesP = nodes;
for (i = 0; i < differentTrees; i++) {
*treesP = nodesP;
treesP++;
nodesP++;
}
// Build tree
node* lastTree;
node* secondLastTree;
while (differentTrees > 1) {
lastTree = *trees + differentTrees - 1;
secondLastTree = *trees + differentTrees - 2;
// printf("%d\n", differentTrees);
// printf("%p\n", nodesP);
// printf("%p\n\n", lastTree);
nodesP->left = lastTree;
nodesP->right = secondLastTree;
nodesP->frequency = lastTree->frequency + secondLastTree->frequency;
nodesP->bytes
= calloc(strlen(lastTree->bytes) + strlen(secondLastTree->bytes) + 1, sizeof(char));
strcat(nodesP->bytes, lastTree->bytes);
strcat(nodesP->bytes, secondLastTree->bytes);
secondLastTree = nodesP;
printf("%s\n", secondLastTree->bytes);
differentTrees--;
nodesP++;
printf("%u\n\n", secondLastTree->frequency);
qsort(*trees, differentTrees, sizeof(node*), &compareFrequency);
}
}
生成文件:
CC = gcc
CFLAGS = -g
OBJ = main.o huffman.o
huffman: $(OBJ)
$(CC) $(OBJ) -o $@
%.o: %.c
$(CC) $(CFLAGS) -c $<
clean:
rm *.o
我尝试使用 gdb 调试我的代码。 我希望不会出现段错误
代码中的问题是由于
encode
函数中指针的错误使用引起的,特别是在 qsort
的排序过程中。问题在于将 trees
视为 node
结构数组,而不是指向 node
结构的指针数组。
// Build tree
node* lastTree;
node* secondLastTree;
while (differentTrees > 1) {
lastTree = trees[differentTrees - 1];
secondLastTree = trees[differentTrees - 2];
// Create a new node to represent the combined tree
node* combinedTree = malloc(sizeof(node));
combinedTree->left = lastTree;
combinedTree->right = secondLastTree;
combinedTree->frequency = lastTree->frequency + secondLastTree->frequency;
// Update the array of pointers to trees
trees[differentTrees - 2] = combinedTree;
differentTrees--;
// Sort the array of pointers to trees based on frequency
qsort(trees, differentTrees, sizeof(node*), compareFrequency);
}
trees
现在被正确地视为指向 node
结构的指针数组。 trees[i]
语法用于访问数组中的第 i
指针,允许使用 qsort
的排序操作正常工作,而不会导致分段错误。
希望有帮助!