我的程序通过 getchar() 读取输入,使用二叉树结构保存每个单词出现的次数,然后在最后打印它们。任何仅包含数字、下划线和字母且不以数字开头的非空白字符集都算作一个单词。 当我将行
First_word First_word First_word Second_word Third_word Forth_word
放入名为 file.input
的文件中并通过命令 ./a.out <file.input
运行程序时,这是我得到的输出: 3 First_word
而还应该有其他单词。
我已经测试过它们,并且
getword
和 print
工作正常。这是我写的代码:
#include <stdio.h>
#include <string.h>
#include <ctype.h>
struct bt_node {
char *word;
int count;
struct bt_node *left;
struct bt_node *right;
};
int main()
{
extern struct bt_node *first;
int getword(char *, int);
struct bt_node *search(char *word, struct bt_node *node), *node = NULL;
void print(struct bt_node *node);
int lim = 100;
char word[lim];
while (getword(word, lim) != EOF) {
if (!(isalpha(word[0]) || word[0] == '_'))
continue;
search(word, node)->count++;
node = first;
}
print(first);
return 0;
}
void print(struct bt_node *node)
{
if (node->left != NULL)
print(node->left);
printf("%4d\t%s\n", node->count, node->word);
if (node->right != NULL)
print(node->right);
}
struct bt_node *search(char *word, struct bt_node *node)
{
struct bt_node *generate_node(char *word);
if (node == NULL)
node = generate_node(word);
int comp = strcmp(word, node->word);
if (comp == 0)
return node;
else
return search(word, 0 < comp ? node->right : node->left);
}
int getword(char *word, int lim)
{
int c, temp, getch(void);
void ungetch(int);
char *w = word;
pass_space: /* if encountered a comment or string skip it and go back to this step */
while (isspace(c = getch()))
;
if (c == '/') {
if ((temp = getch()) == '*') {
while ((c = getch()) != EOF)
if (c == '*' && (c = getch()) == '/')
goto pass_space;
else if (c == '*')
ungetch(c);
} else
ungetch(temp);
} else if (c == '\"') {
while ((c = getch()) != EOF)
if (c == '\"')
goto pass_space;
}
if (c != EOF)
*w++ = c;
if (!isalpha(c) && c != '_') {
*w = '\0';
return c;
}
for ( ; --lim > 0; w++)
if (!isalnum(*w = getch()) && *w != '_') {
ungetch(*w);
break;
}
*w = '\0';
return word[0];
}
#define BUFSIZE 100
int buf[BUFSIZE];
char bufp = 0;
int getch(void)
{
return bufp > 0 ? buf[--bufp] : getchar();
}
void ungetch(int c)
{
if (bufp < BUFSIZE)
buf[bufp++] = c;
else
printf("ungetch: too many characters, didn't unread %c\n", c);
}
struct bt_node *generate_node(char *word)
{
struct bt_node *bt_alloc(void);
char *alloc_char(int);
struct bt_node *node = bt_alloc();
node->word = alloc_char(strlen(word)+1);
strcpy(node->word, word);
node->count = 0;
node->left = NULL;
node->right = NULL;
return node;
}
#define BTALLOCSIZE 10000
static struct bt_node bt_allocbuf[BTALLOCSIZE];
static struct bt_node *bt_allocp = bt_allocbuf;
struct bt_node *first = bt_allocbuf;
struct bt_node *bt_alloc(void)
{
if (bt_allocp <= bt_allocbuf + BTALLOCSIZE)
return bt_allocp++;
else {
printf("bt_alloc - error: can't allocate\n");
return NULL;
}
}
#define ALLOCSIZE 10000
static char char_allocbuf[ALLOCSIZE];
static char *char_allocp = char_allocbuf;
char *alloc_char(int n)
{
if (char_allocbuf + ALLOCSIZE - char_allocp >= n) {
char_allocp += n;
return char_allocp - n;
} else {
printf("alloc_char - error: can't allocate\n");
return NULL;
}
}
弄清楚了,问题出在这个函数中:
struct bt_node *search(char *word, struct bt_node *node)
{
struct bt_node *generate_node(char *word);
if (node == NULL)
node = generate_node(word);
int comp = strcmp(word, node->word);
if (comp == 0)
return node;
else
return search(word, 0 < comp ? node->right : node->left);
}
我将一个指向我生成的变量
bt_node
的指针分配给变量node
,认为它会将指针分配给我调用该函数的参数,但由于值传递,它没有分配。结果,结构体被生成并保存,但它们的地址没有分配给子节点指针,并且 node->left
和 node->right
保持 NULL
。以这种方式更改功能有帮助:
struct bt_node *search(char *word, struct bt_node **node_pt)
{
struct bt_node *generate_node(char *word);
if (*node_pt == NULL)
*node_pt = generate_node(word);
int comp = strcmp(word, (*node_pt)->word);
if (comp == 0)
return *node_pt;
else
return search(word, (0 < comp ? &(*node_pt)->right : &(*node_pt)->left));
}
现在我只需修改函数声明,使其与新定义相对应,并将 main 中的调用
search(word, node)->count++;
更改为 search(word, &node)->count++;