自动换行问题

问题描述 投票:0回答:1

我对 C 和一般编程非常陌生,我目前正在尝试编写一个程序,该程序将对一段文本进行自动换行,以便文本中的任何行都不会超过一定的大小。 readfile 函数从文本文件中读取文本行,并将其放入名为 text 的字符串数组中,其中文本数组的每个元素都是文本中的一行,而 write 代码创建一个名为 newtext 的新字符串数组,其中每个元素数组的 是一个自动换行的行,其长度由 linewidth 变量指定。我当前的问题是我的代码似乎生成的输出稍微偏离预期输出,我不确定为什么。 这是预期的输出:

这是我的输出:

我尝试调整结束索引并编写一个单独的循环来跳过空白空间,但似乎没有任何方法可以修复此特定错误

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int readfile(char* filename, char*** bufp)
{
    FILE* fp = fopen(filename, "r");
    char** buf = NULL;
    int numlines = 0;
    char tmp[1000];
    while (fgets(tmp, sizeof(tmp), fp)) {
        if (numlines % 16 == 0) {
            buf = (char**)realloc(buf, (numlines+16) * sizeof(char*));
        }
        int len = strlen(tmp);
        tmp[len-1] = 0;
        buf[numlines] = malloc(len + 1);
        strcpy(buf[numlines], tmp);
        numlines++;
    }
    *bufp = buf;
    return numlines;
}

void print_text(char** lines, int numlines) {
    for (int i=0; i<numlines; i++) {
        printf("%s\n", lines[i]);
    }
}

int main(int argc, char** argv) {
    char** text;
    int numlines = readfile(argv[1], &text);
    int linewidth = atoi(argv[2]);

    char** newtext = NULL;
    int newnumlines = 0;

    // TODO
    // iterate through the text array
    //    create a char* variable line = text[i]
    //    iterate through the line
    //        if you are starting a new line allocate space for the newline
    //        make sure you put the newline into the newtext array
    //        and check if you need to reallocate the newtext array
    //
    //        copy the character into the newline array
    //        check if you have reached the max linewidth
    //            if you aren't already at the end of a word,
    //            backtrack till we find a space or get to start of line
    //            terminate the newline and reset the newline position to 0
    //    put a space in the newline, unless you are at the end of the newline
    
    for (int i = 0; i < numlines; i++)
    {
        char * line = text[i];
        int length = strlen(line);
        int x = 0;
        int start = 0;
        
        while (start < length) {
            // Calculate the end index of the current line segment
            int end = start + linewidth;

            // Adjust the end index if it falls within a word
            while (end > start && end < length && line[end] != ' ') {
                end--;
            }
            
            char *newline = malloc(end - x + 1 + 1); 
            strncpy(newline, line + start, end - start);
            newline[end - start] = '\0'; 
            
            newtext = realloc(newtext, (newnumlines + 1) * sizeof(char*));
            newtext[newnumlines++] = newline;
            
            start =  end;
            while(start < length && line[start] == ' ')
            {
                start++;
            }
            //x = end + 1;
           // start = x;
        }
        
    }

    for(int i = 0; i < newnumlines; i++) {
        // Skip printing empty lines
        if (strlen(newtext[i]) > 0) {
            printf("%s\n", newtext[i]);
        }
    }
    
    //freeing memory
    for(int i = 0; i < numlines; i++){
        free(text[i]);
    }
    free(text);
    
    for(int i = 0; i < newnumlines; i++){
        free(newtext[i]);
    }
    free(newtext);
    
    return 0;
}
c string word-wrap
1个回答
0
投票

这里有一些效果很好的自动换行代码,并且遵守我对您的问题的评论中提到的规则: 1.) 使用一个连续的单维文本数组,这样除了数组末尾之外的任何内容都不会中断段落。 2.) 忽略所有现有的换行符 (' ') 在源文本/数组中。 3.) 调用

next_break()
从当前偏移量进行“前瞻”,以预测下一个自然中断(空格、句点等)是否出现在我们打算强制执行的换行边距之前或之后。

很抱歉,这不是对您的代码的改编,这正是我通常喜欢的操作方式。在我看来,在这种情况下最好彻底离开。我承认我已经有了这段代码,所以为什么不分享它呢?

可运行、经过测试的代码在这里。 https://godbolt.org/z/Kq3n3T76s

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define TRUE 1
#define FALSE 0
#define RIGHT_MARGIN 60
#define SPACE (char)('+') /*visible representation of tab replacement for analysis */

char text[] = { "\tThen we have the\tinteresting \n property\t\tthat the solution 'to-the-right' is still the 'best' \
solution. In fact, for any given\t\tposition in the list-of-words, there is only one-'best'-solution. \
Furthermore, if\twe\t\t\t\t\tchoose any arbitrary-initial \n set of 'carets', then we'll eventually find a best \
'tail'. If we then work-our-way\t'backwards'\tfrom-the-tail, we can find the best 'line-plus-tail', \
and the best 'line-plus-line-plus-tail', and so forth. Notice that the cost of finding this 'best-tail' \
is always constant in the number\tof\tcarets---our solution is\t\tlinear\t\tin the number of line-breaks! There \
is some non-trivial \n book-keeping required to preserve this 'perfect linearity'; in our presentation of the \
algorithm, we will ignore this book-keeping and present \n an algorithm that is dominated-by-the-linearity \
of the number of line-breaks, but could act quadratic in pathological cases. Code follows. " };


void ErrorExit(char *str)
{
    puts(str);  
    exit(0);
}


/*--------------------------------------------------------------------------
    
    next_break()
    
    Algo: function does a look-ahead for a space, a hyphen... anything that
    constitutes a natural sentence break oppty.   Returns the index of 
    the break oppty to the caller.
*--------------------------------------------------------------------------*/
int next_break(const char * str)
{
int done = FALSE, tempindex= -1;
char ch;

    while(!done)
    {
        ch = str[++tempindex];
        switch( ch ) 
        {
                case 0:
                case (char)' ':
                case (char)'\n':
                case (char)'\t':
                case (char)'-':
                    done = TRUE;
                break;

                default:
                break;
        }
    }
    return(tempindex);
}

/*-------------------------------------------------------------------------------------
    
    wordwrap()
    
    Algo: parses a long string looking for line break opportunities with 
    every char. If a break oppty is found at cuurent offs, does a qwk scan ahead 
    via next_break() to see if a better oppty exists ahead. ('Better' means closer 
    to the margin but NOT past the margin)

    If no better oppty found ahead, inserts a newline into buffer & restarts the line
    count.  Else, postpones the newline until chars are read up to the better oppty.
    
    Inputs: char *src buffer needing word wrap formatting.
            int max_line_len for wrap margin.
            int pointer *ugly_breaks for returning number of middle-of-word breaks. 

    Returns a buffer having the formatted text.
*-------------------------------------------------------------------------------------*/
char *wordwrap(const char *src, const int max_line_len, int *ugly_breaks)
{
    int src_idx=0, dest_idx = 0, cur_line_len = 0, done = FALSE;
    char ch;
    char *dest = malloc(strlen(src)*3); /* Enough space for even the worst of wrap possibilities.*/ 
    int new_line_needed = FALSE;

    if(!dest)
        ErrorExit("Memory Allocation error in wordwrap");

    while(!done)
    {
        ch = src[src_idx];
        switch(ch)
        {
            case 0:
                done = TRUE;
            break;

            case (char)' ':
            case (char)'-':
                dest[dest_idx++]=ch; /* No matter what happens next, we will include this char... */
                cur_line_len++;   /* ... and so of course we need to say this. */
                /* Would the next break oppty put us past the margin/line limit? */
                if(cur_line_len + next_break(&src[src_idx+1]) >= max_line_len)
                {
                    /* A: Yes.  Take the break oppty here, Now*/
                    new_line_needed = TRUE;
                }
            break;

            case (char)'\n': /* ignore exisiting newlines */
            case (char)'\r': /* and carriage return. Strip them */
            break;

            case (char)'\t': /* Tab, replace with space(s)*/    
                    
                    if(cur_line_len+1 + next_break(&src[src_idx+1]) >= max_line_len)
                    {
                        /* We have a tab as the last character of the current line.
                        * You can expect this to be rare and it is.  But if you don't 
                        * care for it, result will be disappointing sooner or later*/
                        new_line_needed = TRUE;
                    }
                    else
                    {
                        /* Replace the 4s here with any tab stop you like. 8 is the standard.*/
                        int to_add = 4-((cur_line_len)%4);

                        while(to_add-- && cur_line_len < max_line_len)
                        {
                            dest[dest_idx++]=SPACE;  /* Adaptable space replacement char */
                            cur_line_len++;
                        }
                    }
            break;

            default:
                dest[dest_idx++]=ch;
                cur_line_len++;
            break;
        }
        
        /* Has one of our cases flagged a need for newline? */
        if(new_line_needed)
        {
            int space_remaining = (max_line_len-cur_line_len);
            double percent_remain = 0.0;

            new_line_needed = FALSE;

            /* We now take the newline request as advisement.  We inspect
            * the length of remaining chars on the current line before we agree.
            * If some long word is next, then we're going to break it up ugly 
            * instead of leaving a lot of unused space in our buffer/application.
            * It's merely trading one kind of ugly (unused space) for another (broken word). 
            * 
            * We want to keep going (no newline) if more than -- say 10% -- of current line 
            * would become white space by newlining right now.
            *
            * Set percent_remain tolerance lower than 10% to get more greedy
            * with space conservation but get more ugly word breaks.
            *
            * 5% (0.05) is pretty nice with an avg of only 2 ugly breaks per 
            * a paragraph with a "reasonable" margin (70 chars or more).
            *
            * Set to 100% (1.0) and you won't get any ugly breaks -- unless 
            * you encounter a Huge word that is longer than your margin limit.
            */
            if(cur_line_len > 0 )
                    percent_remain = (double)space_remaining/cur_line_len;
            if(percent_remain < 0.25) 
            {
                /* Not much space remaining, we can newline here */
                dest[dest_idx++]='\n';
                cur_line_len = 0;
            }
        }
        /* Since we are habitually ignoring new line requests made by the cases,
        * -- AND because it is possible to get some long character sequence or word
        * which may exceed our margin -- 
        * ... check for margin overflow with every loop. */ 
        if(cur_line_len >= max_line_len)
        {
            /* We have or will overflow with next char.
            * This is called breaking the word ugly.  Sorry babe.*/
            dest[dest_idx++]='\n';
            cur_line_len = 0;
            /* Track ugly breaks for tolerance & adjusting newline rejections*/
            (*ugly_breaks)++;  
        }
        src_idx++;
    }
    dest[dest_idx++]='\0';   /* cap it */
    return dest;
}


int main(int argc, char *argv[])
{
    int iii=0, right_margin = RIGHT_MARGIN, ugly=0;
    char *cptr;

    /* Setup some tab stop and margin visualisations */
    puts("         10        20        30        40        50        60        70        80");
    puts("12345678901234567890123456789012345678901234567890123456789012345678901234567890");
    puts("    |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   |   (4-char tab stops)");

    /* Call the app */
    cptr = wordwrap(text, right_margin, &ugly);

    /* print result in the buffer, char-by-char: */
    for(iii=0; cptr[iii]; iii++)
    {
        putchar(cptr[iii]);
    }

    printf("\nword wrap right_margin %d:   ugly breaks: %d    TAB-to-SPACE char: [%c]\n", right_margin, ugly, SPACE);
    printf("strlen(original text): %u\n", strlen(text));
    printf("strlen(return text):   %u\n", strlen(cptr));

    free(cptr);

    return 0;
}
© www.soinside.com 2019 - 2024. All rights reserved.