我是 C 语言和编程方面的新手,目前正在尝试编写一个对一段文本进行自动换行的程序,使文本中的任何一行都不超过指定的宽度。readfile 函数从文本文件中读取各行文本,并将它们放入名为 text 的字符串数组中,数组的每个元素是文本中的一行;我编写的代码则创建一个名为 newtext 的新字符串数组,其中每个元素是一个自动换行后的行,其最大长度由 linewidth 变量指定。我当前的问题是,我的代码生成的输出与预期输出略有偏差,而我不确定原因。这是预期的输出:
我尝试过调整结束索引,也尝试过编写一个单独的循环来跳过空白字符,但似乎没有任何方法能修复这个错误。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Reads the text file `filename` into a heap-allocated array of strings,
 * one element per chunk returned by fgets() (normally one per line).
 *
 * The trailing newline of each line is removed.  A line without a newline
 * (typically the last line of the file, or a 999-character piece of a longer
 * line) is stored intact.
 *
 * On success the array is stored in *bufp and the number of entries is
 * returned; *bufp is NULL when the file is empty.  The caller owns the array
 * and every string in it and must free() them.
 *
 * On failure (NULL argument, file cannot be opened, out of memory) *bufp is
 * set to NULL when possible, nothing is leaked, and -1 is returned.
 */
int readfile(char* filename, char*** bufp)
{
    if (bufp == NULL) {
        return -1;
    }
    *bufp = NULL;
    if (filename == NULL) {
        return -1;
    }

    FILE *fp = fopen(filename, "r");
    if (fp == NULL) {
        return -1;
    }

    char **buf = NULL;
    int numlines = 0;
    char tmp[1000];

    while (fgets(tmp, sizeof(tmp), fp)) {
        /* Grow the pointer array in steps of 16 entries. */
        if (numlines % 16 == 0) {
            char **grown = realloc(buf, (size_t)(numlines + 16) * sizeof *grown);
            if (grown == NULL) {
                goto fail;
            }
            buf = grown;
        }

        /* Cut at the newline if there is one; otherwise keep every character. */
        size_t len = strcspn(tmp, "\n");
        tmp[len] = '\0';

        buf[numlines] = malloc(len + 1);
        if (buf[numlines] == NULL) {
            goto fail;
        }
        memcpy(buf[numlines], tmp, len + 1);
        numlines++;
    }

    fclose(fp);
    *bufp = buf;
    return numlines;

fail:
    for (int i = 0; i < numlines; i++) {
        free(buf[i]);
    }
    free(buf);
    fclose(fp);
    return -1;
}
/*
 * Writes the first `numlines` strings of `lines` to standard output,
 * each followed by a newline.  Nothing is printed when numlines <= 0.
 */
void print_text(char** lines, int numlines) {
    char **cursor = lines;
    int remaining = numlines;

    while (remaining > 0) {
        printf("%s\n", *cursor);
        cursor++;
        remaining--;
    }
}
/*
 * Returns the exclusive end index of the next output segment of `line`,
 * which starts at `start` (start < length).
 *
 *  - If the rest of the line fits in `linewidth` characters, it is taken whole.
 *  - Otherwise the segment ends at the last space at or before
 *    start + linewidth, so no word is split.
 *  - If there is no such space (a single word longer than linewidth), the
 *    word is split at exactly linewidth characters.  This guarantees the
 *    result is always greater than `start`, so the caller always advances.
 */
static int wrap_segment_end(const char *line, int start, int length, long linewidth)
{
    if ((long)(length - start) <= linewidth) {
        return length;
    }

    /* linewidth < length - start here, so it fits in an int. */
    int limit = start + (int)linewidth;
    int end = limit;
    while (end > start && line[end] != ' ') {
        end--;
    }
    return end > start ? end : limit;
}

/*
 * Usage: program <file> <linewidth>
 *
 * Reads <file> with readfile(), wraps every input line so that no output
 * line is longer than <linewidth> characters, and prints the result.
 * Each input line is wrapped independently; empty input lines produce no
 * output.  Spaces at a wrap point are dropped.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad arguments, an unreadable
 * file, or memory exhaustion.
 */
int main(int argc, char** argv) {
    if (argc < 3) {
        fprintf(stderr, "usage: %s <file> <linewidth>\n", argc > 0 ? argv[0] : "wrap");
        return EXIT_FAILURE;
    }

    /* A width of zero or less can never make progress, so reject it up front. */
    char *width_end = NULL;
    long linewidth = strtol(argv[2], &width_end, 10);
    if (width_end == argv[2] || *width_end != '\0' || linewidth <= 0) {
        fprintf(stderr, "linewidth must be a positive integer, got \"%s\"\n", argv[2]);
        return EXIT_FAILURE;
    }

    char** text = NULL;
    int numlines = readfile(argv[1], &text);
    if (numlines < 0) {
        fprintf(stderr, "could not read %s\n", argv[1]);
        return EXIT_FAILURE;
    }

    char** newtext = NULL;
    int newnumlines = 0;
    int status = EXIT_SUCCESS;

    for (int i = 0; i < numlines && status == EXIT_SUCCESS; i++) {
        const char *line = text[i];
        int length = (int)strlen(line);
        int start = 0;

        while (start < length) {
            int end = wrap_segment_end(line, start, length, linewidth);
            size_t seglen = (size_t)(end - start);

            /* Allocate the segment and the slot for it before committing either. */
            char *newline = malloc(seglen + 1);
            char **grown = NULL;
            if (newline != NULL) {
                grown = realloc(newtext, (size_t)(newnumlines + 1) * sizeof *grown);
            }
            if (grown == NULL) {
                free(newline);
                fprintf(stderr, "out of memory\n");
                status = EXIT_FAILURE;
                break;
            }

            memcpy(newline, line + start, seglen);
            newline[seglen] = '\0';
            newtext = grown;
            newtext[newnumlines++] = newline;

            /* Continue after the segment, skipping the spaces at the wrap point. */
            start = end;
            while (start < length && line[start] == ' ') {
                start++;
            }
        }
    }

    if (status == EXIT_SUCCESS) {
        for (int i = 0; i < newnumlines; i++) {
            /* Segments are never empty, but keep the guard cheap and explicit. */
            if (newtext[i][0] != '\0') {
                printf("%s\n", newtext[i]);
            }
        }
    }

    /* Release everything that was allocated, on success and on failure. */
    for (int i = 0; i < numlines; i++) {
        free(text[i]);
    }
    free(text);
    for (int i = 0; i < newnumlines; i++) {
        free(newtext[i]);
    }
    free(newtext);
    return status;
}
这里有一些效果很好的自动换行代码,它遵守我在对您问题的评论中提到的规则: 1.) 使用一个连续的一维文本数组,这样除了数组末尾之外,不会有任何东西打断段落。 2.) 忽略源文本/数组中所有现有的换行符 ('\n')。 3.) 调用 next_break() 从当前偏移量进行“前瞻”,以预测下一个自然断点(空格、连字符等)出现在我们打算强制执行的换行边距之前还是之后。
很抱歉,这不是对您的代码的改编,这正是我通常喜欢的操作方式。在我看来,在这种情况下最好彻底离开。我承认我已经有了这段代码,所以为什么不分享它呢?
可运行、经过测试的代码在这里。 https://godbolt.org/z/Kq3n3T76s
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Boolean flag values used by next_break() and wordwrap(). */
#define TRUE 1
#define FALSE 0
/* Wrap margin (maximum line length) that main() passes to wordwrap(). */
#define RIGHT_MARGIN 60
/* Character wordwrap() writes when it expands a tab; '+' rather than ' ' keeps the expansion visible in the output. */
#define SPACE (char)('+') /*visible representation of tab replacement for analysis */
/* Sample input that main() passes to wordwrap(). It deliberately mixes tabs
 * (single and in runs), embedded newlines and hyphenated words so that every
 * case handled by wordwrap() is exercised. */
char text[] = { "\tThen we have the\tinteresting \n property\t\tthat the solution 'to-the-right' is still the 'best' \
solution. In fact, for any given\t\tposition in the list-of-words, there is only one-'best'-solution. \
Furthermore, if\twe\t\t\t\t\tchoose any arbitrary-initial \n set of 'carets', then we'll eventually find a best \
'tail'. If we then work-our-way\t'backwards'\tfrom-the-tail, we can find the best 'line-plus-tail', \
and the best 'line-plus-line-plus-tail', and so forth. Notice that the cost of finding this 'best-tail' \
is always constant in the number\tof\tcarets---our solution is\t\tlinear\t\tin the number of line-breaks! There \
is some non-trivial \n book-keeping required to preserve this 'perfect linearity'; in our presentation of the \
algorithm, we will ignore this book-keeping and present \n an algorithm that is dominated-by-the-linearity \
of the number of line-breaks, but could act quadratic in pathological cases. Code follows. " };
/*
 * Prints the message `str` followed by a newline to stderr and terminates
 * the program with a failure status.  Does not return.
 *
 * A non-zero exit status is used so that shells and scripts can tell the
 * program failed; diagnostics go to stderr so they do not mix with the
 * wrapped text written to stdout.
 */
void ErrorExit(const char *str)
{
    fprintf(stderr, "%s\n", str != NULL ? str : "fatal error");
    exit(EXIT_FAILURE);
}
/*--------------------------------------------------------------------------
    next_break()
    Look-ahead helper: scans `str` from its first character and returns the
    index of the first natural break opportunity, i.e. the first space,
    newline, tab or hyphen. If the string contains none of these, the index
    of the terminating NUL (the string length) is returned.
*--------------------------------------------------------------------------*/
int next_break(const char * str)
{
    /* Characters at which a line may be broken; the terminating NUL also stops the scan. */
    static const char break_chars[] = " \n\t-";

    return (int)strcspn(str, break_chars);
}
/*-------------------------------------------------------------------------------------
    wordwrap()
    Algo: walks a long string one char at a time looking for line break
        opportunities. When a break opportunity (space or hyphen) is found at the
        current offset, it does a quick scan ahead via next_break() to see whether
        the next opportunity would still fit before the margin. If it would not,
        a newline is requested here; the request is only honoured when little of
        the line would be left unused (see the tolerance check below). Lines that
        reach the margin without an accepted break are broken mid-word.
        Existing '\n' and '\r' are stripped; tabs are expanded to SPACE chars.
    Inputs: const char *src     NUL-terminated buffer needing word wrap formatting.
            int max_line_len    wrap margin.
            int *ugly_breaks    incremented once per forced break at the margin;
                                NOT reset here, so initialise it before the call.
                                May be NULL if the count is not wanted.
    Returns a newly malloc'ed, NUL-terminated buffer holding the formatted text.
    The caller owns it and must free() it. On allocation failure ErrorExit() is called.
*-------------------------------------------------------------------------------------*/
char *wordwrap(const char *src, const int max_line_len, int *ugly_breaks)
{
    /* Tab stop width used for tab expansion. 8 is the traditional value. */
    enum { TAB_STOP = 4 };

    int src_idx=0, dest_idx = 0, cur_line_len = 0, done = FALSE;
    char ch;
    int new_line_needed = FALSE;
    const size_t src_len = strlen(src);
    /* Worst case output per input char: a tab expanded to TAB_STOP chars
     * followed by one forced newline. (A space/hyphen produces at most the
     * char itself plus two newlines, which is not more.) */
    const size_t per_char = (size_t)TAB_STOP + 1;
    char *dest = NULL;

    /* + 1 for the terminating NUL; skip the malloc if the size would overflow. */
    if(src_len <= ((size_t)-1 - 1) / per_char)
        dest = malloc(src_len * per_char + 1);
    if(!dest)
        ErrorExit("Memory Allocation error in wordwrap");

    while(!done)
    {
        ch = src[src_idx];
        switch(ch)
        {
            case 0:
                done = TRUE;
            break;

            case (char)' ':
            case (char)'-':
                dest[dest_idx++]=ch; /* No matter what happens next, we will include this char... */
                cur_line_len++;      /* ... and so of course we need to say this. */
                /* Would the next break oppty put us past the margin/line limit? */
                if(cur_line_len + next_break(&src[src_idx+1]) >= max_line_len)
                {
                    /* A: Yes. Take the break oppty here, Now*/
                    new_line_needed = TRUE;
                }
            break;

            case (char)'\n': /* ignore existing newlines */
            case (char)'\r': /* and carriage return. Strip them */
            break;

            case (char)'\t': /* Tab, replace with space(s)*/
                if(cur_line_len+1 + next_break(&src[src_idx+1]) >= max_line_len)
                {
                    /* We have a tab as the last character of the current line.
                     * You can expect this to be rare and it is. But if you don't
                     * care for it, result will be disappointing sooner or later.
                     * NOTE(review): nothing is written for the tab here, so if the
                     * newline request below is declined the words on either side
                     * of the tab end up adjacent -- confirm this is intended. */
                    new_line_needed = TRUE;
                }
                else
                {
                    /* Pad to the next tab stop, but never past the margin. */
                    int to_add = TAB_STOP-((cur_line_len)%TAB_STOP);
                    while(to_add-- && cur_line_len < max_line_len)
                    {
                        dest[dest_idx++]=SPACE; /* Adaptable space replacement char */
                        cur_line_len++;
                    }
                }
            break;

            default:
                dest[dest_idx++]=ch;
                cur_line_len++;
            break;
        }

        /* Has one of our cases flagged a need for newline? */
        if(new_line_needed)
        {
            int space_remaining = (max_line_len-cur_line_len);
            double percent_remain = 0.0;
            new_line_needed = FALSE;
            /* We now take the newline request as advisement. We inspect
             * the length of remaining chars on the current line before we agree.
             * If some long word is next, then we're going to break it up ugly
             * instead of leaving a lot of unused space in our buffer/application.
             * It's merely trading one kind of ugly (unused space) for another (broken word).
             *
             * The newline is accepted only when the unused space is less than
             * 25% of the characters already on the line. Lower the 0.25
             * threshold to conserve more space at the price of more ugly word
             * breaks; raise it to get fewer ugly breaks.
             */
            if(cur_line_len > 0 )
                percent_remain = (double)space_remaining/cur_line_len;
            if(percent_remain < 0.25)
            {
                /* Not much space remaining, we can newline here */
                dest[dest_idx++]='\n';
                cur_line_len = 0;
            }
        }
        /* Since we are habitually ignoring new line requests made by the cases,
         * -- AND because it is possible to get some long character sequence or word
         * which may exceed our margin --
         * ... check for margin overflow with every loop. */
        if(cur_line_len >= max_line_len)
        {
            /* We have or will overflow with next char.
             * This is called breaking the word ugly. */
            dest[dest_idx++]='\n';
            cur_line_len = 0;
            /* Track ugly breaks for tolerance & adjusting newline rejections*/
            if(ugly_breaks)
                (*ugly_breaks)++;
        }
        src_idx++;
    }
    dest[dest_idx++]='\0'; /* cap it */
    return dest;
}
/*
 * Demo driver: wraps the global sample `text` at RIGHT_MARGIN with
 * wordwrap(), prints a column ruler, the wrapped text, and summary
 * statistics (margin, number of forced mid-word breaks, and the lengths of
 * the input and output strings).
 */
int main(int argc, char *argv[])
{
    int right_margin = RIGHT_MARGIN, ugly = 0;
    char *cptr;

    /* Command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    /* Setup some tab stop and margin visualisations */
    puts(" 10 20 30 40 50 60 70 80");
    puts("12345678901234567890123456789012345678901234567890123456789012345678901234567890");
    puts(" | | | | | | | | | | | | | | | | | (4-char tab stops)");

    /* Call the app */
    cptr = wordwrap(text, right_margin, &ugly);
    if (cptr == NULL)
    {
        return EXIT_FAILURE;
    }

    /* Print the wrapped result. */
    fputs(cptr, stdout);

    printf("\nword wrap right_margin %d: ugly breaks: %d TAB-to-SPACE char: [%c]\n", right_margin, ugly, SPACE);
    /* strlen() returns size_t, which must be printed with %zu. */
    printf("strlen(original text): %zu\n", strlen(text));
    printf("strlen(return text): %zu\n", strlen(cptr));
    free(cptr);
    return 0;
}