C 语言的通用文本换行程序

Question

我正在尝试用 C 语言进行一般文本换行。基本上它应该按以下方式处理文本：

最大线路长度已确定
当处理小于最大行长度的单词时，它必须始终将文本换行在空格中
对于大单词，当达到长度限制时应该将单词分成两部分
它还处理 ANSI 字符以解决“不可见”字符。

到目前为止我想到的是以下内容：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#define MAX_PRINT_LEN 50

void line_length_visualization(size_t maxlength) {
    for (size_t i = 0; i < maxlength; i++) {
        printf("=");
    }
    printf("\n");
}

unsigned int count_big_words(char *str, int max_line_length) {
    unsigned int word_length = 0;        // Length of the word
    unsigned int big_words = 0;          // Number of big words
    // Iterate over the original string to check how many line breaks "\n" will be inserted and for long words 
    size_t str_len = strlen(str);
    for (size_t i = 0; i < str_len + 1; i++) {
        // Increase word_length if it don't find a space
        if (str[i] != ' ') {
            word_length++;
        } 
        else {
            // If a word_length is bigger than the limit, increase the number of big_words
            if (word_length > max_line_length) {
                big_words++;
            }
            // Reset the word length
            word_length = 0;
        }
    }
    // Check the last word
    if (word_length > max_line_length) {
        big_words++;
    }

    return big_words;
}

bool is_a_big_word(char *str, int max_line_length, int last_space_position) {
    unsigned int word_length = 0;         // Size of the analyzed word
    bool inside_ANSI = false;       // Flag to determine if it is within an ANSI character
    // Iterate over the string from the initial position (where it has the last space) until it finds another space or "\0"
    int i = last_space_position + 1;
    while ((str[i] != ' ') && (str[i] != '\0')){
        // Check if it is inside an ANSI escape character
        if (str[i] == '\x1b') {
            inside_ANSI = true;
        }
        // If it is a normal character, increase word length
        if (inside_ANSI == false) {
            word_length++;
        }
        if (inside_ANSI == true && str[i] == 'm') {
            inside_ANSI = false;
        }
        // Advance one position in the string
        i++;
    }
    // Determine if it is a big word or not
    if (word_length >= max_line_length) {
        return true;
    }
    else {
        return false;
    }
}

void get_last_space_pos_and_length(char *str, size_t i, bool *inside_ANSI, int *last_space_position, int *length_counter) {
    /* This function updates the last space position of the string and the length counter to control the line breaks */
    // Check if position is a ANSI escape character
    if (str[i] == '\x1b') {
        (*inside_ANSI) = true;
    }
    // If in position of normal string
    if ((*inside_ANSI) == false) {
        // Check if it is space
        if (str[i] == ' ') {
            // If is a space, hold as last space position
            (*last_space_position) = i;
        }
        // Increase the length counter
        (*length_counter)++;
    }
    // If position is inside a ANSI escape sequense and is in the last position of the sequence flag
    // inside_ANSI as false (this instruction must come after all checks for inside_ANSI variable)
    if ((*inside_ANSI) == true && str[i] == 'm') {
        (*inside_ANSI) = false;
    }   
}

char *split_string_with_small_words(char *str, int max_line_length) {
    // Get the size of the original string
    size_t str_len = strlen(str);
    // Make a duplicate of the string
    char *new_str = strdup(str);  // Size of the string + potential
    int last_space_position = 0;    // Monitor of the last space position
    int length_counter = 0;         // Counter to check max line length
    bool inside_ANSI = false;
    // Iterate over the original string to check how many line breaks "\n" will be inserted            
    for (size_t i = 0; i < str_len + 1; i++) {
        get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
        // Check if length_counter reaches the limit of max_line_length and if it founds any space
        if (length_counter == max_line_length + 1 && last_space_position != 0) {
            // If reaches max_line_length, substitute the last space position with a new line char
            new_str[last_space_position] = '\n';
            // Reset monitor of the length counter to the position of the word after the space
            length_counter = i - last_space_position;
        }
    }

    return new_str;
}

char *my_split_string(char *str, int max_line_length) {
    bool inside_big_word = false;   // Flag to determine if it is within a big word 
    bool moved_right = false;       // Flag to determine if it moved the memory to the right 
    int add_right = 0;
    // Check if there is any big words
    unsigned int big_words = count_big_words(str, max_line_length);    
    // Check if there is a big word
    if (big_words == 0) {
        char *new_str = split_string_with_small_words(str, max_line_length);
        return new_str;       
    } else {
        // If there is any big words, allocate memory accordingly
        size_t str_len = strlen(str);
        size_t new_str_len = str_len + 1 + (str_len/max_line_length); // Length of the string + space for \0 + additional space for line breaks of big words
        char *new_str = malloc(new_str_len);  
        // Declare some variables 
        int last_space_position = 0;    // Monitor of the last space position
        int length_counter = 0;         // Counter to check max line length
        bool inside_ANSI = false;       // Flag to determine if it is within an ANSI character
        size_t j = 0; 
        // Iterate over the string
        for (size_t i = 0; i < str_len; i++) {
            get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
            // If reaches the length limit
            if (length_counter == max_line_length + 1) {
                // Check if it is a big word
                bool big_word = is_a_big_word(str, max_line_length, last_space_position);
                if (big_word == true) {
                    // Put a newline character at the end of the line
                    new_str[j] = '\n';
                    // Define last space position as the end of the big word and advance one character
                    last_space_position = j;
                    j++;
                    // Reset the monitor of the length
                    length_counter = 0;

                } else {
                    // Put a newline character in the last space character added by the number of additional newlines for big words
                    new_str[last_space_position + (j - i)] = '\n';
                    length_counter = i - last_space_position;
                } 

            } 
            // Copy str character to new_str
            new_str[j] = str[i];
            j++;
            
        }
        return new_str;
    } 
}

int main(void) {
    
    char *str_1 = "\x1b[33m=>\x1b[32m This is a very very loong message that needs to be inserted into this program to test the split function. Lets make this string really big to test it properly.\x1b[0m\n";
    char *str_2 = "\x1b[33m=>\x1b[32m File \x1b[35m'luaguedesc/data/in/DRDs/very_long_input_file.txt' (25 bytes)\x1b[32m successfully loaded! I haveeee also another input file to be loaded \x1b[35m'luaguedesc/data/DRDs/input_files/very_long_input_file_2.txt' (27 bytes)\x1b[32m that was successfully loaded! AndAFinalVeryVeryBigWordWithManyCharactersAndNoSpaces.\x1b[0m\n";
    char *str_3 = "\x1b[33m=>\x1b[32m dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauihdsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasihudiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihdaiudas.\x1b[0m\n";
    
    line_length_visualization(MAX_PRINT_LEN);
    char *new_str_1 = my_split_string(str_1, MAX_PRINT_LEN);
    printf("%s", new_str_1);
    
    line_length_visualization(MAX_PRINT_LEN);
    char *new_str_2 = my_split_string(str_2, MAX_PRINT_LEN);
    printf("%s", new_str_2);
    
    
    line_length_visualization(MAX_PRINT_LEN);
    char *new_str_3 = my_split_string(str_3, MAX_PRINT_LEN);
    printf("%s", new_str_3);
    
    free(new_str_1);
    free(new_str_2);
    free(new_str_3);
}

函数

line_length_visualization(...)

用作模板来检查文本是否在正确的位置换行

count_big_words(...)

判断是否存在大于行长度限制的单词

is_a_big_word(...)

确定下一个单词是否大于行长度限制

get_last_space_pos_and_length(...)

确定文本中最后一个空格出现的位置，还负责监控最大长度并处理 ANSI 字符

split_string_with_small_words(...)

是一个用小字包裹文本的功能。该功能运行正常。

my_split_string(...)

应该是通用功能，可以将小字文本和大于行长限制的大字文本都包裹起来。

我使用三个字符串进行测试：第一个是包含小单词的字符串，第二个是包含小单词和大单词的通用字符串，第三个是带有箭头的字符串，后跟一个大单词。

代码的输出如下：

第一串：

==================================================
=> This is a very very loong message that needs to
be inserted into this program to test the split
function. Lets make this string really big to test
it properly.

第二串：

==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeee
also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.

第三弦：

==================================================
=> dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauih
dsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasih
udiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihd
aiudas.

第二个输出看起来不错，但如果我通过在“haveeee”一词后添加一些字符（如haveeeeE）进行轻微更改，则它无法正确换行：

==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeeeE also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.

第三根弦也有问题。它在第二行和第三行中将文本换行了一个字符。

我尝试了很多方法，但找不到解决方案。有人可以帮忙吗？

提前致谢！

Answer 1

这里是一些问题的描述（但我认为它并不能涵盖所有问题）。

count_big_words

完全忽略转义码，即转义码被计为单词的一部分。所以返回的值可能是错误的。

is_a_big_word

有线

int i = last_space_position + 1;

设置迭代的起点。当您第一次在字符串上调用

is_a_big_word

时，

last_space_position

为零，因此迭代从索引 1 开始。换句话说，第一个字符被正确处理。例如，如果第一个字符是转义字符，您的代码不会检测到您正在解析转义序列。

通常，您的代码会查找字符串内的空格，但从不查找换行符（

\n

）。因此，例如尾随换行符将被视为单词的一部分。

C 语言的通用文本换行程序

问题描述投票：0回答：1

1个回答

最新问题

C 语言的通用文本换行程序

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1