我正在尝试用 C 语言进行一般文本换行。基本上它应该按以下方式处理文本:
到目前为止我想到的是以下内容:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define MAX_PRINT_LEN 50
void line_length_visualization(size_t maxlength) {
for (size_t i = 0; i < maxlength; i++) {
printf("=");
}
printf("\n");
}
unsigned int count_big_words(char *str, int max_line_length) {
unsigned int word_length = 0; // Length of the word
unsigned int big_words = 0; // Number of big words
// Iterate over the original string to check how many line breaks "\n" will be inserted and for long words
size_t str_len = strlen(str);
for (size_t i = 0; i < str_len + 1; i++) {
// Increase word_length if it don't find a space
if (str[i] != ' ') {
word_length++;
}
else {
// If a word_length is bigger than the limit, increase the number of big_words
if (word_length > max_line_length) {
big_words++;
}
// Reset the word length
word_length = 0;
}
}
// Check the last word
if (word_length > max_line_length) {
big_words++;
}
return big_words;
}
bool is_a_big_word(char *str, int max_line_length, int last_space_position) {
unsigned int word_length = 0; // Size of the analyzed word
bool inside_ANSI = false; // Flag to determine if it is within an ANSI character
// Iterate over the string from the initial position (where it has the last space) until it finds another space or "\0"
int i = last_space_position + 1;
while ((str[i] != ' ') && (str[i] != '\0')){
// Check if it is inside an ANSI escape character
if (str[i] == '\x1b') {
inside_ANSI = true;
}
// If it is a normal character, increase word length
if (inside_ANSI == false) {
word_length++;
}
if (inside_ANSI == true && str[i] == 'm') {
inside_ANSI = false;
}
// Advance one position in the string
i++;
}
// Determine if it is a big word or not
if (word_length >= max_line_length) {
return true;
}
else {
return false;
}
}
void get_last_space_pos_and_length(char *str, size_t i, bool *inside_ANSI, int *last_space_position, int *length_counter) {
/* This function updates the last space position of the string and the length counter to control the line breaks */
// Check if position is a ANSI escape character
if (str[i] == '\x1b') {
(*inside_ANSI) = true;
}
// If in position of normal string
if ((*inside_ANSI) == false) {
// Check if it is space
if (str[i] == ' ') {
// If is a space, hold as last space position
(*last_space_position) = i;
}
// Increase the length counter
(*length_counter)++;
}
// If position is inside a ANSI escape sequense and is in the last position of the sequence flag
// inside_ANSI as false (this instruction must come after all checks for inside_ANSI variable)
if ((*inside_ANSI) == true && str[i] == 'm') {
(*inside_ANSI) = false;
}
}
char *split_string_with_small_words(char *str, int max_line_length) {
// Get the size of the original string
size_t str_len = strlen(str);
// Make a duplicate of the string
char *new_str = strdup(str); // Size of the string + potential
int last_space_position = 0; // Monitor of the last space position
int length_counter = 0; // Counter to check max line length
bool inside_ANSI = false;
// Iterate over the original string to check how many line breaks "\n" will be inserted
for (size_t i = 0; i < str_len + 1; i++) {
get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
// Check if length_counter reaches the limit of max_line_length and if it founds any space
if (length_counter == max_line_length + 1 && last_space_position != 0) {
// If reaches max_line_length, substitute the last space position with a new line char
new_str[last_space_position] = '\n';
// Reset monitor of the length counter to the position of the word after the space
length_counter = i - last_space_position;
}
}
return new_str;
}
char *my_split_string(char *str, int max_line_length) {
bool inside_big_word = false; // Flag to determine if it is within a big word
bool moved_right = false; // Flag to determine if it moved the memory to the right
int add_right = 0;
// Check if there is any big words
unsigned int big_words = count_big_words(str, max_line_length);
// Check if there is a big word
if (big_words == 0) {
char *new_str = split_string_with_small_words(str, max_line_length);
return new_str;
} else {
// If there is any big words, allocate memory accordingly
size_t str_len = strlen(str);
size_t new_str_len = str_len + 1 + (str_len/max_line_length); // Length of the string + space for \0 + additional space for line breaks of big words
char *new_str = malloc(new_str_len);
// Declare some variables
int last_space_position = 0; // Monitor of the last space position
int length_counter = 0; // Counter to check max line length
bool inside_ANSI = false; // Flag to determine if it is within an ANSI character
size_t j = 0;
// Iterate over the string
for (size_t i = 0; i < str_len; i++) {
get_last_space_pos_and_length(str, i, &inside_ANSI, &last_space_position, &length_counter);
// If reaches the length limit
if (length_counter == max_line_length + 1) {
// Check if it is a big word
bool big_word = is_a_big_word(str, max_line_length, last_space_position);
if (big_word == true) {
// Put a newline character at the end of the line
new_str[j] = '\n';
// Define last space position as the end of the big word and advance one character
last_space_position = j;
j++;
// Reset the monitor of the length
length_counter = 0;
} else {
// Put a newline character in the last space character added by the number of additional newlines for big words
new_str[last_space_position + (j - i)] = '\n';
length_counter = i - last_space_position;
}
}
// Copy str character to new_str
new_str[j] = str[i];
j++;
}
return new_str;
}
}
int main(void) {
char *str_1 = "\x1b[33m=>\x1b[32m This is a very very loong message that needs to be inserted into this program to test the split function. Lets make this string really big to test it properly.\x1b[0m\n";
char *str_2 = "\x1b[33m=>\x1b[32m File \x1b[35m'luaguedesc/data/in/DRDs/very_long_input_file.txt' (25 bytes)\x1b[32m successfully loaded! I haveeee also another input file to be loaded \x1b[35m'luaguedesc/data/DRDs/input_files/very_long_input_file_2.txt' (27 bytes)\x1b[32m that was successfully loaded! AndAFinalVeryVeryBigWordWithManyCharactersAndNoSpaces.\x1b[0m\n";
char *str_3 = "\x1b[33m=>\x1b[32m dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauihdsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasihudiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihdaiudas.\x1b[0m\n";
line_length_visualization(MAX_PRINT_LEN);
char *new_str_1 = my_split_string(str_1, MAX_PRINT_LEN);
printf("%s", new_str_1);
line_length_visualization(MAX_PRINT_LEN);
char *new_str_2 = my_split_string(str_2, MAX_PRINT_LEN);
printf("%s", new_str_2);
line_length_visualization(MAX_PRINT_LEN);
char *new_str_3 = my_split_string(str_3, MAX_PRINT_LEN);
printf("%s", new_str_3);
free(new_str_1);
free(new_str_2);
free(new_str_3);
}
函数
line_length_visualization(...)
用作模板来检查文本是否在正确的位置换行
count_big_words(...)
判断是否存在大于行长度限制的单词
is_a_big_word(...)
确定下一个单词是否大于行长度限制
get_last_space_pos_and_length(...)
确定文本中最后一个空格出现的位置,还负责监控最大长度并处理 ANSI 字符
split_string_with_small_words(...)
是一个用小字包裹文本的功能。该功能运行正常。
my_split_string(...)
应该是通用功能,可以将小字文本和大于行长限制的大字文本都包裹起来。
我使用三个字符串进行测试:第一个是包含小单词的字符串,第二个是包含小单词和大单词的通用字符串,第三个是带有箭头的字符串,后跟一个大单词。
代码的输出如下:
第一串:
==================================================
=> This is a very very loong message that needs to
be inserted into this program to test the split
function. Lets make this string really big to test
it properly.
第二串:
==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeee
also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.
第三弦:
==================================================
=> dhaisdhiasudhuasihdiusahdiusahdhasiudsiuhdsauih
dsuihdsaiuhdsaihudsaiuhdsauihsaduhiasdhuadsiuhdasih
udiuasduhisaiuhdasuihdasuihuidasiuhuhiadsiuhasduihd
aiudas.
第二个输出看起来不错,但如果我通过在“haveeee”一词后添加一些字符(如haveeeeE)进行轻微更改,则它无法正确换行:
==================================================
=> File 'luaguedesc/data/in/DRDs/very_long_input_f
ile.txt' (25 bytes) successfully loaded! I haveeeeE also another input file to be loaded 'luaguedesc/d
ata/DRDs/input_files/very_long_input_file_2.txt'
(27 bytes) that was successfully loaded! AndAFinal
VeryVeryBigWordWithManyCharactersAndNoSpaces.
第三根弦也有问题。它在第二行和第三行中将文本换行了一个字符。
我尝试了很多方法,但找不到解决方案。有人可以帮忙吗?
提前致谢!
这里是一些问题的描述(但我认为它并不能涵盖所有问题)。
count_big_words
完全忽略转义码,即转义码被计为单词的一部分。所以返回的值可能是错误的。
is_a_big_word
有线 int i = last_space_position + 1;
设置迭代的起点。当您第一次在字符串上调用 is_a_big_word
时,last_space_position
为零,因此迭代从索引 1 开始。换句话说,第一个字符被正确处理。例如,如果第一个字符是转义字符,您的代码不会检测到您正在解析转义序列。
通常,您的代码会查找字符串内的空格,但从不查找换行符(
\n
)。因此,例如尾随换行符将被视为单词的一部分。