我目前正在尝试使用 `sscanf` 解析一个 .csv 文件。我写了一个函数,用来检测一行的开头是否为双引号。
/* Report whether the line starts with a double quote: 1 if it does, 0 otherwise. */
int checkString(const char *str) {
    return str[0] == '"';
}
我需要有人帮我解析这个文件,特别是因为 csv 文件中各行的格式并不一致(有的行以带引号的字段开头,有的则没有)。
下面是我使用 `sscanf` 解析文件的尝试:
/*
 * Reads ../musicPlayList.csv line by line, splits every line into seven
 * string tokens with sscanf and prints the tokens separated by spaces.
 *
 * NOTE(review): this is the version that produces garbage output. Visible
 * problems:
 *   - %s and %[^\"] carry no field width, so any token of 20 or more
 *     characters writes past the end of its 20-byte array.
 *   - %s splits on white space, not on commas, so the tokens do not
 *     correspond to the CSV fields.
 *   - The return value of sscanf is ignored; tokens that were not assigned
 *     for the current line are printed with stale or uninitialized contents.
 *   - inputStream is never closed, and a failed fopen is silently ignored.
 *
 * Node, Record and Duration are declared elsewhere (not shown here).
 */
int main() {
char inputLine[100]; // buffer for one line read by fgets
Node *pList = NULL; // list head; not used anywhere in this function
FILE *inputStream = fopen("../musicPlayList.csv", "r");
// The next five variables are declared but never used in this function.
Record testArr[100];
Record inputRecord;
Duration inputDuration;
int inputTimesPlayed = 0;
int inputRating = 0;
// One 20-byte buffer per token (at most 19 characters plus the terminator).
// None of them is initialized.
char artistToken[20];
char albumTitleToken[20];
char songTitleToken[20];
char genreToken[20];
char durationToken[20];
char timesPlayedToken[20];
char ratingToken[20];
if (inputStream != NULL) {
while (fgets(inputLine, sizeof(inputLine), inputStream) != NULL) {
if (checkString(inputLine)) {
// Line starts with '"': take everything up to the closing quote as the
// artist, then read six white-space-delimited words.
sscanf(inputLine, "\"%[^\"]\" %s %s %s %s %s %s",
artistToken, albumTitleToken,
songTitleToken, genreToken, durationToken,
timesPlayedToken, ratingToken);
} else {
// Unquoted line: read seven white-space-delimited words.
sscanf(inputLine, "%s %s %s %s %s %s %s",
artistToken, albumTitleToken,
songTitleToken, genreToken, durationToken,
timesPlayedToken, ratingToken);
}
// Printed unconditionally, whether or not sscanf filled all seven tokens.
printf("%s %s %s %s %s %s %s\n",
artistToken, albumTitleToken, songTitleToken,
genreToken, durationToken, timesPlayedToken,
ratingToken);
}
}
return 0;
}
这是 .csv 文件:
"Swift, Taylor",1989,Shake it Off,Pop,3:35,12,3
Drake,NOTHING WAS THE SAME,Own it,Rap,3:23,3,3
Drake,YOU WELCOME,The Motto,Rap,4:13,7,4
"Perri, Christina",HEAD OF HEART,Trust,Pop,2:35,3,5
"Bieber, Justin",PURPOSE,No Sense,Pop,4:12,6,1
Eminem,SHADYXV,Vegas,Rap,3:37,8,3
Adele,25,Remedy,Pop,4:11,24,4
"Swift, Taylor",RED,Stay Stay Stay,Pop,4:42,5,1
"Brooks, Garth",FRESH HORSES,The Old Stuff,Country,2:57,11,2
Swift, Taylor ,1989 , Shake it Off, Pop, 3:35,12,3 00doA
Drake, NOTHING WAS THE SAME, Own it, Rap, 3:23,3, 3
Drake, YOU WELCOME, The Motto, Rap, 4:13,7,4 SAME, Own it, Rap, 3:23,3,3
Perri, Christina ,HEAD ,3,5 HEART, Trust, Pop,2:35,3,5 it, Rap, 3:23,3,3
Bieber, Justin ,PURPOSE, No Sense, Pop, 4:12,6,1 HEART, Trust, Pop, 2:35Sense, Pop, 4:12,6,1 it, Rap, 3:23, 3, 3
Eminem, SHADYXV, Vegas, Rap, 3:37,8,3 , PURPOSE, No Sense, Pop, 4:12,6,1 HEART, Trust, Pop, 2:35Sense, Pop, 4:12,6,1 it, Rap, 3:23,3, 3
Adele, 25, Remedy, Pop, 4:11, 24,4 , PURPOSE, No Sense, Pop, 4:12,6,1 HEART, Trust, Pop, 2:35Sense, Pop, 4:12,6,1 it, Rap, 3:23,3 , 3
Swift, Taylor ,RED, Stay Stay Stay, Pop, 4:42,5,1 it, Rap, 3:23,3, 3
Brooks, Garth ,FRESH HORSES, The 1,2 Stuff, Country,2:57, 11, 2
Process finished with exit code 0
(以上是程序的实际输出。)为什么它会输出垃圾值,例如第一行输出末尾的 `00doA`?
`sscanf` 可以用来解析 CSV 行,但它是一个非常棘手的工具,有很多缺点。您的代码存在以下问题:
- `NOTHING WAS THE SAME` 有 20 个字符,放不进 20 个 `char` 的数组,因为 `sscanf` 还要在字符串末尾存储一个空终止符。请把数组加大,并在 `%` 和转换说明符之间写明最多读取的字符数(数组大小减 1)。
- `%s` 解析的是由空白字符分隔的单词;要解析由逗号分隔的字段,需要使用 `%19[^,]` 这样的格式。
- `sscanf` 无法解析空字段。
- 您应该检查 `sscanf()` 的返回值,以检测格式错误或缺失的输入。如果某个字段解析失败,其后的标记都不会被修改,可能仍处于未初始化状态,这可以解释垃圾输出。

这是修改后的版本:
#include <errno.h>
#include <stdio.h>
#include <string.h>
/*
 * Parse musicPlayList.csv and print the seven fields of every record
 * separated by '|'.
 *
 * Expected record layout:
 *     artist,album,song,genre,duration,timesPlayed,rating
 * Only the artist may be enclosed in double quotes (so that it can contain a
 * comma). No field may be empty, because a %[ conversion must match at least
 * one character.
 *
 * Returns 0 on success, 1 if the file cannot be opened. Malformed lines are
 * reported on stderr with file name and line number, then skipped.
 *
 * The linked-list / Record variables of the original program are omitted:
 * they play no part in parsing.
 */
int main(void) {
    char inputLine[200];
    char artistToken[30];
    char albumTitleToken[30];
    char songTitleToken[30];
    char genreToken[20];
    char durationToken[20];
    char timesPlayedToken[20];
    char ratingToken[20];
    const char *filename = "musicPlayList.csv";
    int lineno = 0;

    FILE *inputStream = fopen(filename, "r");
    if (inputStream == NULL) {
        fprintf(stderr, "Cannot open %s: %s\n", filename, strerror(errno));
        return 1;
    }

    while (fgets(inputLine, sizeof(inputLine), inputStream) != NULL) {
        int fields;
        int consumed = 0;

        lineno++;

        /* Drop the line terminator ("\n" or "\r\n"). This lets a final line
         * without a trailing newline parse like any other line and keeps a
         * stray '\r' out of the last field. */
        inputLine[strcspn(inputLine, "\r\n")] = '\0';

        /* Pick the format from the first character so that a malformed
         * quoted line is rejected instead of being re-read as unquoted.
         * Every width is the destination size minus one. %n stores the
         * number of characters consumed and is not counted in the return
         * value, so a complete match returns 7. */
        if (inputLine[0] == '"') {
            fields = sscanf(inputLine,
                            "\"%29[^\"]\",%29[^,],%29[^,],%19[^,],%19[^,],%19[^,],%19[^,]%n",
                            artistToken, albumTitleToken, songTitleToken,
                            genreToken, durationToken, timesPlayedToken,
                            ratingToken, &consumed);
        } else {
            fields = sscanf(inputLine,
                            "%29[^,],%29[^,],%29[^,],%19[^,],%19[^,],%19[^,],%19[^,]%n",
                            artistToken, albumTitleToken, songTitleToken,
                            genreToken, durationToken, timesPlayedToken,
                            ratingToken, &consumed);
        }

        /* Accept the line only if all seven fields were converted and nothing
         * is left over (an extra field or an over-long last field). */
        if (fields == 7 && inputLine[consumed] == '\0') {
            printf("%s|%s|%s|%s|%s|%s|%s\n",
                   artistToken, albumTitleToken, songTitleToken, genreToken,
                   durationToken, timesPlayedToken, ratingToken);
        } else {
            fprintf(stderr, "%s:%d: invalid format: %s\n",
                    filename, lineno, inputLine);
        }
    }

    fclose(inputStream);
    return 0;
}
输出:
Swift, Taylor|1989|Shake it Off|Pop|3:35|12|3
Drake|NOTHING WAS THE SAME|Own it|Rap|3:23|3|3
Drake|YOU WELCOME|The Motto|Rap|4:13|7|4
Perri, Christina|HEAD OF HEART|Trust|Pop|2:35|3|5
Bieber, Justin|PURPOSE|No Sense|Pop|4:12|6|1
Eminem|SHADYXV|Vegas|Rap|3:37|8|3
Adele|25|Remedy|Pop|4:11|24|4
Swift, Taylor|RED|Stay Stay Stay|Pop|4:42|5|1
Brooks, Garth|FRESH HORSES|The Old Stuff|Country|2:57|11|2
但请注意,如果专辑标题或歌曲标题也用双引号括起来,尤其是其中包含嵌入的换行符时,此代码将会失败。要处理这些情况,需要一个更完善的手写 CSV 解析器。