我一直在尝试开发一个 lex 词法扫描器(scanner),但是当我用输入文件运行它时,它产生了错误的输出。这是我的源代码:
%{
#include <stdio.h>
#include <stdlib.h> /* atof, used by the float rule */
#include <string.h> /* strlen, used by the rule actions */
/* Counters updated by the scanner's rule actions and printed by main(). */
int NumberOfLines=0;    /* newline characters seen */
int NumberOfChar=0;     /* characters accounted for by the rules */
int NumberOfIntegers=0; /* integer literals matched */
int KWCount=0;          /* keywords matched: if, else, while, return */
int NumberOfComments=0; /* incremented by the comment-handling rules */
%}
/* DIGIT is exactly one decimal digit; the rules use {DIGIT}+ so a number is never empty. */
DIGIT [0-9]
/* ID is a lowercase letter followed by lowercase letters or digits. */
ID [a-z][a-z0-9]*
/* COMMENT is an exclusive start condition: only the COMMENT rules apply inside a C-style comment. */
%x COMMENT
%option noyywrap
%%
    /*
     * Every character of the input is added to NumberOfChar exactly once and
     * every newline to NumberOfLines, whether or not it lies inside a comment.
     * Integers, keywords and identifiers are recognised only outside comments.
     * Each comment is counted once, when its opening delimiter is seen.
     */
"/*"                    { NumberOfComments++; NumberOfChar += (int)yyleng; BEGIN(COMMENT); }
<COMMENT>"*/"           { NumberOfChar += (int)yyleng; BEGIN(INITIAL); }
<COMMENT>\n             { NumberOfLines++; NumberOfChar++; }
<COMMENT>.              { NumberOfChar++; }
    /* Brace comments stay on one line and are consumed as a single lexeme. */
"{"[^}\n]*"}"           { NumberOfComments++; NumberOfChar += (int)yyleng; }
{DIGIT}+"."{DIGIT}*     {
                            printf("A float: %s (%g) \n", yytext, atof(yytext));
                            NumberOfChar += (int)yyleng;
                        }
{DIGIT}+                { NumberOfIntegers++; NumberOfChar += (int)yyleng; }
    /* Listed before ID so that an exact keyword wins the equal-length tie. */
if|else|while|return    {
                            printf("A keyword: %s\n", yytext); KWCount++;
                            NumberOfChar += (int)yyleng;
                        }
{ID}                    {
                            printf("An identifier: %s\n", yytext);
                            NumberOfChar += (int)yyleng;
                        }
    /*
     * The catch-all rules come last: on an equal-length match the earliest
     * rule wins, so placing them first would steal every one-character
     * number and identifier.
     */
\n                      { NumberOfLines++; NumberOfChar++; }
.                       { NumberOfChar++; }
%%
/*
 * Entry point: scan the file named by the first command-line argument, or
 * standard input when no argument is given, then print the counters.
 *
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv){
    FILE *input = stdin;

    ++argv, --argc; /* skip over program name */
    if (argc > 0) {
        input = fopen(argv[0], "r");
        if (input == NULL) {
            /* Report the failure instead of handing a NULL stream to the scanner. */
            perror(argv[0]);
            return 1;
        }
    }
    yyin = input;
    yylex();
    if (input != stdin)
        fclose(input);

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count: %d\n", NumberOfIntegers);
    printf("Keyword count: %d\n", KWCount);
    printf("Line count: %d\n", NumberOfLines);
    printf("Comment count: %d\n", NumberOfComments);
    return 0;
}
每当我用这个扫描器处理输入文件时,都会得到错误的输出。例如,该文件的输出应该是:
输出:
Number of Keywords: 3
Number of Characters: 196
Number of Lines: 17
Number of Digits: 3
但是它目前产生的输出是:
输出:
Number of keywords: 0
Number of Characters: 3
Number of Lines: 7
Number of Digits: 0
我怀疑问题出在我的正则表达式上。我还在学习正则表达式,非常感谢任何帮助!
这是我的输入文件内容:
/*comment 1*/
/*comment
comment 2
*/
/*comment 3*
*/if this is a line
{comment 4}
int i = 789;
int j = 689;
if i == 172 then
{comment 5}
else
{comment 6}
{comment 7}
/*8 comments
*
*/
end
以下是一份可以工作的代码,它主要基于你的代码修改而来。
%{
#include <stdio.h>
#include <stdlib.h> /* atof, used by the float rule */
#include <string.h> /* strlen, used by the rule actions */
/* Counters updated by the scanner's rule actions and printed by main(). */
int NumberOfLines=0;    /* newline characters seen */
int NumberOfChar=0;     /* characters matched by the newline and catch-all rules */
int NumberOfIntegers=0; /* integer literals matched */
int KWCount = 0;        /* keywords matched: if, else, while, return */
int IDCount = 0;        /* identifiers matched */
int RCCount = 0;        /* raw count: total length of every matched lexeme */
int OCCount = 0;        /* total length of the one-line brace comments */
int DTCount = 0;        /* characters matched by the catch-all "." rule */
int FLCount = 0;        /* floating-point literals matched */
%}
%option noyywrap
%option noinput
%option nounput
/* DIGIT is exactly one decimal digit; the rules use {DIGIT}+ so a number is never empty. */
DIGIT [0-9]
/* ID is a lowercase letter followed by lowercase letters or digits. */
ID [a-z][a-z0-9]*
%%
    /*
     * RCCount accumulates the length of every lexeme, so it ends up as the
     * total number of characters read.
     *
     * The newline and catch-all rules are deliberately kept first. On an
     * equal-length match the earliest rule wins, so every one-character
     * lexeme (a lone digit, a one-letter name, an uppercase letter) is
     * reported by the "." rule. Only lexemes of two or more characters reach
     * the number, keyword and identifier rules.
     */
\n                      { NumberOfLines++; NumberOfChar++; RCCount++; }
.                       { NumberOfChar++; DTCount++; RCCount++; printf(" '%c'", yytext[0]); }
{DIGIT}+                { NumberOfIntegers++; RCCount += (int)yyleng; }
    /* At least one digit is required before the decimal point. */
{DIGIT}+"."{DIGIT}*     {
                            printf("\nA float: %s (%g) \n", yytext, atof(yytext));
                            RCCount += (int)yyleng;
                            FLCount++;
                        }
    /* Listed before ID so that an exact keyword wins the equal-length tie. */
if|else|while|return    {
                            printf("\nA keyword: %s\n", yytext);
                            KWCount++;
                            RCCount += (int)yyleng;
                        }
{ID}                    {
                            printf("\nAn identifier: %s\n", yytext);
                            IDCount++;
                            RCCount += (int)yyleng;
                        }
    /* One-line brace comments are consumed whole and tallied in OCCount. */
"{"[^}\n]*"}"           {
                            RCCount += (int)yyleng;
                            OCCount += (int)yyleng;
                        }
%%
/*
 * Entry point: scan the file named by the first command-line argument, or
 * standard input when no argument is given, then print every counter.
 *
 * Returns 0 on success, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv){
    FILE *input = stdin;

    ++argv, --argc; /* skip over program name */
    if (argc > 0) {
        input = fopen(argv[0], "r");
        if (input == NULL) {
            /* Report the failure instead of handing a NULL stream to the scanner. */
            perror(argv[0]);
            return 1;
        }
    }
    yyin = input;
    yylex();
    if (input != stdin)
        fclose(input);

    printf("Character count: %d\n", NumberOfChar);
    printf("Number count: %d\n", NumberOfIntegers);
    printf("Keyword count: %d\n", KWCount);
    printf("Line count: %d\n", NumberOfLines);
    printf("ID count: %d\n", IDCount);
    printf("Dot count: %d\n", DTCount);
    printf("Raw count: %d\n", RCCount);
    printf("Float count: %d\n", FLCount);
    printf("Other count: %d\n", OCCount);
    printf("\n");
    return 0;
}
在数据文件上运行时:
/*commEnt 1*/
/*COMMENT
commEnt 2
*/
/*commEnt 3*
*/if this is a linE
{commEnt 4}
int i = 789;
int j = 689;
if i == 172 thEn
{commEnt 5}
ElsE
{commEnt 6}
{commEnt 7}
float 12.34
/*8 commEnts
*
else
return
while
the
going
is
good
*/
end
我得到输出:
'/' '*'
An identifier: comm
'E'
An identifier: nt
' ' '1' '*' '/' '/' '*' 'C' 'O' 'M' 'M' 'E' 'N' 'T' ' ' ' '
An identifier: comm
'E'
An identifier: nt
' ' '2' ' ' ' ' ' ' '*' '/' ' ' ' ' '/' '*'
An identifier: comm
'E'
An identifier: nt
' ' '3' '*' ' ' ' ' ' ' '*' '/'
A keyword: if
' '
An identifier: this
' '
An identifier: is
' ' 'a' ' '
An identifier: lin
'E'
An identifier: int
' ' 'i' ' ' '=' ' ' ';' ' '
An identifier: int
' ' 'j' ' ' '=' ' ' ';'
A keyword: if
' ' 'i' ' ' '=' '=' ' ' ' '
An identifier: th
'E' 'n' ' ' 'E'
An identifier: ls
'E'
An identifier: float
' '
A float: 12.34 (12.34)
'/' '*' '8' ' '
An identifier: comm
'E'
An identifier: nts
' ' '*' ' '
A keyword: else
' '
A keyword: return
' '
A keyword: while
' '
An identifier: the
' '
An identifier: going
' '
An identifier: is
' '
An identifier: good
' ' '*' '/' ' '
An identifier: end
Character count: 115
Number count: 3
Keyword count: 5
Line count: 26
ID count: 21
Dot count: 89
Raw count: 258
Float count: 1
Other count: 44
`wc` 的输出是:
$ wc data.2
26 49 258 data.2
$
字符的“原始计数”与 `wc` 的字符数相匹配;行数也匹配。考虑到大写字母被计入“点字符”,整数、浮点数、关键字和标识符的数量看起来都是正确的。是否还有其他问题可以留给你自己排查;我认为整数计数是错误的,但我不确定原因。