0%

Lex构建词法分析器

Lex是Linux平台下的一种词法分析程序生成器,它可以根据词法规则说明书的要求来生成单词识别程序,由该程序识别出输入文本中的各个单词。

在Ubuntu下安装及使用Lex的流程为

1
2
3
4
5
sudo apt install flex
# Write the lex specification and save it with a .l extension; here the file is named 1.l
flex 1.l
gcc lex.yy.c -o 1 # compile the generated C file
./1

Lex文件主要由三部分构成

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
%{
// Definitions section: the C headers, global variables and prototypes the
// scanner needs. Everything between the two delimiter lines is copied into
// the generated C file.
#include<stdio.h>
int yywrap();
%}
%%
    /*
     * Rules section: every lexical rule is a pattern plus an action, i.e. a
     * regular expression followed by C statements.
     * Lex stores the matched text in yytext[].
     * This comment is indented on purpose: in the rules section a line that
     * starts in the first column is read as a pattern.
     */
%%
/*
 * User subroutines section.
 * If the program is compiled without the -ll option, this section must
 * define both main() and yywrap().
 */
int main(void){
    /* Run the generated scanner over the input, then report success. */
    yylex();
    return 0;
}
/* End-of-input hook: returning 1 tells the scanner to stop. */
int yywrap(){
    const int stopScanning = 1;
    return stopScanning;
}
/*
int yywrap(void)
在文件(或输入)的末尾被调用;如果函数的返回值是1,就停止解析;如果返回0,则继续从yyin读取
因此它可以用来解析多个文件,相关代码可以写在第三段
方法是让yyin文件指针依次指向不同的文件,直到所有的文件都被解析
最后,yywrap()返回1来表示解析的结束
*/

假设我们要完成一个基础的C词法分析器,要求如下

  1. 关键字:
    if else int return void while
  2. 符号:
    + - * / < <= > >= == != = ; , ( ) [ ] { } /* */
  3. ID和NUM:
    ID = letter letter*
    NUM = digit digit*
    letter = a|…|z|A|…|Z
    digit = 0|…|9

我的lex实现如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
%{
/* Scanner-wide state shared by the rule actions. */
#include <stdio.h>
#include <string.h>

int yywrap();

int lineNum = 1;          /* 1-based number of the source line being scanned */
int idNum = 0;            /* number of entries currently used in id */
char id[50][1005];        /* distinct identifiers, in order of first appearance */
char tmpLine[100005];     /* lexemes collected for the current source line */
char tmpString[100005];   /* token descriptions queued for the current line */
%}
%%
else|if|int|return|void|while {
    /* Reserved word.  yytext holds exactly the keyword that matched, so a
       single action serves all six alternatives and prints the same text
       the per-keyword actions used to print.  The lexeme is appended to the
       current source-line buffer and its token description to the pending
       token list; both appends are bounded by the destination size. */
    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <reserve_word, %s>", lineNum, yytext);
}

\/\/[^\n]* {
    /* Line comment: everything up to, but not including, the newline.
       The text is appended straight into the global buffers with bounded
       writes, so a long comment cannot overrun a small scratch array. */
    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <COMMENT, %s>", lineNum, yytext);
}
"/*"([^\*]|(\*)*[^\*/])*(\*)*"*/" {
    /* Block comment, possibly spanning several source lines.  The lexeme is
       cut at every newline.  Each piece is recorded as a COMMENT token on
       the line it belongs to, and a finished line is printed before the
       line counter advances.  Every newline advances the counter, including
       the second of two consecutive newlines, so an empty line inside the
       comment keeps later line numbers correct.  Pieces are addressed in
       place inside yytext and appended with bounded writes. */
    const char *piece = yytext;
    for (;;) {
        const char *lineEnd = strchr(piece, '\n');
        size_t pieceLen = lineEnd ? (size_t)(lineEnd - piece) : strlen(piece);
        size_t lineUsed = strlen(tmpLine);
        size_t tokUsed = strlen(tmpString);

        snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed,
                 " %.*s", (int)pieceLen, piece);
        snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
                 "\n\tline %d: <COMMENT, %.*s>", lineNum, (int)pieceLen, piece);

        if (lineEnd == NULL) {
            /* Last piece: the rest of this source line is still to come. */
            break;
        }

        /* The source line ended inside the comment: print it and start over. */
        if (lineNum > 1) {
            printf("\n");
        }
        printf("line %d:%s%s", lineNum, tmpLine, tmpString);
        memset(tmpLine, 0, sizeof(tmpLine));
        memset(tmpString, 0, sizeof(tmpString));
        lineNum++;
        piece = lineEnd + 1;
    }
}
"+"|"-"|"*"|"/"|"<"|"<="|">"|">="|"=="|"!="|"="|"," {
    /* Operators, reported in the OP category.  The scanner always takes the
       longest match, so a two-character operator wins over its one-character
       prefix.  yytext is the operator itself, which keeps the printed text
       identical to one dedicated action per operator. */
    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <OP, %s>", lineNum, yytext);
}
";"|"("|")"|"["|"]"|"{"|"}" {
    /* Delimiters, reported by their own spelling without a category. */
    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <%s>", lineNum, yytext);
}

[ \t]+ { /* Spaces and tabs only separate tokens and produce no output.  Quote characters are not part of this class, so they reach the error rule. */ }
[a-zA-Z]+ {
    /* Identifier.  Each distinct spelling gets a 1-based number in order of
       first appearance; later occurrences reuse that number.  The table has
       a fixed number of rows of fixed width, so both limits are checked
       before a new spelling is stored. */
    int slot = -1;
    for (int i = 0; i < idNum; i++) {
        if (strcmp(yytext, id[i]) == 0) {
            slot = i;
            break;
        }
    }

    if (slot < 0) {
        if (idNum >= (int)(sizeof id / sizeof id[0])) {
            printf("Error in line %d: too many distinct identifiers\n", lineNum);
            exit(1);
        }
        if (strlen(yytext) >= sizeof id[0]) {
            printf("Error in line %d: identifier too long\n", lineNum);
            exit(1);
        }
        strcpy(id[idNum], yytext);   /* length was checked just above */
        slot = idNum++;
    }

    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <ID, %d>", lineNum, slot + 1);
}

[0-9]+ {
    /* Unsigned integer literal, reported with its digits as written.  The
       text goes straight into the global buffers with bounded writes, so a
       very long digit string cannot overrun a small scratch array. */
    size_t lineUsed = strlen(tmpLine);
    size_t tokUsed = strlen(tmpString);
    snprintf(tmpLine + lineUsed, sizeof tmpLine - lineUsed, " %s", yytext);
    snprintf(tmpString + tokUsed, sizeof tmpString - tokUsed,
             "\n\tline %d: <NUM, %s>", lineNum, yytext);
}
\n {
    /* End of a source line: print the line header, the lexemes collected
       for the line and their token descriptions, then clear both buffers
       and move to the next line.  A newline is written before every printed
       line except the first, so the output never starts with an empty line. */
    if (lineNum > 1) {
        printf("\n");
    }
    printf("line %d:%s%s", lineNum, tmpLine, tmpString);
    memset(tmpString, 0, sizeof tmpString);
    memset(tmpLine, 0, sizeof tmpLine);
    lineNum += 1;
}

[a-zA-Z]+[0-9]|[0-9]+[a-zA-Z]|. {
    /* Lexical error: letters running into a digit, digits running into a
       letter, or any single character that no earlier rule accepts.  The
       line number is reported and scanning stops. */
    printf("Error in line %d\n", lineNum);
    exit(1);
}
%%
/*
 * Entry point: scan the whole input, then print whatever is still pending.
 *
 * A source line is normally printed when its newline is matched.  If the
 * input does not end with a newline, the tokens of the last line are still
 * sitting in tmpLine and tmpString when yylex() returns, so they are printed
 * here instead of being dropped.
 */
int main(void){
    yylex();
    if (tmpLine[0] != '\0' || tmpString[0] != '\0') {
        if (lineNum > 1) {
            printf("\n");
        }
        printf("line %d:%s%s", lineNum, tmpLine, tmpString);
    }
    printf("\n");
    return 0;
}

/* End-of-input hook: always reports 1, so scanning stops after one input. */
int yywrap(){
    const int stopScanning = 1;
    return stopScanning;
}