c语言实现一个简单的词法分析程序
| 单词符号 | 种别码 | 单词符号 | 种别码 |
| --- | --- | --- | --- |
| program | 1 | * | 28 |
| var | 2 | / | 29 |
| integer | 3 | : | 30 |
| bool | 4 | := | 31 |
| real | 5 | < | 32 |
| char | 6 | > | 33 |
| const | 7 | <= | 34 |
| begin | 8 | >= | 35 |
| if | 9 | == | 36 |
| then | 10 | <> | 37 |
| else | 11 | = | 38 |
| while | 12 | ; | 39 |
| do | 13 | , | 40 |
| for | 14 | \` | 41 |
| to | 15 | ( | 42 |
| end | 16 | ) | 43 |
| read | 17 | /* | 44 |
| write | 18 | */ | 45 |
| true | 19 | & | 46 |
| false | 20 | % | 47 |
| not | 21 | (原表缺失) | 48 |
| and | 22 | ! | 49 |
| or | 23 | { | 50 |
| letter(letter\|digit)* | 24 | } | 51 |
| digit digit* | 25 | [ | 52 |
| + | 26 | ] | 53 |
| - | 27 | # | 0 |
| != | 56 | ++ | 54 |
| && | 57 | -- | 55 |

注:种别码 28–30、38–43、47–53 以及 0 所对应的符号在原表中缺失,这里依据下文源程序 scaner() 的 switch 分支补全(28 按 + - * / 的顺序推断为 *);种别码 48 在源程序中与 40 重复使用了 ',',原符号无法确定。
比较简略还会再完善!
1.2.3词法分析程序的功能
输入:所给文法的源程序字符串。
输出:二元组(syn,token或sum)构成的序列。
其中:syn为单词种别码;
token为存放的单词自身字符串;
sum为整型常数。
例如:对源程序
begin x:=9; if x>0 then x:= 2*x+1/3; end #
的源文件,经词法分析后输出如下序列:
(8,begin)(24,x)(31,:=)(25,9)(39,;)(9,if)…
功能测试:
(1) 输入字符串begin x:=9; if x>0 then x:= 2*x+1/3; end #
其结果如下图所示:
(2)输入程序:
program:
var a=0;
for(a;a<=100;a++){
while(a>89&a<95)
do a=0;}#
(3)输入程序:
for (n = 0; n < 20; n++)
token[n] = NULL;
m = 0;
ch = prog[p++];
while (ch == ' ')
ch = prog[p++];
if ((ch <= 'z' && ch >= 'a') || (ch <= 'Z' && ch >= 'A'))
while ((ch <= 'z' && ch >= 'a') || (ch <= 'Z' && ch >= 'A') || (ch <= '9' && ch >= '0'))
token[m++] = ch;
ch = prog[p++];
token[m++] = '\0';
ch = prog[--p];
syn = 24;
for (n = 0; n < 23; n++)
if (strcmp(token, rwtab[n]) == 0)
syn = n + 1;
break;
else
if ((ch <= '9' && ch >= '0'))
{sum = 0;
while ((ch <= '9' && ch >= '0'))
{sum = sum * 10 + ch - '0';
ch = prog[p++];}
ch = prog[--p];
syn = 25;#
源程序:
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
/* Input buffer: the source text read by main(), terminated by '#'. */
char prog[180];

/* Spelling of the token most recently recognised by the scanner. */
char token[20];

/* Character currently being examined. */
char ch;

/* Category code of the most recent token (0 = end marker, -1 = error). */
int syn;

/* Read position inside prog. */
int p;

/* Write position inside token. */
int m;

/* General-purpose loop counter. */
int n;

/* Value of the most recent integer constant. */
int sum;

/*
 * Reserved words. The scanner assigns a keyword the category code
 * (index in this table + 1).
 */
char* rwtab[23] = {
    "program", "var",   "integer", "bool", "real",
    "char",    "const", "begin",   "if",   "then",
    "else",    "while", "do",      "for",  "to",
    "end",     "read",  "write",   "true", "false",
    "not",     "and",   "or"
};
main()
{
p = 0;
printf("\nplease intput string:");
do
{
ch = getchar();
prog[p++] = ch;
} while (ch != '#');
p = 0;
do
{
scaner();
switch (syn)
{
case 11:printf("(%d,%d)", syn, sum); break;
case -1:printf("input error\n"); break;
default:printf("(%d,%s)", syn, token);
}
} while (syn != 0);
getch();
}
/* Lexical scanner */

/*
 * Returns the character at the current read position without consuming it.
 * A position outside prog, or a NUL byte, is reported as the '#' end marker,
 * so scanning terminates even when the buffer does not contain the marker.
 */
static char peek_char(void)
{
    if (p < 0 || p >= (int)sizeof prog || prog[p] == '\0')
        return '#';
    return prog[p];
}

/* Consumes one character: stores it in ch, advances p and returns it. */
static char take_char(void)
{
    ch = peek_char();
    p++;
    return ch;
}

/*
 * Completes an operator whose first character is already stored in token.
 * If the next input character equals `second` it is consumed, appended to
 * token and syn becomes pair_syn; otherwise nothing is consumed and syn
 * becomes single_syn.
 */
static void finish_operator(char second, int pair_syn, int single_syn)
{
    if (peek_char() == second)
    {
        token[m++] = take_char();
        syn = pair_syn;
    }
    else
    {
        syn = single_syn;
    }
}

/*
 * Recognises the next token of prog, starting at index p.
 *
 * On return:
 *   syn   - category code of the token; 0 for the '#' end marker, -1 for an
 *           unrecognised character or an integer constant too large for int
 *   token - NUL-terminated spelling of the token (truncated to fit)
 *   sum   - value of the token when it is an integer constant (syn == 25)
 *   p     - index of the first character after the token
 *
 * Blanks, tabs and line breaks before the token are skipped.
 * The category code is also returned as the function result.
 */
int scaner(void)
{
    memset(token, 0, sizeof token);
    m = 0;

    while (isspace((unsigned char)peek_char()))
        p++;
    take_char();

    if (isalpha((unsigned char)ch))
    {
        /* Identifier or keyword: letter (letter | digit)* */
        for (;;)
        {
            /* Characters that do not fit are dropped; token stays terminated. */
            if (m < (int)sizeof token - 1)
                token[m++] = ch;
            if (!isalnum((unsigned char)peek_char()))
                break;
            take_char();
        }
        syn = 24;
        for (n = 0; n < (int)(sizeof rwtab / sizeof rwtab[0]); n++)
        {
            if (strcmp(token, rwtab[n]) == 0)
            {
                syn = n + 1;
                break;
            }
        }
        return syn;
    }

    if (isdigit((unsigned char)ch))
    {
        /* Integer constant: digit digit* */
        int overflow = 0;

        sum = 0;
        for (;;)
        {
            int digit = ch - '0';

            /* The spelling is kept too, so a caller may print either form. */
            if (m < (int)sizeof token - 1)
                token[m++] = ch;
            /* Checked before multiplying: signed overflow is undefined. */
            if (sum > (INT_MAX - digit) / 10)
                overflow = 1;
            else
                sum = sum * 10 + digit;
            if (!isdigit((unsigned char)peek_char()))
                break;
            take_char();
        }
        syn = overflow ? -1 : 25;
        return syn;
    }

    /* Operators and delimiters: the first character always goes into token. */
    token[m++] = ch;
    switch (ch)
    {
    case '<':
        finish_operator('=', 34, 32);          /* <=  or  <  */
        if (syn == 32)
            finish_operator('>', 37, 32);      /* <>  or  <  */
        break;
    case '>':
        finish_operator('=', 35, 33);          /* >=  or  >  */
        break;
    case ':':
        finish_operator('=', 31, 30);          /* :=  or  :  */
        break;
    case '+':
        finish_operator('+', 54, 26);          /* ++  or  +  */
        break;
    case '-':
        finish_operator('-', 55, 27);          /* --  or  -  */
        break;
    case '/':
        finish_operator('*', 44, 29);          /* slash-star (44)  or  /  */
        break;
    case '*':
        finish_operator('/', 45, 28);          /* star-slash (45)  or  *  */
        break;
    case '&':
        finish_operator('&', 57, 46);          /* &&  or  &  */
        break;
    case '=':
        finish_operator('=', 36, 38);          /* ==  or  =  */
        break;
    case '!':
        finish_operator('=', 56, 49);          /* !=  or  !  */
        break;
    case ';':
        syn = 39;
        break;
    case ',':
        syn = 40;
        break;
    case '`':
        syn = 41;
        break;
    case '(':
        syn = 42;
        break;
    case ')':
        syn = 43;
        break;
    case '%':
        syn = 47;
        break;
    /*
     * NOTE(review): the original listing used ',' a second time for code 48,
     * which is a duplicate case label and does not compile. '.' is assumed
     * here - confirm against the intended category table.
     */
    case '.':
        syn = 48;
        break;
    case '{':
        syn = 50;
        break;
    case '}':
        syn = 51;
        break;
    case '[':
        syn = 52;
        break;
    case ']':
        syn = 53;
        break;
    case '#':
        syn = 0;
        break;
    default:
        syn = -1;
    }
    return syn;
}