c语言实现一个简单的词法分析程序


单词符号

种别码

 

单词符号

种别码

program

1

28

var

2

29

integer

3

30

bool

4

:=

31

real

5

<

32

char

6

>

33

const

7

<=

34

begin

8

>=

35

if

9

==

36

then

10

<>

37

else

11

38

while

12

39

do

13

40

for

14

41

to

15

42

end

16

43

read

17

/*

44

write

18

*/

45

true

19

&

46

false

20

47

not

21

48

and

22

49

or

23

50

letter(letter|digit)*

24

51

digit digit*

25

52

+

26

53

-

27

0

=

56

++

54

&&

57

--

55

比较简略还会再完善!

1.2.3词法分析程序的功能

 

输入:所给文法的源程序字符串。

输出:二元组 (syn, token 或 sum) 构成的序列。

其中:syn为单词种别码;

      token为存放的单词自身字符串;

      sum为整型常数。

例如:对源程序

      begin  x:=9;  if x>0  then  x:= 2*x+1/3;  end #

的源文件,经词法分析后输出如下序列:

(8,begin)(24,x)(31,:=)(25,9)(39,;)(9,if)……

    功能测试:

(1) 输入字符串begin  x:=9;  if x>0  then  x:= 2*x+1/3;  end #

其结果如下图所示:

(2) 输入程序:

program:

var a=0;

for(a;a<=100;a++){

while(a>89&a<95)

do a=0;}#

(3) 输入程序:

for (n = 0; n < 20; n++)

token[n] = NULL;

m = 0;

ch = prog[p++];

while (ch == ' ')

ch = prog[p++];

if ((ch <= 'z' && ch >= 'a') || (ch <= 'Z' && ch >= 'A'))

while ((ch <= 'z' && ch >= 'a') || (ch <= 'Z' && ch >= 'A') || (ch <= '9' && ch >= '0'))

token[m++] = ch;

ch = prog[p++];

token[m++] = '\0';

ch = prog[--p];

syn = 24;

for (n = 0; n < 23; n++)

if (strcmp(token, rwtab[n]) == 0)

syn = n + 1;

break;

else

if ((ch <= '9' && ch >= '0'))

{sum = 0;

while ((ch <= '9' && ch >= '0'))

{sum = sum * 10 + ch - '0';

ch = prog[p++];}

ch = prog[--p];

syn = 25;#

 源程序:

#include <limits.h>
#include <stdio.h>
#include <string.h>

/* Input buffer: main() stores the characters it reads here, '#' included. */
char prog[180];
/* Spelling of the token most recently recognised by scaner(). */
char token[20];
/* Character currently under examination. */
char ch;
/* Category code of the token most recently recognised. */
int syn;
/* Read position inside prog. */
int p;
/* Write position inside token. */
int m;
/* General-purpose loop counter. */
int n;
/* Value of the integer constant most recently recognised. */
int sum;
/* Reserved words; the word stored at index i has category code i + 1. */
char *rwtab[23] = {
    "program", "var", "integer", "bool", "real", "char", "const", "begin",
    "if", "then", "else", "while", "do", "for", "to", "end",
    "read", "write", "true", "false", "not", "and", "or"
};
/* scaner() is defined further down in this file; declare it before use. */
int scaner(void);

/*
 * Reads source text from standard input up to and including the '#'
 * end marker, then repeatedly calls scaner() and prints one pair per
 * token: (code,value) for integer constants, (code,text) for everything
 * else, and "input error" for characters the scanner rejects.
 *
 * Returns 0.
 */
int main(void)
{
    const int capacity = (int)sizeof prog;
    int length = 0;
    int c; /* int rather than char so EOF stays distinguishable from data */

    printf("\nplease intput string:");

    /*
     * One slot is kept in reserve so that an end marker can always be
     * stored, even when the input is too long or ends without '#'.
     */
    while (length < capacity - 1 && (c = getchar()) != EOF) {
        prog[length++] = (char)c;
        if (c == '#')
            break;
    }
    if (length == 0 || prog[length - 1] != '#')
        prog[length++] = '#';

    p = 0;
    do {
        scaner();
        switch (syn) {
        case 25: /* integer constant: the value is in sum, not in token */
            printf("(%d,%d)", syn, sum);
            break;
        case -1:
            printf("input error\n");
            break;
        default:
            printf("(%d,%s)", syn, token);
            break;
        }
    } while (syn != 0); /* code 0 is the '#' end marker */

    return 0;
}


/* Lexical scanner. */

/* Returns non-zero when c is an ASCII letter. */
static int scaner_is_letter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/* Returns non-zero when c is an ASCII decimal digit. */
static int scaner_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/*
 * Returns the character at position p and advances p. Positions outside
 * prog read as the '#' end marker so the scanner never touches memory
 * beyond the buffer.
 */
static char scaner_get(void)
{
    char c = (p >= 0 && p < (int)sizeof prog) ? prog[p] : '#';

    p++;
    return c;
}

/*
 * Steps p back onto the lookahead character and reloads ch from it, so
 * the next call to scaner() starts with that character.
 */
static void scaner_unget(void)
{
    --p;
    ch = (p >= 0 && p < (int)sizeof prog) ? prog[p] : '#';
}

/*
 * Appends c to token, always leaving room for the terminating '\0';
 * characters that do not fit are dropped.
 */
static void scaner_append(char c)
{
    if (m < (int)sizeof token - 1)
        token[m++] = c;
}

/*
 * Scans an operator that starts with ch and may be extended by exactly
 * one specific character: sets syn to pair_syn when `follow` comes next
 * (and consumes it), otherwise to single_syn.
 */
static void scaner_pair(char follow, int pair_syn, int single_syn)
{
    scaner_append(ch);
    ch = scaner_get();
    if (ch == follow) {
        scaner_append(ch);
        syn = pair_syn;
    } else {
        scaner_unget();
        syn = single_syn;
    }
}

/*
 * Recognises the next token in prog starting at position p.
 *
 * On return:
 *   syn   - category code of the token (0 for the '#' end marker,
 *           -1 for a character that starts no token);
 *   token - the token's spelling, truncated to fit the buffer;
 *   sum   - for integer constants (code 25), the value, saturated at
 *           INT_MAX;
 *   p     - advanced past the token.
 *
 * Returns syn.
 */
int scaner(void)
{
    for (n = 0; n < (int)sizeof token; n++)
        token[n] = '\0';
    m = 0;

    /* Skip blanks, tabs and line breaks between tokens. */
    do {
        ch = scaner_get();
    } while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');

    if (scaner_is_letter(ch)) {
        /* Identifier or reserved word: letter (letter | digit)*. */
        while (scaner_is_letter(ch) || scaner_is_digit(ch)) {
            scaner_append(ch);
            ch = scaner_get();
        }
        /* token was zero-filled above, so it is already terminated. */
        scaner_unget();
        syn = 24;
        for (n = 0; n < (int)(sizeof rwtab / sizeof rwtab[0]); n++) {
            if (strcmp(token, rwtab[n]) == 0) {
                syn = n + 1;
                break;
            }
        }
    } else if (scaner_is_digit(ch)) {
        /* Integer constant: digit digit*. */
        sum = 0;
        while (scaner_is_digit(ch)) {
            int digit = ch - '0';

            scaner_append(ch);
            /* Saturate instead of overflowing a signed int. */
            if (sum > (INT_MAX - digit) / 10)
                sum = INT_MAX;
            else
                sum = sum * 10 + digit;
            ch = scaner_get();
        }
        scaner_unget();
        syn = 25;
    } else {
        switch (ch) {
        case '<':
            /* '<' has two possible extensions: "<>" and "<=". */
            scaner_append(ch);
            ch = scaner_get();
            if (ch == '>') {
                scaner_append(ch);
                syn = 37;
            } else if (ch == '=') {
                scaner_append(ch);
                syn = 34;
            } else {
                scaner_unget();
                syn = 32;
            }
            break;
        case '>': scaner_pair('=', 35, 33); break;
        case ':': scaner_pair('=', 31, 30); break;
        case '+': scaner_pair('+', 54, 26); break;
        case '-': scaner_pair('-', 55, 27); break;
        case '/': scaner_pair('*', 44, 29); break; /* comment open, or '/' */
        case '*': scaner_pair('/', 45, 28); break; /* comment close, or '*' */
        case '&': scaner_pair('&', 57, 46); break; /* "&&", or '&' */
        /*
         * NOTE(review): the token table in the accompanying article lists
         * '=' as 56, while this code uses 38 for '=' and 56 for "!=".
         * The existing codes are kept; confirm which is intended.
         */
        case '=': scaner_pair('=', 36, 38); break;
        case '!': scaner_pair('=', 56, 49); break;
        case ';': scaner_append(ch); syn = 39; break;
        case ',': scaner_append(ch); syn = 40; break;
        case '`': scaner_append(ch); syn = 41; break;
        case '(': scaner_append(ch); syn = 42; break;
        case ')': scaner_append(ch); syn = 43; break;
        case '%': scaner_append(ch); syn = 47; break;
        /*
         * NOTE(review): code 48 is currently unassigned. It used to be
         * attached to a second `case ','` label, which duplicates the
         * label for code 40 and does not compile; the intended character
         * is unknown - TODO confirm.
         */
        case '{': scaner_append(ch); syn = 50; break;
        case '}': scaner_append(ch); syn = 51; break;
        case '[': scaner_append(ch); syn = 52; break;
        case ']': scaner_append(ch); syn = 53; break;
        case '#': scaner_append(ch); syn = 0; break;
        default:
            syn = -1;
            break;
        }
    }

    return syn;
}