Hello World

Your description here.

词法分析

默认分类 0 评

title: 词法分析
tags:

  • 词法分析
  • 编译原理

author: 半坡上
img: 'https://cdn.jsdelivr.net/gh/yifangtan/YiPhoto@main/Photo/ok.jpeg'
toc: true
categories: 编译原理
summary: 这是编译原理中一个小实验:设计并实现一个词法分析器,相关代码如下。
abbrlink: 45309

date: 2021-12-23 18:59:01

一、实验目的

设计并实现一个词法分析器,深刻理解编译原理中词法分析器的原理。

二、实验内容

通过使用自己熟悉的语言设计并实现一个词法分析器,是此法分析器按要求的格式输出经过分析的程序段。

要求分析一下程序片段:


//example.txt文件
const a=10;
var b,c;
procedure p;
 begin
  c:=b+a;
 end;
begin
 read(b);
 while b#0 do
 begin
  call p;writeln(2*c);read(b);
 end
end.

该程序片段存放在example.txt中。

三、实验步骤

1、对PL\0文法中各类单词分类:

(1)保留字:const、var、procedure、begin、end、if、then、while、do、 read、call、write、writeln

(2)常数:由0-----9这几个数字组成

(3)标识符:由字母打头的字母和数字的字符串

(4)运算符:+,-,*,/,:=,>=,<=,#

(5)分界符:,、.、(、)、;

2、将各类单词对应到lex中:

(1)保留字:reservedWord [const|var|procedure|begin|end|if|then|while|do|read|call|write|writeln]

​ 由于在lex中不区分大小写,所以将保留字写成:reservedWord

[cC][oO][nN][sS][tT]|[vV][aA][rR]|[pP][rR][oO][cC][eE][dD][uU][rR][eE]| 
[bB][eE][gG][iI][nN]|[eE][nN][dD]|[iI][fF]|[tT][hH][eE][nN]|[wW][hH][iI]
[lL][eE]|[dD][oO]|[rR][eE][aA][dD]|[cC][aA][lL][lL]|[wW][rR][iI][tT][eE]
[wW][rR][iI][tT][eE][lL][nN]

(2)常数:

constant ([0-9])+ /0---9这几个数字可以重复/

(3)标识符:

identfier [A-Za-z]([A-Za-z][0-9])*

(4)运算符:

operator +|-|*|/|:=|>=|<=|#|= /在lex中,有特殊意义的运算符要加转义字符\,如+、及*、/

(5)分界符:

delimiter [,.;]

3、PL/0的语言的词法分析器要求跳过分隔符(如空格,回车,制表符),对应的lex定义为:

delim [""\n\t]

whitespace{delim}+

4、为lex制定一些规则:

 {reservedWord}{count++;printf("\t%d\t(1,‘%s’)\n",count,yytext);}                                          /*对保留字定规则,输出设置*/
{operator} {count++;printf("\t%d\t(2,‘%s’)\n",count,yytext); }
{delimiter}{count++;printf("\t%d\t(3,‘%s’)\n",count,yytext);}
{constant}{count++;printf("\t%d\t(4,‘%s’)\n",count,yytext);}
{identfier}{count++;printf("\t%d\t(5,‘%s’)\n",count,yytext);}
{whitespace} {/* do  nothing*/ }     /*遇到空格,什么都不做*/
 

四、代码设计

​ 本程序使用c语言编写而成,采用栈存储文件数据(一开始想得数组,但又想到定义数组开辟空间不好把握,容易造成资源浪费,故而采用栈式存储,空间不够可以适量开辟空间,很大程度上缩减资源浪费。)然后就是各个函数中遍历检索所得数据,再构造一个数组,存放检索后的数据,输出即可!完整代码如下:

#include <stdio.h>
#include <stdlib.h>
#include<stdbool.h>
#include<string.h>
#define STACK_INIT_SIZE 100    /*存储空间初始分配量*/
#define STACKINCREMENT 20    /*存储空间分配增量*/
typedef struct {
  char *top;
  char *base;
  int stacksize;
}SqStack;
int InitStack (SqStack *s) {   /*构造一个空栈*/
    s -> base = (char*)malloc(STACK_INIT_SIZE*sizeof(char));
    s -> top = s -> base;
    s -> stacksize = STACK_INIT_SIZE;
    return 1;
}
int push (SqStack *s, char e) {    /*插入元素e为新的栈顶元素*/
    if (s -> top - s -> base >= s -> stacksize) {
        s -> base = (char*)realloc(s -> base, (s -> stacksize + STACKINCREMENT)*sizeof(char));
        if (!s -> base) {    /*栈溢出*/
            return -1;
        }
        s -> top = s -> base + s -> stacksize;
        s -> stacksize += STACKINCREMENT;
    }
    *(s -> top) = e;    /*先赋值再将指针加1*/
    s -> top ++;
    return 1;
}
int pop (SqStack *s, char *e) {    /*出栈*/
    if (s -> top == s -> base) {    /*栈空*/
            return 1;
    } else {  
        
     *e=*s->base;
     s->base++;
     
    }
    return 0;
}
int recover(SqStack s,int size){  /*恢复栈底指针指向*/
    s.base = s.top - size;
    return 0;
}

_Bool isEmpty (SqStack *s) {    /*判断栈是否为空*/
    if (s -> top == s -> base) {
        return 1;
    } else {
        return 0;
    }
}

int reservedWord(SqStack s,char *yytext)    // 保留字
{
    char remains[13][100] =
        {
            "const",
            "var",
            "procedure",
            "begin",
            "end",
            "if",
            "then",
            "while",
            "do",
            "read",
            "call",
            "writeln"
        };
    int size = s.top - s.base;
    //printf("输出文件的Size: %d\n",size);
    char temp[size];
    memset(temp, '\0', size);
    int t = 0;
    while(isEmpty(&s) == false){
        char val;
        pop(&s,&val);
        // printf("输出栈中值:%c\n",val);
         if((val>= 65 && val <= 90) || (val >= 97 && val <= 122))//判断保留字 
            {
                //  printf("输出栈中值:%c\n",val);
                temp[t++] = val;
                for (int a = 0; a < 13; a++)
                {
                    if (strcmp(temp, remains[a]) == 0)
                    {
                        strcat(yytext,remains[a]);
                        strcat(yytext," ");
                        memset(temp, '\0', t);
                        t = 0;
                    }
                }
            }
            else
            {
                if(strcmp(temp,"write") == 0)
                {
                    strcat(yytext,"write");
                    strcat(yytext," ");
                }
                memset(temp, '\0', t);
                t = 0;
            }
    }
    s.base = s.top - size; //恢复指针指向;
    
}
void operator_(SqStack s,char *yytext){  // 运算符
    char val;
    while(isEmpty(&s) == false){
    pop(&s,&val);
    switch(val){
        case '+':
            strcat(yytext,"\\+");
            strcat(yytext," ");
            break;
        case '-':
            strcat(yytext,"\\-");
            strcat(yytext," ");
            break;
        case '/':
            if(*(s.base) == '*'){
            strcat(yytext,"/*");
            strcat(yytext," ");
            s.base++;
            }
            else{
            strcat(yytext,"\\/");
            strcat(yytext," ");  
            } 
            break;
        case '*':
            strcat(yytext,"\\*");
            strcat(yytext," ");
            break;
        case ':': 
            if(*(s.base) == '='){
            strcat(yytext,":=");
            strcat(yytext," ");
            s.base++;
            }   
            break;
        case '>':
            if(*(s.base) == '='){
            strcat(yytext,">=");
            strcat(yytext," ");
            s.base++;
            }
            else{
            strcat(yytext,">");
            strcat(yytext," ");   
            } 
            break;
        case '<':
            if(*(s.base) == '='){
            strcat(yytext,"<=");
            strcat(yytext," ");
            s.base++;
            }
            else{
            strcat(yytext,"<");
            strcat(yytext," ");   
            } 
            break;
        case '#':
            strcat(yytext,"#");
            strcat(yytext," ");
            break;
        case '=':
            strcat(yytext,"=");
            strcat(yytext," ");
            break;
    }
    }
}
int constant(SqStack s, char *yytext){   //数字 0 ~ 9
    char val;
    char a[30];
    memset(a, '\0', 30);
    int i = 0;
    while(isEmpty(&s) == false){
        pop(&s,&val);
        if((val >= 48 && val <= 57)){
            a[i++] = val;
            a[i++] = ' '; 
        }
    }
    strcat(yytext,a);
    strcat(yytext," ");
}
void identfier(SqStack s, char *yytext){  // 标识符
char remains[13][100] =
        {
            "const",
            "var",
            "procedure",
            "begin",
            "end",
            "if",
            "then",
            "while",
            "do",
            "read",
            "call",
            "writeln"
        };
    int size = s.top - s.base;
    char temp[size];
    memset(temp, '\0', size);
    int t = 0;
    while(isEmpty(&s) == false){
        char val;
        pop(&s,&val);
        // printf("输出栈中值:%c\n",val);
         if((val>= 65 && val <= 90) || (val >= 97 && val <= 122) || (val >= 48 && val <= 57))//判断基本字和标识符 
            {
                //  printf("输出栈中值:%c\n",val);
                temp[t++] = val;
                for (int a = 0; a < 13; a++)
                {
                    if (strcmp(temp, remains[a]) == 0){
                        memset(temp, '\0', t);
                        t = 0;
                    }
                }
            }
            else
            {
                if(strcmp(temp,"write") == 0)
                {
                    memset(temp, '\0', t);
                    t = 0;
                }
                if(temp[0] >= 48 && temp[0] <= 57){
                    memset(temp, '\0', t);
                    t = 0;
                }
                if(temp[0] != '\0'){
                strcat(yytext,temp);
                strcat(yytext," ");
                }
                memset(temp, '\0', t);
                t = 0;
            }
    }
}

void delimiter(SqStack s, char *yytext){  //分隔符
    char val;
    while(isEmpty(&s) == false){
        pop(&s,&val);
        switch(val){
            case ':':
                strcat(yytext,":");
                strcat(yytext," ");
                break;
            case '.':
                strcat(yytext,".");
                strcat(yytext," ");
                break;
            case ';':
                strcat(yytext,";");
                strcat(yytext," ");
                break;
        }
    }
}
int yywrap()
{
        return 1;
}
int yylex(SqStack s)
{
    char yytext[1024];
    memset(yytext, '\0', strlen(yytext));
    int count = 1;
    int size = s.top - s.base;
    while(count < 6){
    if(count == 1){
    reservedWord(s,yytext);
    printf("\t%d\t(1,‘ %s’)\n",count,yytext);
    memset(yytext, '\0', strlen(yytext));
    count++;
    }
    else if(count == 2){
        operator_(s,yytext);
        printf("\t%d\t(2,‘ %s’)\n",count,yytext);
        recover(s,size);
        memset(yytext, '\0', strlen(yytext));
        count++;
    }
    else if(count == 3){
        delimiter(s,yytext);
        printf("\t%d\t(3,‘ %s’)\n",count,yytext);
        recover(s,size);
        memset(yytext, '\0', strlen(yytext));
        count++;
    }
    else if(count == 4){
        constant(s,yytext);
        printf("\t%d\t(4,‘ %s’)\n",count,yytext);
        recover(s,size);
        memset(yytext, '\0', strlen(yytext));
        count++;
    }
    else if(count == 5){
        identfier(s,yytext);
        printf("\t%d\t(5,‘ %s’)\n",count,yytext);
        recover(s,size);
        memset(yytext, '\0', strlen(yytext));
        count++;
    }
    // whitespace(); /* do    nothing*/ 
    // if(count > 5)
    }
    printf("文件读取成功!yywrap返回值:%d\n",yywrap());
}

int main()
{
    system("color 4");
    SqStack s;
    InitStack(&s);
    char yyout;
    char file[1024];
    int len = 0;
    FILE *yyin;
    printf("词法分析器输出类型说明:\n");

    printf("1:保留字\n");

    printf("2:运算符\n");

    printf("3:分界符\n");

    printf("4:常  数\n");

    printf("5:标识符\n");

    printf("\n");
    yyin = fopen("example.txt","r");
    if(yyin == NULL){
        printf("文件打开失败!");
    }
    else{
        printf("文件打开成功!\n");
        yyout = fgetc(yyin);
        while(!feof(yyin)){
            push(&s,yyout);
            len++;
            yyout = fgetc(yyin);
        }
          yylex(s);
    }
        fclose(yyin);

    system("PAUSE");/*暂停*/

}

​ 当然,代码还有很多缺点,例如,代码很长,有很多地方可以整合缩减。也有很多地方数据冗余,可能是调试的时候忘记删除了,不过输出结果还是符合实验要求的。

内存空间的扩充
快来做第一个评论的人吧~