当前位置: 首页 > news >正文

编译原理实验报告——词法分析程序

开发环境

软、硬件:VS2022

开发语言:C语言

开发环境: VS2022

名称   词法分析程序(2学时)

实验目的

理解词法分析在编译程序中的作用;加深对有穷自动机模型的理解;掌握词法分析程序的实现方法和技术。

实验内容

选择部分C语言的语法成分,设计其词法分析程序,要求能够识别关键字、运算符、分界符、标识符、常量(至少是整型常量,可以自己扩充识别其他常量)等,并能处理注释、部分复合运算符(如>=等)。单词以二元式形式输出;对有词法错误的单词,输出该单词及其所在行号。

实验要求

(1)待分析的简单的语法

   关键字:begin if then while do end …

     运算符和界符: := + - * / < <= > >= <> = == ; ( ) # , …

   其他单词是标识符id和整型常数num,通过以下正规式定义:

     id=l(l|d)*  (l:letter d:digit)

     num=dd*

     空格、注释:在词法分析中要去掉。

(2)各种单词符号对应的种别编码(参考这张表,可以不同)

(3)待分析的源程序:

   (a)int main() 

{

   int a=1,b=2;

   b/a;

   /* 注释部分*/

  b>a;

  c=a+b;

  cout<<c;

  return 0;

}

(b)这个待分析程序有词法错误(选做)

while ((a+15)>0)

{

if (2x == 7)

i3=z;

        }

#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>// 定义单词种别码
/* Token category codes; the lexer prints one of these as the first field
 * of every "<code, lexeme>" pair. ERROR is defined but not referenced by
 * the visible code. */
#define KEYWORD    1
#define OPERATOR   2
#define DELIMITER  3
#define IDENTIFIER 4
#define INTEGER    5
#define ERROR      6

/* Reserved words, matched exactly by isKeyword(). */
const char *keywords[] = {
    "while",
    "if",
    "int",
    "float",
    "char",
    "return"
};

/* Single-character operators and delimiters, searched with strchr(). */
const char operators[] = "+-*/<>=!&|";
const char delimiters[] = ";(),{}[]#";

// Check whether a string is a keyword
/*
 * Reports whether `word` is exactly equal to one entry of the keywords
 * table. Returns 1 on a match, 0 otherwise.
 */
int isKeyword(const char* word) {
    const size_t total = sizeof(keywords) / sizeof(keywords[0]);
    size_t idx = 0;

    while (idx < total) {
        if (strcmp(word, keywords[idx]) == 0) {
            return 1;
        }
        idx++;
    }
    return 0;
}
// Check whether a character is an operator
/*
 * Reports whether `ch` is one of the characters in the operators table.
 * Returns 1 if it is, 0 otherwise.
 *
 * strchr() treats the terminating NUL as part of the searched string, so
 * NUL is rejected explicitly; without the guard a NUL byte in the input
 * would be classified as an operator.
 */
int isOperator(char ch) {
    return ch != '\0' && strchr(operators, ch) != NULL;
}
// Check whether a character is a delimiter
/*
 * Reports whether `ch` is one of the characters in the delimiters table.
 * Returns 1 if it is, 0 otherwise.
 *
 * strchr() treats the terminating NUL as part of the searched string, so
 * NUL is rejected explicitly; without the guard a NUL byte in the input
 * would be classified as a delimiter.
 */
int isDelimiter(char ch) {
    return ch != '\0' && strchr(delimiters, ch) != NULL;
}
// Print the source program
/*
 * Echoes the whole stream to stdout, prefixing every line with its
 * 4-column line number, between a header banner and a trailer banner.
 *
 * fp: readable stream. It is rewound first and then read until fgetc()
 *     reports EOF.
 */
void printSourceCode(FILE* fp) {
    /* Must be int, not char: fgetc() returns EOF as an out-of-band int.
     * Stored in a char, byte 0xFF is mistaken for EOF (signed char) or
     * EOF is never matched (unsigned char). */
    int ch;
    int line_num = 1;

    printf("=== 源程序内容 ===\n");
    rewind(fp);
    printf("%4d: ", line_num);
    while ((ch = fgetc(fp)) != EOF) {
        putchar(ch);
        if (ch == '\n') {
            /* A new line starts: emit its number before its content. */
            line_num++;
            printf("%4d: ", line_num);
        }
    }
    printf("\n=== 词法分析结果 ===\n");
}
// Lexical analysis function
void lexer(FILE* fp) {char ch, buffer[100];int buffer_index = 0;int line_number = 1;int last_token_was_identifier = 0;rewind(fp);while ((ch = fgetc(fp)) != EOF) {// 跳过空白字符if (ch == ' ' || ch == '\t') {continue;}// 处理换行else if (ch == '\n') {line_number++;}// 处理注释else if (ch == '/') {char next = fgetc(fp);if (next == '/') {while ((ch = fgetc(fp)) != '\n' && ch != EOF);line_number++;}else if (next == '*') {while (1) {ch = fgetc(fp);if (ch == '\n') line_number++;if (ch == EOF) {printf("Error (Line %d): Unclosed comment\n", line_number);break;}if (ch == '*' && (ch = fgetc(fp)) == '/') {break;}}}else {printf("<%d, %c>\n", OPERATOR, '/');ungetc(next, fp);}}// 处理标识符和关键字else if (isalpha(ch) || ch == '_') {buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';buffer_index = 0;if (isKeyword(buffer)) {printf("<%d, %s>\n", KEYWORD, buffer);last_token_was_identifier = 0;}else {printf("<%d, %s>\n", IDENTIFIER, buffer);last_token_was_identifier = 1;}if (ch != EOF) ungetc(ch, fp);}// 处理数字常量或错误标识符else if (isdigit(ch)) {buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isdigit(ch) || isalpha(ch))) {if (isalpha(ch)) {// 数字后跟字母,是错误标识符buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';printf("Error (Line %d): Invalid identifier '%s' (cannot start with digit)\n",line_number, buffer);buffer_index = 0;if (ch != EOF) ungetc(ch, fp);continue;}buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';buffer_index = 0;printf("<%d, %s>\n", INTEGER, buffer);last_token_was_identifier = 0;if (ch != EOF) ungetc(ch, fp);}// 处理运算符和复合运算符else if (isOperator(ch)) {char next = fgetc(fp);if (ch == '=' && next == '=') {printf("<%d, ==>\n", OPERATOR);}else if (ch == '!' 
&& next == '=') {printf("<%d, !=>\n", OPERATOR);}else if (ch == '<' && next == '=') {printf("<%d, <=>\n", OPERATOR);}else if (ch == '>' && next == '=') {printf("<%d, >=>\n", OPERATOR);}else if (ch == '&' && next == '&') {printf("<%d, &&>\n", OPERATOR);}else if (ch == '|' && next == '|') {printf("<%d, ||>\n", OPERATOR);}else {printf("<%d, %c>\n", OPERATOR, ch);if (next != EOF) ungetc(next, fp);}last_token_was_identifier = 0;}// 处理界符else if (isDelimiter(ch)) {printf("<%d, %c>\n", DELIMITER, ch);if (ch == ';') {last_token_was_identifier = 0;}}// 处理错误字符else {printf("Error (Line %d): Illegal character '%c'\n", line_number, ch);}}
}int main(int argc, char* argv[]) {const char* filename = argc > 1 ? argv[1] : "test1.txt";// const char* filename = argc > 1 ? argv[1] : "test.c";FILE* fp;if (fopen_s(&fp, filename, "r") != 0 || fp == NULL) {printf("Error: Cannot open file %s\n", filename);return 1;}printSourceCode(fp);lexer(fp);fclose(fp);return 0;
}

核心代码:(按模块顺序,每一个模块先做简要功能说明及输入输出介绍,再附核心代码,如果只有代码,没有说明,酌情减分)

#define _CRT_SECURE_NO_WARNINGS

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <ctype.h>

/* Token category codes; the lexer prints one of these as the first field
 * of every "<code, lexeme>" pair. ERROR is defined but not referenced by
 * the visible code. */
#define KEYWORD    1
#define OPERATOR   2
#define DELIMITER  3
#define IDENTIFIER 4
#define INTEGER    5
#define ERROR      6

/* Keyword table, matched exactly by isKeyword(). */
const char *keywords[] = {
    "while",
    "if",
    "int",
    "float",
    "char",
    "return"
};

/* Operator and delimiter tables, searched with strchr(). */
const char operators[] = "+-*/<>=!&|";
const char delimiters[] = ";(),{}[]#";

// 1. Keyword check
// Purpose: decide whether the given string is one of the predefined keywords
// Input:   pointer to a NUL-terminated string
// Output:  1 (keyword) / 0 (not a keyword)
int isKeyword(const char* word) {
    const size_t total = sizeof(keywords) / sizeof(keywords[0]);
    size_t idx = 0;

    // Linear scan; the first exact match wins.
    while (idx < total) {
        if (strcmp(word, keywords[idx]) == 0) {
            return 1;
        }
        idx++;
    }
    return 0;
}

// 2. Operator check
// Purpose: decide whether a character is one of the predefined operators
// Input:   a single character
// Output:  1 (operator) / 0 (not an operator)
int isOperator(char ch) {
    // strchr() treats the terminating NUL as part of the searched string,
    // so NUL must be rejected explicitly; otherwise a NUL byte in the input
    // would be classified as an operator.
    return ch != '\0' && strchr(operators, ch) != NULL;
}

// 3. Delimiter check
// Purpose: decide whether a character is one of the predefined delimiters
// Input:   a single character
// Output:  1 (delimiter) / 0 (not a delimiter)
int isDelimiter(char ch) {
    // strchr() treats the terminating NUL as part of the searched string,
    // so NUL must be rejected explicitly; otherwise a NUL byte in the input
    // would be classified as a delimiter.
    return ch != '\0' && strchr(delimiters, ch) != NULL;
}

// 4. Source listing
// Purpose: read the source file and print it with line numbers before the
//          lexical analysis output
// Input:   file pointer (already opened); it is rewound first and then read
//          until fgetc() reports EOF
// Output:  the source text on stdout, each line prefixed by its 4-column
//          line number, between a header banner and a trailer banner
void printSourceCode(FILE* fp) {
    // Must be int, not char: fgetc() returns EOF as an out-of-band int.
    // Stored in a char, byte 0xFF is mistaken for EOF (signed char) or EOF
    // is never matched (unsigned char).
    int ch;
    int line_num = 1;

    printf("=== 源程序内容 ===\n");
    rewind(fp);
    printf("%4d: ", line_num);
    while ((ch = fgetc(fp)) != EOF) {
        putchar(ch);
        if (ch == '\n') {
            // A new line starts: emit its number before its content.
            line_num++;
            printf("%4d: ", line_num);
        }
    }
    printf("\n=== 词法分析结果 ===\n");
}

// 五、词法分析函数核心代码

//功能:核心词法分析功能,识别各类单词符号并输出二元式

//输入:文件指针

//输出:单词的二元式(种别码,单词值)或错误信息

void lexer(FILE* fp) {

    char ch, buffer[100];

    int buffer_index = 0;

    int line_number = 1;

    int last_token_was_identifier = 0;

    rewind(fp);

    while ((ch = fgetc(fp)) != EOF) {

        // 跳过空白字符

        if (ch == ' ' || ch == '\t') {

            continue;

        }

        // 处理换行

        else if (ch == '\n') {

            line_number++;

        }

        // 处理注释

        else if (ch == '/') {

            char next = fgetc(fp);

            if (next == '/') {

                while ((ch = fgetc(fp)) != '\n' && ch != EOF);

                line_number++;

            }

            else if (next == '*') {

                while (1) {

                    ch = fgetc(fp);

                    if (ch == '\n') line_number++;

                    if (ch == EOF) {

                        printf("Error (Line %d): Unclosed comment\n", line_number);

                        break;

                    }

                    if (ch == '*' && (ch = fgetc(fp)) == '/') {

                        break;

                    }

                }

            }

            else {

                printf("<%d, %c>\n", OPERATOR, '/');

                ungetc(next, fp);

            }

        }

        // 处理标识符和关键字

        else if (isalpha(ch) || ch == '_') {

            buffer[buffer_index++] = ch;

            while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {

                buffer[buffer_index++] = ch;

            }

            buffer[buffer_index] = '\0';

            buffer_index = 0;

            if (isKeyword(buffer)) {

                printf("<%d, %s>\n", KEYWORD, buffer);

                last_token_was_identifier = 0;

            }

            else {

                printf("<%d, %s>\n", IDENTIFIER, buffer);

                last_token_was_identifier = 1;

            }

            if (ch != EOF) ungetc(ch, fp);

        }

        // 处理数字常量或错误标识符

        else if (isdigit(ch)) {

            buffer[buffer_index++] = ch;

            while ((ch = fgetc(fp)) != EOF && (isdigit(ch) || isalpha(ch))) {

                if (isalpha(ch)) {

                    // 数字后跟字母,是错误标识符

                    buffer[buffer_index++] = ch;

                    while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {

                        buffer[buffer_index++] = ch;

                    }

                    buffer[buffer_index] = '\0';

                    printf("Error (Line %d): Invalid identifier '%s' (cannot start with digit)\n",

                        line_number, buffer);

                    buffer_index = 0;

                    if (ch != EOF) ungetc(ch, fp);

                    continue;

                }

                buffer[buffer_index++] = ch;

            }

            buffer[buffer_index] = '\0';

            buffer_index = 0;

            printf("<%d, %s>\n", INTEGER, buffer);

            last_token_was_identifier = 0;

            if (ch != EOF) ungetc(ch, fp);

        }

        // 处理运算符和复合运算符

        else if (isOperator(ch)) {

            char next = fgetc(fp);

            if (ch == '=' && next == '=') {

                printf("<%d, ==>\n", OPERATOR);

            }

            else if (ch == '!' && next == '=') {

                printf("<%d, !=>\n", OPERATOR);

            }

            else if (ch == '<' && next == '=') {

                printf("<%d, <=>\n", OPERATOR);

            }

            else if (ch == '>' && next == '=') {

                printf("<%d, >=>\n", OPERATOR);

            }

            else if (ch == '&' && next == '&') {

                printf("<%d, &&>\n", OPERATOR);

            }

            else if (ch == '|' && next == '|') {

                printf("<%d, ||>\n", OPERATOR);

            }

            else {

                printf("<%d, %c>\n", OPERATOR, ch);

                if (next != EOF) ungetc(next, fp);

            }

            last_token_was_identifier = 0;

        }

        // 处理界符

        else if (isDelimiter(ch)) {

            printf("<%d, %c>\n", DELIMITER, ch);

            if (ch == ';') {

                last_token_was_identifier = 0;

            }

        }

        // 处理错误字符

        else {

            printf("Error (Line %d): Illegal character '%c'\n", line_number, ch);

        }

    }

}

// Entry point. Opens the file named by argv[1] (default "test1.txt"),
// prints it with line numbers, then prints its token stream.
// Returns 0 on success and 1 when the file cannot be opened.
int main(int argc, char* argv[]) {
    const char* filename = argc > 1 ? argv[1] : "test1.txt";
    // Standard fopen() instead of fopen_s(): the latter is an optional
    // extension that many C libraries do not provide, and the file already
    // defines _CRT_SECURE_NO_WARNINGS so MSVC accepts fopen() silently.
    FILE* fp = fopen(filename, "r");
    if (fp == NULL) {
        printf("Error: Cannot open file %s\n", filename);
        return 1;
    }
    printSourceCode(fp);
    lexer(fp);
    fclose(fp);
    return 0;
}

test1.txt(放在根目录下)

while ((a+15)>0) 
{
if (2x == 7) 
i3=z;
}

http://www.dtcms.com/a/394548.html

相关文章:

  • 整体设计 完整的逻辑链条 之4 认知逻辑视角 —— 前序驱动的认知演进体系 之2
  • C/C++正则表达式PCRE2库
  • 基于python大数据的声乐信息分类评测系统
  • 永磁同步电机无速度算法--改进型超螺旋滑模观测器
  • Linux0.12的中断处理过程源码分析
  • 进程控制(Linux)
  • 【C++】——string类的使用(详细讲解)
  • 借助 Amazon ECS 全新的内置蓝绿部署功能,加速安全的软件发布进程
  • 【脑电分析系列】第24篇:运动想象BCI系统构建:CSP+LDA/SVM与深度学习方法的对比研究
  • 【论文速递】2025年第22周(May-25-31)(Robotics/Embodied AI/LLM)
  • MySQL 5.7 多实例部署完整指南(基于二进制包)
  • Git的使用——Git命令、密钥/私钥、文件推送/提交、分支增删改查、文件回滚、.gitignore文件忽略
  • [已更新]2025华为杯D题数学建模研赛D题研究生数学建模思路代码文章成品:低空湍流监测及最优航路规划
  • [C++类的默认成员函数——lesson5.构造函数析构函数]
  • 第二十七章 ESP32S3 INFRARED_TRANSMISSION 实验
  • ✅ Python车牌识别计费系统 PyQt5界面 YOLOv5+CRNN 深度学习 MySQL可视化 车牌检测(建议收藏)
  • 盛水最多的容器_优选算法(C++)双指针
  • QT-串口,完结!
  • Git常用命令合集
  • Qt(模态对话框的切换)
  • QT-模型视图结构
  • C语言 C语句
  • 《理解Reactor网络编程模型》
  • Mirror Maze 镜面反射
  • 一个案例弄懂nfs
  • 在飞牛NAS使用Lucky做动态解析到域名?
  • 多实例 MySQL 部署
  • 使用批处理脚本快速切换 Claude API 实现多平台环境配置
  • SkyDiffusion:用 BEV 视角打开街景→航拍图像合成新范式
  • 免费下载适用于 Windows PC 的 Pixologic Zbrush 2026