编译原理实验报告——词法分析程序
开发环境 | 软、硬件:VS2022 开发语言:C语言 开发环境: VS2022 |
名称 词法分析程序(2学时) |
实验目的 理解词法分析在编译程序中的作用;加深对有穷自动机模型的理解;掌握词法分析程序的实现方法和技术。 |
实验内容 选择部分C语言的语法成分,设计其词法分析程序,要求能够识别关键字、运算符、分界符、标识符、常量(至少是整型常量,可以自己扩充识别其他常量)等,并能处理注释、部分复合运算符(如>=等)。单词以二元式形式输出,并输出有词法错误的单词及其所在行号。 |
实验要求 (1)待分析的简单语法 关键字:begin if then while do end … 运算符和界符: := + - * / < <= > >= <> = == ; ( ) # , … 其他单词是标识符id和整型常数num,通过以下正规式定义: id=l(l|d)* (l:letter,d:digit) num=dd* 空格、注释:在词法分析中要去掉。 (2)各种单词符号对应的种别编码(参考这张表,可以不同) (3)待分析的源程序: (a)int main() { int a=1,b=2; b/a; /* 注释部分*/ b>a; c=a+b; cout<<c; return 0; } (b)这个待分析程序有词法错误(选做) while ((a+15)>0) { if (2x == 7) i3=z; } |
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>// 定义单词种别码
/*
 * Token category codes. The lexer prints one of these as the first field of
 * every "<code, lexeme>" pair.
 */
enum {
    KEYWORD = 1,    /* lexeme found in the keyword table */
    OPERATOR = 2,   /* single or compound operator */
    DELIMITER = 3,  /* punctuation from the delimiter table */
    IDENTIFIER = 4, /* name that is not a keyword */
    INTEGER = 5,    /* run of decimal digits */
    ERROR = 6       /* error category; not referenced by the visible code */
};

// Keyword table follows
// Keyword table: lexemes that isKeyword() matches exactly (strcmp, case-sensitive).
const char* keywords[] = { "while", "if", "int", "float", "char", "return" };// operator and delimiter tables follow
// Characters that isOperator() accepts as operator characters.
const char operators[] = "+-*/<>=!&|";
// Characters that isDelimiter() accepts as delimiters.
const char delimiters[] = ";(),{}[]#";// keyword test follows
/*
 * Tell whether `word` is one of the entries of the keyword table.
 *
 * word: NUL-terminated lexeme; compared exactly (strcmp) with each entry.
 * Returns 1 when the lexeme equals a table entry, 0 otherwise.
 */
int isKeyword(const char* word) {
    const int count = (int)(sizeof(keywords) / sizeof(keywords[0]));
    int found = 0;
    int idx = 0;

    /* Linear scan that stops at the first match. */
    while (idx < count && !found) {
        found = (strcmp(word, keywords[idx]) == 0);
        idx++;
    }
    return found;
}

// Operator test follows
/*
 * Tell whether `ch` is one of the characters listed in `operators`.
 *
 * ch: character to classify.
 * Returns 1 for an operator character, 0 otherwise.
 *
 * strchr() considers the terminating NUL part of the searched string, so a
 * NUL byte would be reported as an operator; it is rejected up front.
 */
int isOperator(char ch) {
    if (ch == '\0') {
        return 0;
    }
    return strchr(operators, ch) != NULL;
}

// Delimiter test follows
/*
 * Tell whether `ch` is one of the characters listed in `delimiters`.
 *
 * ch: character to classify.
 * Returns 1 for a delimiter character, 0 otherwise.
 *
 * strchr() considers the terminating NUL part of the searched string, so a
 * NUL byte would be reported as a delimiter; it is rejected up front.
 */
int isDelimiter(char ch) {
    if (ch == '\0') {
        return 0;
    }
    return strchr(delimiters, ch) != NULL;
}

// Source listing printer follows
/*
 * Echo the whole source file to stdout with a right-aligned line number in
 * front of every line, then print the banner that introduces the token list.
 *
 * fp: stream opened for reading. It is rewound first and then read until
 *     fgetc() reports EOF.
 *
 * A line-number prefix is printed after every newline, so a file that ends
 * with a newline shows one extra, empty numbered line.
 */
void printSourceCode(FILE* fp) {
    /* fgetc() returns int. Storing it in a char makes a 0xFF data byte look
     * like EOF where char is signed, and makes EOF undetectable where char
     * is unsigned. */
    int ch;
    int line_num = 1;

    printf("=== 源程序内容 ===\n");
    rewind(fp);
    printf("%4d: ", line_num);
    while ((ch = fgetc(fp)) != EOF) {
        putchar(ch);
        if (ch == '\n') {
            line_num++;
            printf("%4d: ", line_num);
        }
    }
    printf("\n=== 词法分析结果 ===\n");
}

// Lexical analysis follows
void lexer(FILE* fp) {char ch, buffer[100];int buffer_index = 0;int line_number = 1;int last_token_was_identifier = 0;rewind(fp);while ((ch = fgetc(fp)) != EOF) {// 跳过空白字符if (ch == ' ' || ch == '\t') {continue;}// 处理换行else if (ch == '\n') {line_number++;}// 处理注释else if (ch == '/') {char next = fgetc(fp);if (next == '/') {while ((ch = fgetc(fp)) != '\n' && ch != EOF);line_number++;}else if (next == '*') {while (1) {ch = fgetc(fp);if (ch == '\n') line_number++;if (ch == EOF) {printf("Error (Line %d): Unclosed comment\n", line_number);break;}if (ch == '*' && (ch = fgetc(fp)) == '/') {break;}}}else {printf("<%d, %c>\n", OPERATOR, '/');ungetc(next, fp);}}// 处理标识符和关键字else if (isalpha(ch) || ch == '_') {buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';buffer_index = 0;if (isKeyword(buffer)) {printf("<%d, %s>\n", KEYWORD, buffer);last_token_was_identifier = 0;}else {printf("<%d, %s>\n", IDENTIFIER, buffer);last_token_was_identifier = 1;}if (ch != EOF) ungetc(ch, fp);}// 处理数字常量或错误标识符else if (isdigit(ch)) {buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isdigit(ch) || isalpha(ch))) {if (isalpha(ch)) {// 数字后跟字母,是错误标识符buffer[buffer_index++] = ch;while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) {buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';printf("Error (Line %d): Invalid identifier '%s' (cannot start with digit)\n",line_number, buffer);buffer_index = 0;if (ch != EOF) ungetc(ch, fp);continue;}buffer[buffer_index++] = ch;}buffer[buffer_index] = '\0';buffer_index = 0;printf("<%d, %s>\n", INTEGER, buffer);last_token_was_identifier = 0;if (ch != EOF) ungetc(ch, fp);}// 处理运算符和复合运算符else if (isOperator(ch)) {char next = fgetc(fp);if (ch == '=' && next == '=') {printf("<%d, ==>\n", OPERATOR);}else if (ch == '!' 
&& next == '=') {printf("<%d, !=>\n", OPERATOR);}else if (ch == '<' && next == '=') {printf("<%d, <=>\n", OPERATOR);}else if (ch == '>' && next == '=') {printf("<%d, >=>\n", OPERATOR);}else if (ch == '&' && next == '&') {printf("<%d, &&>\n", OPERATOR);}else if (ch == '|' && next == '|') {printf("<%d, ||>\n", OPERATOR);}else {printf("<%d, %c>\n", OPERATOR, ch);if (next != EOF) ungetc(next, fp);}last_token_was_identifier = 0;}// 处理界符else if (isDelimiter(ch)) {printf("<%d, %c>\n", DELIMITER, ch);if (ch == ';') {last_token_was_identifier = 0;}}// 处理错误字符else {printf("Error (Line %d): Illegal character '%c'\n", line_number, ch);}}
}int main(int argc, char* argv[]) {const char* filename = argc > 1 ? argv[1] : "test1.txt";// const char* filename = argc > 1 ? argv[1] : "test.c";FILE* fp;if (fopen_s(&fp, filename, "r") != 0 || fp == NULL) {printf("Error: Cannot open file %s\n", filename);return 1;}printSourceCode(fp);lexer(fp);fclose(fp);return 0;
}
核心代码:(按模块顺序,每一个模块先做简要功能说明及输入输出介绍,再附核心代码,如果只有代码,没有说明,酌情减分) #define _CRT_SECURE_NO_WARNINGS #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> // 定义单词种别码 #define KEYWORD 1 #define OPERATOR 2 #define DELIMITER 3 #define IDENTIFIER 4 #define INTEGER 5 #define ERROR 6 // 关键字表 const char* keywords[] = { "while", "if", "int", "float", "char", "return" }; // 运算符和界符表 const char operators[] = "+-*/<>=!&|"; const char delimiters[] = ";(),{}[]#"; // 一、判断是否为关键字—核心代码 //功能:判断给定的字符串是否为预定义的关键字 //输入:字符串指针 //输出:1(是关键字)/0(不是关键字) int isKeyword(const char* word) { for (int i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) { if (strcmp(word, keywords[i]) == 0) { return 1; } } return 0; } // 二、判断是否为运算符—核心代码 //功能:判断字符是否为预定义的运算符 //输入:单个字符 //输出:1(是运算符)/0(不是运算符) int isOperator(char ch) { return strchr(operators, ch) != NULL; } // 三、判断是否为界符—核心代码 //功能:判断字符是否为预定义的界符 //输入:单个字符 //输出:1(是界符)/0(不是界符) int isDelimiter(char ch) { return strchr(delimiters, ch) != NULL; } // 四、打印源程序内容—核心代码 //功能:读取源程序文件并打印带行号的源代码,为后续词法分析做准备 //输入:文件指针(已打开的文件) //输出:带行号标注的源代码内容 void printSourceCode(FILE* fp) { char ch; int line_num = 1; printf("=== 源程序内容 ===\n"); rewind(fp); printf("%4d: ", line_num); while ((ch = fgetc(fp)) != EOF) { putchar(ch); if (ch == '\n') { line_num++; printf("%4d: ", line_num); } } printf("\n=== 词法分析结果 ===\n"); } // 五、词法分析函数—核心代码 //功能:核心词法分析功能,识别各类单词符号并输出二元式 //输入:文件指针 //输出:单词的二元式(种别码,单词值)或错误信息 void lexer(FILE* fp) { char ch, buffer[100]; int buffer_index = 0; int line_number = 1; int last_token_was_identifier = 0; rewind(fp); while ((ch = fgetc(fp)) != EOF) { // 跳过空白字符 if (ch == ' ' || ch == '\t') { continue; } // 处理换行 else if (ch == '\n') { line_number++; } // 处理注释 else if (ch == '/') { char next = fgetc(fp); if (next == '/') { while ((ch = fgetc(fp)) != '\n' && ch != EOF); line_number++; } else if (next == '*') { while (1) { ch = fgetc(fp); if (ch == '\n') line_number++; if (ch == EOF) { printf("Error (Line %d): Unclosed comment\n", line_number); break; } if (ch == 
'*' && (ch = fgetc(fp)) == '/') { break; } } } else { printf("<%d, %c>\n", OPERATOR, '/'); ungetc(next, fp); } } // 处理标识符和关键字 else if (isalpha(ch) || ch == '_') { buffer[buffer_index++] = ch; while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) { buffer[buffer_index++] = ch; } buffer[buffer_index] = '\0'; buffer_index = 0; if (isKeyword(buffer)) { printf("<%d, %s>\n", KEYWORD, buffer); last_token_was_identifier = 0; } else { printf("<%d, %s>\n", IDENTIFIER, buffer); last_token_was_identifier = 1; } if (ch != EOF) ungetc(ch, fp); } // 处理数字常量或错误标识符 else if (isdigit(ch)) { buffer[buffer_index++] = ch; while ((ch = fgetc(fp)) != EOF && (isdigit(ch) || isalpha(ch))) { if (isalpha(ch)) { // 数字后跟字母,是错误标识符 buffer[buffer_index++] = ch; while ((ch = fgetc(fp)) != EOF && (isalnum(ch) || ch == '_')) { buffer[buffer_index++] = ch; } buffer[buffer_index] = '\0'; printf("Error (Line %d): Invalid identifier '%s' (cannot start with digit)\n", line_number, buffer); buffer_index = 0; if (ch != EOF) ungetc(ch, fp); continue; } buffer[buffer_index++] = ch; } buffer[buffer_index] = '\0'; buffer_index = 0; printf("<%d, %s>\n", INTEGER, buffer); last_token_was_identifier = 0; if (ch != EOF) ungetc(ch, fp); } // 处理运算符和复合运算符 else if (isOperator(ch)) { char next = fgetc(fp); if (ch == '=' && next == '=') { printf("<%d, ==>\n", OPERATOR); } else if (ch == '!' 
&& next == '=') { printf("<%d, !=>\n", OPERATOR); } else if (ch == '<' && next == '=') { printf("<%d, <=>\n", OPERATOR); } else if (ch == '>' && next == '=') { printf("<%d, >=>\n", OPERATOR); } else if (ch == '&' && next == '&') { printf("<%d, &&>\n", OPERATOR); } else if (ch == '|' && next == '|') { printf("<%d, ||>\n", OPERATOR); } else { printf("<%d, %c>\n", OPERATOR, ch); if (next != EOF) ungetc(next, fp); } last_token_was_identifier = 0; } // 处理界符 else if (isDelimiter(ch)) { printf("<%d, %c>\n", DELIMITER, ch); if (ch == ';') { last_token_was_identifier = 0; } } // 处理错误字符 else { printf("Error (Line %d): Illegal character '%c'\n", line_number, ch); } } } int main(int argc, char* argv[]) { const char* filename = argc > 1 ? argv[1] : "test1.txt"; FILE* fp; if (fopen_s(&fp, filename, "r") != 0 || fp == NULL) { printf("Error: Cannot open file %s\n", filename); return 1; } printSourceCode(fp); lexer(fp); fclose(fp); return 0; } |
test1.txt(放在根目录下)
while ((a+15)>0)
{
if (2x == 7)
i3=z;
}