当前位置：首页 > news >正文

《Linux 基础 IO 完全指南：从文件描述符到缓冲区》

news 2025/9/29 15:51:25

《Linux 基础 IO 完全指南：从文件描述符到缓冲区》

文章目录

《Linux 基础 IO 完全指南：从文件描述符到缓冲区》
一、理解"文件"
- 1.1 狭义和广义上理解文件
- 1.2 文件操作的归类认知
- 1.3 系统角度
二、回顾C文件接口
- 2.1 hello.c打开文件
- 2.2 hello.c写文件
- 2.3 hello.c读文件
- 2.4 输出信息到显示器
- 2.5 stdin & stdout & stderr
- 2.6 打开文件的方式
三、系统文件 I/O
- 3.1 一种传递标志位的方法
- 3.2 hello.c写文件：
- 3.3 hello.c读文件：
- 3.4 接口介绍
- 3.5 open函数返回值
- 3.6 文件描述符 fd
  - 3.6.1 0 & 1 & 2
  - 3.6.2 文件描述符的分配规则
  - 3.6.3 重定向
  - 3.6.4 使用dup2系统调用
  - 3.6.5 在minishell中添加重定向功能
四、理解"一切皆文件"
五、缓冲区
- 5.1 什么是缓冲区
- 5.2 为什么要引入缓冲区机制
- 5.3 缓冲区类型
- 5.4 FILE
- 5.5 简单设计一下libc库
  - my_stdio.h
  - my_stdio.c
  - main.c

一、理解"文件"

在这里插入图片描述

1.1 狭义和广义上理解文件

在这里插入图片描述

1.2 文件操作的归类认知

在这里插入图片描述

1.3 系统角度

在这里插入图片描述

二、回顾C文件接口

2.1 hello.c打开文件

在这里插入图片描述

2.2 hello.c写文件

在这里插入图片描述

2.3 hello.c读文件

在这里插入图片描述
稍作修改，实现简单的cat命令

2.4 输出信息到显示器

在这里插入图片描述

2.5 stdin & stdout & stderr

在这里插入图片描述

2.6 打开文件的方式

在这里插入图片描述

三、系统文件 I/O

打开文件的方式不仅仅是 fopen、ifstream 等流式的、语言层的方案，其实系统调用才是打开文件最底层的方案。
在学习系统文件 IO 之前，先要了解一下如何给函数传递标志位。

3.1 一种传递标志位的方法

在这里插入图片描述
操作文件，除了上一小节的 C 接口
（当然，C++ 也有接口，其他语言也有），
我们还可以采用系统接口来进行文件访问。
先直接以系统调用代码的形式，实现和上面一模一样的功能：

3.2 hello.c写文件：

代码如下（示例）：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Write "hello bit!\n" into ./myfile five times using the system-call
 * interface (open/write/close) instead of the C stdio interface.
 * Returns 0 on success, 1 if the file cannot be opened or written.
 */
int main()
{
    /* Clear the umask so the 0644 passed to open() is applied as written. */
    umask(0);
    /* NOTE(review): without O_TRUNC an existing, longer file keeps its old
     * tail -- confirm whether truncation was intended here. */
    int fd = open("myfile", O_WRONLY | O_CREAT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    int count = 5;
    const char* msg = "hello bit!\n";
    int len = strlen(msg);
    while (count--) {
        /*
         * write(fd, buf, n):
         *   fd  - file descriptor (explained later in the article)
         *   buf - start address of the data to write
         *   n   - number of bytes we would like to write
         * Return value: number of bytes actually written, or -1 on error.
         */
        if (write(fd, msg, len) < 0) {
            perror("write");
            close(fd);
            return 1;
        }
    }
    close(fd);
    return 0;
}

3.3 hello.c读文件：

代码如下（示例）：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Read ./myfile with the system-call interface and echo its content to
 * standard output. Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    int fd = open("myfile", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* Only used to pick the chunk size, mirroring the write example. */
    const char* msg = "hello bit!\n";
    char buf[1024];
    while (1) {
        /* read() mirrors write(): it returns the number of bytes actually
         * read, 0 at end of file and -1 on error. */
        ssize_t s = read(fd, buf, strlen(msg));
        if (s > 0) {
            /* read() does not NUL-terminate; printf("%s") needs a string. */
            buf[s] = '\0';
            printf("%s", buf);
        }
        else {
            break;
        }
    }
    close(fd);
    return 0;
}

3.4 接口介绍

在这里插入图片描述

3.5 open函数返回值

在这里插入图片描述

3.6 文件描述符 fd

通过对open函数的学习，我们知道了文件描述符就是一个小整数

在这里插入图片描述

3.6.1 0 & 1 & 2

在这里插入图片描述

代码如下（示例）：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Demonstrates the three descriptors every process starts with:
 * read one chunk from fd 0 (standard input) and copy it to both
 * fd 1 (standard output) and fd 2 (standard error).
 */
int main()
{
    char buf[1024];
    /* Leave one byte free so the terminator below stays inside buf. */
    ssize_t s = read(0, buf, sizeof(buf) - 1);
    if (s > 0) {
        buf[s] = 0;
        /* Write exactly the bytes that were read, even if they contain NULs. */
        write(1, buf, s);
        write(2, buf, s);
    }
    return 0;
}

在这里插入图片描述

3.6.2 文件描述符的分配规则

在 files_struct 的 fd_array 数组当中，找到当前没有被使用的最小下标，
作为新的文件描述符。

3.6.3 重定向

常见的重定向有：> ， >> ， <

在这里插入图片描述

3.6.4 使用dup2系统调用

在这里插入图片描述

3.6.5 在minishell中添加重定向功能

代码如下（示例）：

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <ctype.h>
using namespace std;
// Size in bytes of the fixed buffers used for the command line, the prompt and paths
const int basesize = 1024;
// Number of slots in the argument table gargv
const int argvnum = 64;
// Number of slots in the environment table genv
const int envnum = 64;
// Global command-line argument table (filled by ParseCommand, passed to exec as argv)
char *gargv[argvnum];
int gargc = 0;
// Exit code of the most recent command; printed by `echo $?`
int lastcode = 0;
// This shell's own environment variable table (nullptr-terminated)
char *genv[envnum];
// Current working directory of the shell, and the "PWD=..." string handed to putenv()
char pwd[basesize];
char pwdenv[basesize];
// Redirection state of the command currently being parsed
#define NoneRedir 0
#define InputRedir 1
#define OutputRedir 2
#define AppRedir 3
int redir = NoneRedir;
// Target file of the redirection; points into the command buffer, nullptr when there is none
char *filename = nullptr;
// " "file.txt
// Advance `pos` past leading whitespace, e.g. "   file.txt" -> "file.txt".
// `pos` must be a modifiable char-pointer lvalue; it is updated in place.
// The character is converted to unsigned char first because calling
// isspace() with a negative value (any non-ASCII byte when char is signed)
// is undefined behaviour.
#define TrimSpace(pos) do{\
    while(isspace(static_cast<unsigned char>(*(pos)))){\
        (pos)++;\
    }\
}while(0)
// Return the user name taken from the USER environment variable,
// or "None" when the variable is unset or empty.
std::string GetUserName()
{
    // getenv() yields nullptr for an unset variable, and building a
    // std::string from nullptr is undefined behaviour, so test the raw
    // pointer before converting.
    const char *name = getenv("USER");
    if(name == nullptr || *name == '\0') return "None";
    return name;
}
// Return the host name taken from the HOSTNAME environment variable,
// or "None" when the variable is unset or empty.
std::string GetHostName()
{
    // HOSTNAME is frequently not exported to child processes; getenv()
    // then returns nullptr, which must not be turned into a std::string.
    const char *hostname = getenv("HOSTNAME");
    if(hostname == nullptr || *hostname == '\0') return "None";
    return hostname;
}
// Refresh the global `pwd` buffer from the kernel, export it as PWD=<cwd>
// and return it. Returns "None" when the working directory cannot be read.
string GetPwd()
{
    char *cwd = getcwd(pwd, sizeof(pwd));
    if(cwd == nullptr)
    {
        return "None";
    }

    // Asking the kernel (rather than trusting getenv("PWD")) keeps the value
    // correct after `cd`; the result is then published as PWD=XXX.
    snprintf(pwdenv, sizeof(pwdenv), "PWD=%s", pwd);
    putenv(pwdenv);
    return pwd;
}
// Return the last component of the current working directory,
// e.g. "/home/whb/XXX" -> "XXX". "/" and "None" are returned unchanged.
string LastDir()
{
    const string path = GetPwd();
    const bool is_root = (path == "/");
    const bool is_unknown = (path == "None");
    if(!is_root && !is_unknown)
    {
        const size_t slash = path.rfind("/");
        if(slash != std::string::npos)
        {
            return path.substr(slash + 1);
        }
    }
    return path;
}
// Build the prompt text, which looks like "[whb@bite-alicloud myshell]# ".
string MakeCommandLine()
{
    const string user = GetUserName();
    const string host = GetHostName();
    const string dir = LastDir();

    char prompt[basesize];
    snprintf(prompt, sizeof(prompt), "[%s@%s %s]# ",
             user.c_str(), host.c_str(), dir.c_str());
    return prompt;
}
void PrintCommandLine() // 1. 命令⾏提⽰符
{
printf("%s", MakeCommandLine().c_str());
fflush(stdout);
}
// Step 2: read one command line from stdin into command_buffer.
//
// The whole line is treated as a single string, e.g. "ls -a -l -n".
// The trailing newline, if there is one, is removed.
//
// Returns true when a non-empty command was read; false on end of input,
// on a read error, or when the line is empty.
bool GetCommandLine(char command_buffer[], int size)
{
    if(command_buffer == nullptr || size <= 0) return false;

    if(fgets(command_buffer, size, stdin) == nullptr)
    {
        return false;
    }

    // Cut at the first '\n' only. A line that ends at EOF, or one that was
    // truncated to fit the buffer, has no newline and must keep its last
    // character; an empty string must not be indexed at [-1].
    command_buffer[strcspn(command_buffer, "\n")] = '\0';
    return command_buffer[0] != '\0';
}
// Forget everything parsed from the previous command line.
void ResetCommandline()
{
    // Redirection state: none until ParseRedir finds an operator.
    filename = nullptr;
    redir = NoneRedir;

    // Argument table: empty, every slot cleared.
    gargc = 0;
    for(char *&slot : gargv)
    {
        slot = nullptr;
    }
}
// Detect a trailing redirection ("< file", "> file" or ">> file") in
// command_buffer, whose string length is `len`.
//
// When one is found, the global `redir` is set to InputRedir / OutputRedir /
// AppRedir, the operator is overwritten with '\0' so that command_buffer
// holds only the command part, and the global `filename` points at the file
// name inside the buffer with surrounding whitespace removed. `filename` is
// left as nullptr when nothing follows the operator. When no operator is
// present nothing is modified.
void ParseRedir(char command_buffer[], int len)
{
    // Scan from the right so the last operator on the line is the one used.
    for(int end = len - 1; end >= 0; end--)
    {
        const char op = command_buffer[end];
        if(op != '<' && op != '>') continue;

        if(op == '<')
        {
            redir = InputRedir;
        }
        else if(end > 0 && command_buffer[end-1] == '>')
        {
            // ">>": `end > 0` keeps the look-behind inside the buffer when
            // '>' is the very first character.
            redir = AppRedir;
            command_buffer[end-1] = 0;
        }
        else
        {
            redir = OutputRedir;
        }

        command_buffer[end] = 0;
        filename = &command_buffer[end] + 1;
        TrimSpace(filename);

        // Drop trailing whitespace too, so "ls > out.txt " does not create
        // a file whose name ends in a space.
        char *tail = filename + strlen(filename);
        while(tail > filename && isspace(static_cast<unsigned char>(tail[-1])))
        {
            *--tail = 0;
        }

        // Nothing after the operator: report "no file name" to DoRedir.
        if(*filename == '\0') filename = nullptr;
        break;
    }
}
void ParseCommand(char command_buffer[])
{
// "ls -a -l -n"
const char *sep = " ";
gargv[gargc++] = strtok(command_buffer, sep);
// =是刻意写的
while((bool)(gargv[gargc++] = strtok(nullptr, sep)));
gargc--;
}
// Step 3: analyse the command line.
//
// Accepted shapes:
//   ls -a -l -n
//   ls -a -l -n > file.txt
//   ls -a -l -n < file.txt
//   ls -a -l -n >> file.txt
void ParseCommandLine(char command_buffer[], int len)
{
    // Clear the previous state, cut off the redirection part first, then
    // tokenise what remains of the buffer.
    ResetCommandline();
    ParseRedir(command_buffer, len);
    ParseCommand(command_buffer);
}
// Debug helper: print the parsed argument count and every argument.
void debug()
{
    printf("argc: %d\n", gargc);
    int i = 0;
    while(gargv[i] != nullptr)
    {
        printf("argv[%d]: %s\n", i, gargv[i]);
        ++i;
    }
}
//enum
//{
// FILE_NOT_EXISTS = 1,
// OPEN_FILE_ERROR,
//};
// Apply the redirection described by the globals `redir` / `filename` to the
// calling process.
//
// 1. Redirection must be performed by the child process itself.
// 2. Program replacement (exec) does not undo it.
//
// On failure the process terminates with one of these codes:
//   input  : 1 = no file name, 2 = open/dup2 failed
//   output : 3 = no file name, 4 = open/dup2 failed
//   append : 5 = no file name, 6 = open/dup2 failed
void DoRedir()
{
    int flags = 0;
    int target_fd = 1;
    int no_name_code = 0;
    int open_fail_code = 0;

    switch(redir)
    {
    case InputRedir:
        flags = O_RDONLY;
        target_fd = 0;
        no_name_code = 1;
        open_fail_code = 2;
        break;
    case OutputRedir:
        flags = O_CREAT | O_WRONLY | O_TRUNC;
        target_fd = 1;
        no_name_code = 3;
        open_fail_code = 4;
        break;
    case AppRedir:
        flags = O_CREAT | O_WRONLY | O_APPEND;
        target_fd = 1;
        no_name_code = 5;
        open_fail_code = 6;
        break;
    default:
        // No redirection: nothing to do.
        return;
    }

    // _exit() rather than exit(): this runs in a forked child, which must not
    // flush stdio buffers or run atexit handlers inherited from the shell.
    if(filename == nullptr) _exit(no_name_code);

    // The mode argument is only consulted when O_CREAT is set.
    int fd = open(filename, flags, 0666);
    if(fd < 0) _exit(open_fail_code);
    if(dup2(fd, target_fd) < 0) _exit(open_fail_code);

    // The duplicate is all we need; do not leak the original descriptor
    // into the program that is about to be exec'd.
    if(fd != target_fd) close(fd);
}
// In a shell:
//   some commands must be executed by a child process;
//   others cannot be run by a child and must be executed by the shell
//   itself -- the built-in commands.
//
// Step 4: run the parsed command (gargv) in a child process and wait for it.
// On return `lastcode` holds the child's exit status, or 100 when the child
// did not exit normally. Exit status 7 means the program could not be
// executed. Returns false when there is no command, fork() fails or
// waitpid() fails.
bool ExecuteCommand()
{
    // An empty argument table cannot be exec'd.
    if(gargv[0] == nullptr) return false;

    pid_t id = fork();
    if(id < 0) return false;
    if(id == 0)
    {
        // Child: set up redirection, then replace the process image.
        DoRedir();
        // execvpe is a GNU extension: PATH lookup plus an explicit environment.
        execvpe(gargv[0], gargv, genv);
        // Only reached when exec failed. Report why, then leave with _exit()
        // so the child does not flush stdio buffers copied from the shell.
        perror(gargv[0]);
        _exit(7);
    }

    int status = 0;
    pid_t rid = waitpid(id, &status, 0);
    if(rid <= 0) return false;

    lastcode = WIFEXITED(status) ? WEXITSTATUS(status) : 100;
    return true;
}
void AddEnv(const char *item)
{
int index = 0;
while(genv[index])
{
index++;
}
genv[index] = (char*)malloc(strlen(item)+1);
strncpy(genv[index], item, strlen(item)+1);
genv[++index] = nullptr;
}
// Built-in commands are executed by the shell itself; in essence the shell
// just calls one of its own functions.
//
// Handles cd, export, env and echo. Returns true when the parsed command was
// handled here (including the case where there is no command at all) and
// false when it has to be run in a child process.
// `lastcode` is set to 0 on success and to 1 (cd), 2 (export) or 3 (echo)
// on failure or wrong argument count.
bool CheckAndExecBuiltCommand()
{
    const char *cmd = gargv[0];
    // Nothing was parsed (e.g. a line of spaces): nothing to execute.
    if(cmd == nullptr) return true;

    if(strcmp(cmd, "cd") == 0)
    {
        // cd must be built in: a child's chdir() would not affect the shell.
        if(gargc == 2 && chdir(gargv[1]) == 0)
        {
            lastcode = 0;
        }
        else
        {
            if(gargc == 2) perror("cd");
            lastcode = 1;
        }
        return true;
    }

    if(strcmp(cmd, "export") == 0)
    {
        // export is built in as well: it edits the shell's own table.
        if(gargc == 2)
        {
            AddEnv(gargv[1]);
            lastcode = 0;
        }
        else
        {
            lastcode = 2;
        }
        return true;
    }

    if(strcmp(cmd, "env") == 0)
    {
        for(int i = 0; genv[i]; i++)
        {
            printf("%s\n", genv[i]);
        }
        lastcode = 0;
        return true;
    }

    if(strcmp(cmd, "echo") == 0)
    {
        if(gargc != 2)
        {
            lastcode = 3;
            return true;
        }

        // echo $?
        // echo $PATH
        // echo hello
        const char *arg = gargv[1];
        if(arg[0] != '$')
        {
            printf("%s\n", arg);
        }
        else if(arg[1] == '?')
        {
            printf("%d\n", lastcode);
        }
        else
        {
            // $NAME: look the variable up in the shell's environment table;
            // an unknown name expands to an empty line.
            const char *name = arg + 1;
            const size_t name_len = strlen(name);
            const char *value = "";
            for(int i = 0; genv[i]; i++)
            {
                if(strncmp(genv[i], name, name_len) == 0 && genv[i][name_len] == '=')
                {
                    value = genv[i] + name_len + 1;
                    break;
                }
            }
            printf("%s\n", value);
        }
        lastcode = 0;
        return true;
    }

    return false;
}
// 作为⼀个shell，获取环境变量应该从系统的配置来
// 我们今天就直接从⽗shell中获取环境变量
void InitEnv()
{
extern char **environ;
int index = 0;
while(environ[index])
{
genv[index] = (char*)malloc(strlen(environ[index])+1);
strncpy(genv[index], environ[index], strlen(environ[index])+1);
index++;
}
genv[index] = nullptr;
}
int main()
{
InitEnv();
char command_buffer[basesize];
while(true)
{
PrintCommandLine(); // 1. 命令⾏提⽰符
// command_buffer -> output
if( !GetCommandLine(command_buffer, basesize) ) // 2. 获取⽤⼾命令
{
continue;
}
//printf("%s\n", command_buffer);
//ls
//"ls -a -b -c -d"->"ls" "-a" "-b" "-c" "-d"
//"ls -a -b -c -d">hello.txt
//"ls -a -b -c -d">>hello.txt
//"ls -a -b -c -d"<hello.txt
ParseCommandLine(command_buffer, strlen(command_buffer)); // 3. 分析命
令
if ( CheckAndExecBuiltCommand() )
{
continue;
}
ExecuteCommand(); // 4. 执⾏命令
}
return 0;
}

四、理解"一切皆文件"

在这里插入图片描述

五、缓冲区

5.1 什么是缓冲区

在这里插入图片描述

5.2 为什么要引入缓冲区机制

在这里插入图片描述

5.3 缓冲区类型

在这里插入图片描述

–

5.4 FILE

在这里插入图片描述

5.5 简单设计一下libc库

my_stdio.h

代码如下（示例）：

$ cat my_stdio.h
#pragma once
// Size in bytes of the output buffer embedded in every stream (IO_FILE::outbuffer)
#define SIZE 1024
// Flush policies stored in IO_FILE::flag.
// FLUSH_LINE: mfwrite flushes when the buffered data ends with '\n'.
// NOTE(review): FLUSH_NONE and FLUSH_FULL are not acted on by the visible
// implementation -- confirm their intended meaning.
#define FLUSH_NONE 0
#define FLUSH_LINE 1
#define FLUSH_FULL 2
// Minimal user-level stream object: a file descriptor plus an output buffer.
struct IO_FILE
{
int flag; // flush policy (one of the FLUSH_* values)
int fileno; // underlying file descriptor
char outbuffer[SIZE]; // data accepted by mfwrite but not yet passed to write()
int cap; // capacity of outbuffer; mfopen sets it to SIZE
int size; // number of bytes currently held in outbuffer
// TODO
};
typedef struct IO_FILE mFILE;
// Open `filename` in mode "r", "w" or "a"; returns NULL on failure.
mFILE *mfopen(const char *filename, const char *mode);
// Copy `num` bytes from `ptr` into the stream's buffer; returns the byte count.
int mfwrite(const void *ptr, int num, mFILE *stream);
// Write any buffered bytes to the file descriptor and fsync() it.
void mfflush(mFILE *stream);
// Flush pending data and close the underlying file descriptor.
void mfclose(mFILE *stream);

my_stdio.c

代码如下（示例）：

$ cat my_stdio.c
#include "my_stdio.h"
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
// Open `filename` and wrap the descriptor in a freshly allocated mFILE.
//   "r" - read only
//   "w" - write only, create if missing, truncate
//   "a" - write only, create if missing, append
// Returns NULL for any other mode, when open() fails, or when the stream
// object cannot be allocated. New streams start empty and line-flushed.
mFILE *mfopen(const char *filename, const char *mode)
{
    int flags = -1; // -1 marks an unsupported mode
    if(strcmp(mode, "r") == 0)
        flags = O_RDONLY;
    else if(strcmp(mode, "w") == 0)
        flags = O_CREAT|O_WRONLY|O_TRUNC;
    else if(strcmp(mode, "a") == 0)
        flags = O_CREAT|O_WRONLY|O_APPEND;

    if(flags < 0) return NULL;

    // Only the creating modes pass a permission argument.
    int fd = (flags & O_CREAT) ? open(filename, flags, 0666)
                               : open(filename, flags);
    if(fd < 0) return NULL;

    mFILE *stream = (mFILE*)malloc(sizeof(mFILE));
    if(stream == NULL)
    {
        close(fd);
        return NULL;
    }

    stream->cap = SIZE;
    stream->size = 0;
    stream->flag = FLUSH_LINE;
    stream->fileno = fd;
    return stream;
}
// Push everything buffered in `stream` down to the file.
// Step 1 hands the bytes to the kernel with write() (they land in the
// kernel's file buffer); step 2 asks the kernel to flush them to the device
// with fsync(). The buffer is empty afterwards. A NULL or empty stream is a
// no-op.
void mfflush(mFILE *stream)
{
    if(stream == NULL || stream->size <= 0) return;

    // write() may accept fewer bytes than requested; continue with the rest
    // until everything is written or an error occurs.
    int done = 0;
    while(done < stream->size)
    {
        ssize_t n = write(stream->fileno, stream->outbuffer + done, stream->size - done);
        if(n <= 0) break; // this void API cannot report the error
        done += (int)n;
    }

    // Flush to the device.
    fsync(stream->fileno);
    stream->size = 0;
}
// Append `num` bytes from `ptr` to the stream's output buffer.
// The buffer is flushed whenever it becomes full, so input larger than the
// buffer is accepted without overrunning it. Afterwards, a line-buffered
// stream (FLUSH_LINE) is flushed if the buffered data ends with '\n'.
// Returns the number of bytes accepted (num on success, 0 for bad arguments).
int mfwrite(const void *ptr, int num, mFILE *stream)
{
    if(ptr == NULL || stream == NULL || num <= 0) return 0;

    // 1. Copy, one buffer-sized chunk at a time.
    const char *src = (const char*)ptr;
    int left = num;
    while(left > 0)
    {
        int room = stream->cap - stream->size;
        if(room <= 0)
        {
            mfflush(stream);
            room = stream->cap - stream->size;
            if(room <= 0) break; // zero-capacity stream: nothing can be stored
        }
        int chunk = left < room ? left : room;
        memcpy(stream->outbuffer + stream->size, src, chunk);
        stream->size += chunk;
        src += chunk;
        left -= chunk;
    }

    // 2. Check whether the flush policy asks for a flush now.
    if(stream->flag == FLUSH_LINE && stream->size > 0 &&
       stream->outbuffer[stream->size-1] == '\n')
    {
        mfflush(stream);
    }
    return num - left;
}
// Flush pending output, close the file descriptor and release the stream
// object allocated by mfopen. `stream` must not be used afterwards.
// Passing NULL is a no-op.
void mfclose(mFILE *stream)
{
    if(stream == NULL) return;

    if(stream->size > 0)
    {
        mfflush(stream);
    }
    close(stream->fileno);
    // mfopen obtained the object with malloc(); without this every
    // open/close pair leaks one mFILE.
    free(stream);
}

main.c

代码如下（示例）：

$ cat main.c
#include "my_stdio.h"
#include <stdio.h>
#include <string.h>
#include <unistd.h>
// Demo driver for the mini stdio library: append ten messages to ./log.txt,
// one per second, flushing after each so the file grows while it runs.
int main()
{
    mFILE *fp = mfopen("./log.txt", "a");
    if(!fp)
    {
        return 1;
    }

    for(int cnt = 10; cnt != 0; cnt--)
    {
        printf("write %d\n", cnt);

        char buffer[64];
        snprintf(buffer, sizeof(buffer), "hello message, number is : %d", cnt);

        // The message has no '\n', so line buffering alone would keep it in
        // the user-level buffer; flush explicitly.
        mfwrite(buffer, strlen(buffer), fp);
        mfflush(fp);
        sleep(1);
    }

    mfclose(fp);
    return 0;
}