Linux笔记---基于HTTP协议搭建一个简单的Web服务器
1. 原理
https://blog.csdn.net/2302_80372340/article/details/151611390?spm=1011.2415.3001.5331
上面这篇文章已经详细讨论过HTTP协议是如何运作的了。简单来说,我们要在我们的服务器上做下面几件事:
- 接收来自客户端(浏览器)的HTTP请求
- 反序列化HTTP请求
- 读取请求行的URI字段,读取客户端要求的网页对象文件
- 将网页对象文件的数据包装为HTTP响应报文
- 序列化响应报文,回送给客户端
网页对象文件通常就是HTML、JavaScript、CSS以及图片、声音、文本等文件。
总之,我们要做的就是用代码来实现HTTP协议。
2. 协议的实现
2.1 报文的定义
首先,我们要根据HTTP协议描述的请求报文与响应报文,定义出结构化的请求报文和响应报文,并实现二者的序列化与反序列化方法。
如果只考虑服务端的话,我们只需要实现请求报文的反序列化方法和响应报文的序列化方法。
我们注意到,请求报文与响应报文最大的区别在于首行,而其他部分均大同小异,所以我们可以首先定义出HttpMessage类作为二者的父类,实现二者共同的部分:
/// Shared base of HTTP request and response messages: holds the protocol
/// version, the header table and the body, plus the separator constants the
/// derived classes use when parsing or emitting a message.
class HttpMessage
{
public:
    /// Parses one raw header line of the form "Name: value" and stores it.
    ///
    /// The name is everything before the first ':'. Spaces and tabs around the
    /// value are stripped, so "Host:example.com" and "Host:   example.com" are
    /// both accepted (whitespace after the colon is optional in HTTP).
    ///
    /// @param header one header line without its trailing line break.
    /// @return false when the line has no ':' or the name is empty; true otherwise.
    bool AddHeader(const std::string &header)
    {
        const auto colon = header.find(':');
        if (colon == std::string::npos || colon == 0)
        {
            return false;
        }

        static const char *const kBlank = " \t";
        std::string value;
        const auto first = header.find_first_not_of(kBlank, colon + 1);
        if (first != std::string::npos)
        {
            // A non-blank character exists after the colon, so `last >= first`.
            const auto last = header.find_last_not_of(kBlank);
            value = header.substr(first, last - first + 1);
        }

        _headers[header.substr(0, colon)] = value;
        return true;
    }

    /// Stores `value` under `key` verbatim, replacing any previous value.
    /// @return always true.
    bool AddHeader(const std::string &key, const std::string &value)
    {
        _headers[key] = value;
        return true;
    }

protected:
    static const std::string _Space;     // single space between start-line fields
    static const std::string _LineBreak; // line terminator
    static const std::string _BlankLine; // empty line between the headers and the body
    static const std::string _HeaderSep; // separator emitted between a header name and its value

    std::string _version;                                  // HTTP version
    std::unordered_map<std::string, std::string> _headers; // request/response headers
    std::string _data;                                     // request/response body
};

const std::string HttpMessage::_Space = " ";
const std::string HttpMessage::_LineBreak = "\r\n";
const std::string HttpMessage::_BlankLine = "\r\n";
const std::string HttpMessage::_HeaderSep = ": ";
2.1.1 请求报文
相比于HttpMessage,请求报文有两个特有的字段:HTTP请求方法和URI。
要完成请求报文的反序列化,我们需要一个函数来帮助我们逐行提取请求的内容:
/// String helpers used while parsing a request.
class Util
{
public:
    /// Cuts the first line off the front of `message`.
    ///
    /// @param message  buffer to consume from; on success the extracted line
    ///                 and the separator that followed it are removed.
    /// @param sep      line separator to look for.
    /// @param one_line receives the text before the first `sep`.
    /// @return true when a separator was found; false (after logging a
    ///         warning, with both arguments left untouched) otherwise.
    static bool ReadLine(std::string &message, const std::string &sep, std::string &one_line)
    {
        const std::string::size_type cut = message.find(sep);
        if (cut != std::string::npos)
        {
            one_line = message.substr(0, cut);
            message.erase(0, cut + sep.size());
            return true;
        }

        LOG(LogLevel::WARNING) << "Util::ReadLine: 未能提取出一行! ";
        return false;
    }
};
为了简单起见,我们认为客户端的请求不包含请求正文部分,所以当我们遇到空行(即两个连续的换行符)时,就认为收到了一个完整的报文。
此时,我们需要先将请求行取出并解析,随后逐行读取请求报头并插入到_headers:
// 请求行: 方法 + 空格 + URI + 空格 + 版本 + 换行符
class HttpRequest : public HttpMessage
{
private:void PraseRequestLine(const std::string &request_line){std::stringstream buffer(request_line);buffer >> _method >> _uri >> _version;}public:HttpRequest() {}// 序列化// std::string Serialize() {}// 反序列化bool Deserialize(std::string &request_str){static std::string end = _LineBreak + _BlankLine;// 判断是否存在一个完整报文if (request_str.find(end) == std::string::npos){return false;}// 读取请求行std::string request_line;Util::ReadLine(request_str, _LineBreak, request_line);PraseRequestLine(request_line);// 读取请求报头std::string header;do{Util::ReadLine(request_str, _LineBreak, header);if (header.size() > 0){AddHeader(header);}} while (header.size() > 0); // 读取到空行结束return true;}const std::string &Uri() const { return _uri; }~HttpRequest() {}private:std::string _method; // Http请求方法std::string _uri; // URI
};
当然,我们只是做一个简单的服务器,虽然请求报头部分我们也做了反序列化,但是我们并不打算对这部分做处理。
2.1.2 响应报文
相比于HttpMessage,响应报文也有两个特有的字段:状态码与状态码描述。除此之外,我们还需要动用HttpMessage的_data字段来存储要发给客户端的对象。
响应报文的序列化看起来是较为简单的,只要按照响应报文的格式拼接字符串即可。
但是要使我们回复给客户端的数据被正确解析,我们还需要两个重要的响应报头字段:"Content-Length"(响应正文的长度)、"Content-Type"(响应正文的数据类型)。
响应正文的长度(即文件大小)可以由如下方法进行获取:
class Util
{
public:static size_t FileSize(const std::string &path){// 以二进制模式打开文件(避免文本模式下的换行符转换影响计算)std::ifstream file(path, std::ios::in | std::ios::binary);if (!file.is_open()){LOG(LogLevel::ERROR) << "无法打开文件: " << path << strerror(errno);return -1; // 打开失败返回-1}// 将读指针移动到文件末尾file.seekg(0, std::ios::end);// 获取当前指针位置(即文件大小)size_t size = file.tellg();return size;}
};
Content-Type可以通过提取文件的后缀名,并通过一个映射集来进行映射的方式获取:
// 状态行: 版本 + 空格 + 状态码 + 空格 + 状态码描述 + 换行符
class HttpResponse : public HttpMessage
{
public:HttpResponse() {}// 序列化std::string Serialize(){std::string status_line = _version + _Space + std::to_string(_status) + _Space + _StatusDesc[_status] + _LineBreak;std::string response_headers;for (auto &header : _headers){response_headers += header.first + _HeaderSep + header.second + _LineBreak;}std::string message = status_line + response_headers + _BlankLine + _data;return message;}std::string Serialize(const std::string &version, int status, const std::string &&data){// 更新成员变量_version = version;_status = status;_data = std::move(data);return Serialize();}const std::string &GetMineType(const std::string &extension){if (_MineType.count(extension) == 0){LOG(LogLevel::ERROR) << "ExtensionToType: 未知的拓展名! [" << extension << "]";return _MineType[""];}return _MineType[extension];}// 反序列化// bool Deserialize(const std::string request_str) {}~HttpResponse() {}private:int _status; // 状态码static std::unordered_map<int, std::string> _StatusDesc; // 状态码描述static std::unordered_map<std::string, std::string> _MineType; // 后缀转HTTP数据类型
};
std::unordered_map<int, std::string> HttpResponse::_StatusDesc = {{200, "OK"},{404, "Not Found"}};
std::unordered_map<std::string, std::string> HttpResponse::_MineType = {{"", "text/plain"},{"txt", "text/plain"},{"html", "text/html"},{"htm", "text/html"},{"xml", "text/xml"},{"gif", "image/gif"},{"jpg", "image/jpeg"},{"png", "image/png"}};
2.2 HTTP协议的定义
class Http
{
private:void SetContentLength(HttpResponse &response, const std::string &path){size_t filesize = Util::FileSize(path);response.AddHeader("Content-Length", std::to_string(filesize));}void SetContentType(HttpResponse &response, const std::string &path){static std::string point = ".";auto pos = path.rfind(point);std::string extension;if (pos == std::string::npos){extension = "";}else{extension = path.substr(pos + point.size());}response.AddHeader("Content-Type", response.GetMineType(extension));}std::string MakeResponse(const std::string &uri, HttpResponse &response){std::string path, data;if (uri == "/")path = _HomePage;elsepath = _WebRoot + uri;int status = 200;if (!Util::ReadFile(path, data)){status = 404;LOG(LogLevel::ERROR) << "Http: 获取资源失败! [" << path << "]";path = _404Page;Util::ReadFile(path, data);}SetContentLength(response, path);SetContentType(response, path);return response.Serialize(_Version, status, std::move(data));}public:// TCPServer的回调函数void RequestHandler(const std::shared_ptr<TCPConnectSocket> &con_socket){std::string cli_message, buffer;while (con_socket->Receive(buffer) > 0){cli_message += buffer;LOG(LogLevel::INFO) << "来自[" << con_socket->addr().Info() << "]的Http请求报文:\n\r" << buffer;HttpRequest request;if (!request.Deserialize(cli_message))continue;LOG(LogLevel::DEBUG) << "request 反序列化成功";HttpResponse response;std::string message = MakeResponse(request.Uri(), response);con_socket->Send(message);}}private:static const std::string _Version; // Http版本static const std::string _WebRoot; // 网页根目录static const std::string _HomePage; // 首页static const std::string _404Page; // 404页面
};
const std::string Http::_Version = "HTTP/1.1";
const std::string Http::_WebRoot = "/home/shishen/113code/linux-c/Http协议/WebRoot";
const std::string Http::_HomePage = Http::_WebRoot + "/index.html";
const std::string Http::_404Page = Http::_WebRoot + "/404.html";
3. 整个服务端的实现
shishen@hcss-ecs-b8e6:~/113code/linux-c/Http协议$ tree
.
├── HttpServer
│ ├── Common.hpp
│ ├── Http.hpp
│ ├── InetAddr.hpp
│ ├── Log.hpp
│ ├── main.cpp
│ ├── Makefile
│ ├── Mutex.hpp
│ ├── Socket.hpp
│ ├── TCPServer.hpp
│ ├── testHttp
│ └── Util.hpp
└── WebRoot
    ├── 404.html
    ├── image
    │   └── 666.jpg
    ├── index.html
    ├── login.html
    └── register.html

3 directories, 16 files
3.1 main.cpp
#include "Http.hpp"
#include "Common.hpp"
#include <memory>
#include <unistd.h>
#include <signal.h>int main(int argc, char* args[])
{if(argc != 2){LOG(LogLevel::FATAL) << "Usage: " << args[0] << " port";exit(USAGE_ERROR);}USE_FILE_STRATEGY();Http http;in_port_t port = std::stoi(args[1]);TCPServer server(port, [&http](const std::shared_ptr<TCPConnectSocket>& con_socket){http.RequestHandler(con_socket);});daemon(0, 0);signal(SIGCHLD, SIG_IGN);server.Run();return 0;
}
3.2 Http.hpp
#pragma once
#include "TCPServer.hpp"
#include "Util.hpp"
#include <unordered_map>
#include <memory>
#include <sstream>

/// Shared base of HTTP request and response messages: holds the protocol
/// version, the header table and the body, plus the separator constants the
/// derived classes use when parsing or emitting a message.
class HttpMessage
{
public:
    /// Parses one raw header line of the form "Name: value" and stores it.
    ///
    /// The name is everything before the first ':'. Spaces and tabs around the
    /// value are stripped, so "Host:example.com" and "Host:   example.com" are
    /// both accepted (whitespace after the colon is optional in HTTP).
    ///
    /// @param header one header line without its trailing line break.
    /// @return false when the line has no ':' or the name is empty; true otherwise.
    bool AddHeader(const std::string &header)
    {
        const auto colon = header.find(':');
        if (colon == std::string::npos || colon == 0)
        {
            return false;
        }

        static const char *const kBlank = " \t";
        std::string value;
        const auto first = header.find_first_not_of(kBlank, colon + 1);
        if (first != std::string::npos)
        {
            // A non-blank character exists after the colon, so `last >= first`.
            const auto last = header.find_last_not_of(kBlank);
            value = header.substr(first, last - first + 1);
        }

        _headers[header.substr(0, colon)] = value;
        return true;
    }

    /// Stores `value` under `key` verbatim, replacing any previous value.
    /// @return always true.
    bool AddHeader(const std::string &key, const std::string &value)
    {
        _headers[key] = value;
        return true;
    }

protected:
    static const std::string _Space;     // single space between start-line fields
    static const std::string _LineBreak; // line terminator
    static const std::string _BlankLine; // empty line between the headers and the body
    static const std::string _HeaderSep; // separator emitted between a header name and its value

    std::string _version;                                  // HTTP version
    std::unordered_map<std::string, std::string> _headers; // request/response headers
    std::string _data;                                     // request/response body
};

const std::string HttpMessage::_Space = " ";
const std::string HttpMessage::_LineBreak = "\r\n";
const std::string HttpMessage::_BlankLine = "\r\n";
const std::string HttpMessage::_HeaderSep = ": ";

// Request line: method + space + URI + space + version + line break
class HttpRequest : public HttpMessage
{
private:void PraseRequestLine(const std::string &request_line){std::stringstream buffer(request_line);buffer >> _method >> _uri >> _version;}public:HttpRequest() {}// 序列化// std::string Serialize() {}// 反序列化bool Deserialize(std::string &request_str){static std::string end = _LineBreak + _BlankLine;// 判断是否存在一个完整报文if (request_str.find(end) == std::string::npos){return false;}// 读取请求行std::string request_line;Util::ReadLine(request_str, _LineBreak, request_line);PraseRequestLine(request_line);// 读取请求报头std::string header;do{Util::ReadLine(request_str, _LineBreak, header);if (header.size() > 0){AddHeader(header);}} while (header.size() > 0); // 读取到空行结束return true;}const std::string &Uri() const { return _uri; }~HttpRequest() {}private:std::string _method; // Http请求方法std::string _uri; // URI
};// 状态行: 版本 + 空格 + 状态码 + 空格 + 状态码描述 + 换行符
class HttpResponse : public HttpMessage
{
public:HttpResponse() {}// 序列化std::string Serialize(){std::string status_line = _version + _Space + std::to_string(_status) + _Space + _StatusDesc[_status] + _LineBreak;std::string response_headers;for (auto &header : _headers){response_headers += header.first + _HeaderSep + header.second + _LineBreak;}std::string message = status_line + response_headers + _BlankLine + _data;return message;}std::string Serialize(const std::string &version, int status, const std::string &&data){// 更新成员变量_version = version;_status = status;_data = std::move(data);return Serialize();}const std::string &GetMineType(const std::string &extension){if (_MineType.count(extension) == 0){LOG(LogLevel::ERROR) << "ExtensionToType: 未知的拓展名! [" << extension << "]";return _MineType[""];}return _MineType[extension];}// 反序列化// bool Deserialize(const std::string request_str) {}~HttpResponse() {}private:int _status; // 状态码static std::unordered_map<int, std::string> _StatusDesc; // 状态码描述static std::unordered_map<std::string, std::string> _MineType; // 后缀转HTTP数据类型
};
std::unordered_map<int, std::string> HttpResponse::_StatusDesc = {{200, "OK"},{404, "Not Found"}};
std::unordered_map<std::string, std::string> HttpResponse::_MineType = {{"", "text/plain"},{"txt", "text/plain"},{"html", "text/html"},{"htm", "text/html"},{"xml", "text/xml"},{"gif", "image/gif"},{"jpg", "image/jpeg"},{"png", "image/png"}};class Http
{
private:void SetContentLength(HttpResponse &response, const std::string &path){size_t filesize = Util::FileSize(path);response.AddHeader("Content-Length", std::to_string(filesize));}void SetContentType(HttpResponse &response, const std::string &path){static std::string point = ".";auto pos = path.rfind(point);std::string extension;if (pos == std::string::npos){extension = "";}else{extension = path.substr(pos + point.size());}response.AddHeader("Content-Type", response.GetMineType(extension));}std::string MakeResponse(const std::string &uri, HttpResponse &response){std::string path, data;if (uri == "/")path = _HomePage;elsepath = _WebRoot + uri;int status = 200;if (!Util::ReadFile(path, data)){status = 404;LOG(LogLevel::ERROR) << "Http: 获取资源失败! [" << path << "]";path = _404Page;Util::ReadFile(path, data);}SetContentLength(response, path);SetContentType(response, path);return response.Serialize(_Version, status, std::move(data));}public:// TCPServer的回调函数void RequestHandler(const std::shared_ptr<TCPConnectSocket> &con_socket){std::string cli_message, buffer;while (con_socket->Receive(buffer) > 0){cli_message += buffer;LOG(LogLevel::INFO) << "来自[" << con_socket->addr().Info() << "]的Http请求报文:\n\r" << buffer;HttpRequest request;if (!request.Deserialize(cli_message))continue;LOG(LogLevel::DEBUG) << "request 反序列化成功";HttpResponse response;std::string message = MakeResponse(request.Uri(), response);con_socket->Send(message);}}private:static const std::string _Version; // Http版本static const std::string _WebRoot; // 网页根目录static const std::string _HomePage; // 首页static const std::string _404Page; // 404页面
};
const std::string Http::_Version = "HTTP/1.1";
const std::string Http::_WebRoot = "/home/shishen/113code/linux-c/Http协议/WebRoot";
const std::string Http::_HomePage = Http::_WebRoot + "/index.html";
const std::string Http::_404Page = Http::_WebRoot + "/404.html";
3.3 Util.hpp
#pragma once
#include <cerrno>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include "Log.hpp"
using namespace LogModule;class Util
{
public:static bool ReadFile(const std::string &file_path, std::string &content){// 必须要按照二进制的方式读取数据,否则非文本数据无法传输std::ifstream file(file_path, std::ios::binary);if (!file.is_open()){LOG(LogLevel::ERROR) << "Util::ReadFile: 无法打开文件[" << file_path << "]! " << strerror(errno);return false;}size_t file_size = FileSize(file_path);content.resize(file_size);// 5. 读取二进制数据到字符串if (!file.read(&content[0], file_size)){LOG(LogLevel::ERROR) << "Util::ReadFile: 读取文件失败[" << file_path << "]! ";return false;}file.close();return true;}static bool ReadLine(std::string &message, const std::string &sep, std::string &one_line){auto pos = message.find(sep);if (pos == std::string::npos){LOG(LogLevel::WARNING) << "Util::ReadLine: 未能提取出一行! ";return false;}one_line = message.substr(0, pos);message.erase(0, pos + sep.size());return true;}static size_t FileSize(const std::string &path){// 以二进制模式打开文件(避免文本模式下的换行符转换影响计算)std::ifstream file(path, std::ios::in | std::ios::binary);if (!file.is_open()){LOG(LogLevel::ERROR) << "无法打开文件: " << path << strerror(errno);return -1; // 打开失败返回-1}// 将读指针移动到文件末尾file.seekg(0, std::ios::end);// 获取当前指针位置(即文件大小)size_t size = file.tellg();return size;}
};
其余的hpp文件在以往的文章当中都有,就不再重复贴出了,读者也可以到仓库中找。
https://gitee.com/da-guan-mu-lao-sheng/linux-c/tree/master/Http%E5%8D%8F%E8%AE%AE/HttpServer
至于网页对象,可以让ai帮忙生成几个html对象用于调试。
4. 效果
这样一来,我们的Web服务器基本上就能完成大部分浏览器的请求了。
但是,我们目前还只能完成GET的请求方法,对于报头的处理也以忽略为主,总之我们的服务器还是非常简单的版本。