基于C语言实现的KV存储引擎(二)
基于C语言实现的KV存储引擎(二)
- 协议制定
- 用于存储的数据结构
- 数组
- 红黑树
- hash
- 跳表
- 性能测试
- 项目总结
协议制定
当前KV存储引擎是基于 TCP 协议进行实现的,那么就有很多人不理解了,为什么存在协议了还需要去制定协议呢?
首先我们需要明白,TCP 是一种公共的协议,客户端与服务端建立连接,需要使用到 TCP 协议,但是连接建立以后,收发数据的协议依然需要我们自己来进行制定,也就是说发什么样的消息,回什么样的数据,这些都是我们自己来进行制定的。
当前我们仿照的就是一种 Redis 的处理方式,就如同下面这种方式:
Redis 就是像如下的这种方式来进行解析的:
用于存储的数据结构
数组
对于[key,value]
键值对,我们可以采用多种数据结构来进行存储:数组、红黑树、B+树、跳表等等,接下来都会一一进行实现。首先我们来看数组,因为是基于 C 语言进行实现的,所以我们就需要定义一个结构体来保存对应的[key,value]
值。
/* One entry of the array store: a key string and the value string paired with it. */
struct kvs_array_item_s {
    char *key;
    char *value;
};

typedef struct kvs_array_item_s kvs_array_item_t;
接下来我们就需要仿照 Redis 来实现,主要实现以下的 5 个指令:
/* Command keywords understood by the protocol, one entry per command. */
const char *command[] = {
    "SET",
    "GET",
    "DEL",
    "MOD",
    "EXIST",
};
我们需要去制定一套自己的协议解析的规则,那么我们也是仿照 Redis 来进行实现的:
kvs_array.c
#include "kvstore.h"

/* Array-backed store instance; defined here and referenced by the protocol layer. */
kvs_array_t global_array = {0};

/* Duplicate a NUL-terminated string through kvs_malloc; returns NULL when allocation fails. */
static char *kvs_array_dup(const char *src)
{
    size_t bytes = strlen(src) + 1;
    char *copy = kvs_malloc(bytes);
    if (copy != NULL) {
        memcpy(copy, src, bytes);
    }
    return copy;
}

/*
 * Allocate the slot table (ARRAY_SIZE entries) for an array store.
 *
 * Returns 0 on success, -1 when inst is NULL, the table is already
 * allocated, or the allocation fails.
 */
int kvs_array_create(kvs_array_t *inst)
{
    if (inst == NULL) {
        return -1;
    }
    if (inst->table) {
        printf("table has alloced\n");
        return -1;
    }

    // allocate through the kvs_malloc wrapper instead of calling malloc directly
    inst->table = kvs_malloc(sizeof(kvs_array_item_t) * ARRAY_SIZE);
    if (inst->table == NULL) {
        printf("kvs_malloc failed!\n");
        return -1;
    }

    /* Every slot must start with key == NULL: lookups treat NULL as "empty". */
    memset(inst->table, 0, sizeof(kvs_array_item_t) * ARRAY_SIZE);
    inst->totalsize = 0;
    return 0;
}

/*
 * Release every stored key/value copy and the slot table itself.
 * Safe to call with NULL or with a store that was never created.
 */
void kvs_array_destroy(kvs_array_t *inst)
{
    if (inst == NULL) {
        return;
    }
    if (inst->table) {
        for (int i = 0; i < inst->totalsize; i++) {
            kvs_free(inst->table[i].key);
            kvs_free(inst->table[i].value);
        }
        kvs_free(inst->table);
        inst->table = NULL;
    }
    inst->totalsize = 0;
}

/*
 * Insert a new key/value pair; both strings are copied.
 *
 * totalsize is the number of slots ever handed out (deleting only clears a
 * slot), so it grows only when a pair is appended, not when a cleared slot
 * is reused.
 *
 * Returns 0 on success, 1 when the key is already stored, -1 on invalid
 * arguments or when no slot is free, -2 when memory allocation fails.
 */
int kvs_array_set(kvs_array_t *inst, char* key, char* value)
{
    if (inst == NULL || key == NULL || value == NULL) {
        return -1;
    }
    if (inst->table == NULL) {
        return -1;
    }
    if (kvs_array_get(inst, key) != NULL) {
        return 1;
    }

    /* Prefer a slot cleared by an earlier delete; otherwise append. */
    int slot = -1;
    for (int i = 0; i < inst->totalsize; i++) {
        if (inst->table[i].key == NULL) {
            slot = i;
            break;
        }
    }
    if (slot < 0) {
        if (inst->totalsize >= ARRAY_SIZE) {
            return -1;
        }
        slot = inst->totalsize;
    }

    char *kcopy = kvs_array_dup(key);
    if (kcopy == NULL) {
        return -2;
    }
    char *vcopy = kvs_array_dup(value);
    if (vcopy == NULL) {
        kvs_free(kcopy); /* do not leak the key copy */
        return -2;
    }

    inst->table[slot].key = kcopy;
    inst->table[slot].value = vcopy;
    if (slot == inst->totalsize) {
        inst->totalsize++;
    }
    return 0;
}
// Look up the value stored for a key
char* kvs_array_get(kvs_array_t *inst, char* key)
{
    /* Returns the stored value pointer for key, or NULL when the arguments
     * are NULL or no occupied slot holds that key. */
    if (!inst || !key) {
        return NULL;
    }

    for (int pos = 0; pos < inst->totalsize; ++pos) {
        kvs_array_item_t *slot = &inst->table[pos];
        /* slots with a NULL key are empty and are skipped */
        if (slot->key != NULL && strcmp(slot->key, key) == 0) {
            return slot->value;
        }
    }
    return NULL;
}
/* 0: success, 1: failure */
// Delete a key together with its value
/*
 * Frees the key and value copies of the matching slot and marks the slot
 * empty (key == NULL). totalsize is left unchanged so the cleared slot can
 * be found and reused later.
 *
 * Returns 0 when the pair was removed, 1 when the key is not stored,
 * -1 on invalid arguments.
 */
int kvs_array_del(kvs_array_t *inst, char* key)
{
    if (inst == NULL || key == NULL) {
        return -1;
    }

    for (int i = 0; i < inst->totalsize; i++) {
        kvs_array_item_t *item = &inst->table[i];
        if (item->key == NULL) {
            continue;
        }
        if (strcmp(item->key, key) == 0) {
            kvs_free(item->key);
            item->key = NULL;
            kvs_free(item->value);
            item->value = NULL;
            return 0;
        }
    }

    /* A fixed positive code: returning the loop index would yield 0
     * ("success") when the store is empty. */
    return 1;
}
/* 0: success, 1: failure */
// Replace the value stored for a key
/*
 * The new value is copied before the old one is released, so a failed
 * allocation leaves the existing pair untouched.
 *
 * Returns 0 when the value was replaced, 1 when the key is not stored,
 * -1 on invalid arguments, -2 when memory allocation fails.
 */
int kvs_array_mod(kvs_array_t *inst, char* key, char* value)
{
    if (inst == NULL || key == NULL || value == NULL) {
        return -1;
    }

    for (int i = 0; i < inst->totalsize; i++) {
        kvs_array_item_t *item = &inst->table[i];
        if (item->key == NULL) {
            continue;
        }
        if (strcmp(item->key, key) == 0) {
            size_t bytes = strlen(value) + 1;
            char *fresh = kvs_malloc(bytes);
            if (fresh == NULL) {
                return -2;
            }
            memcpy(fresh, value, bytes);
            kvs_free(item->value);
            item->value = fresh;
            return 0;
        }
    }

    /* A fixed positive code: returning the loop index would yield 0
     * ("success") when the store is empty. */
    return 1;
}
/* 1: key exists, 0: key does not exist (this is what kvs_array_exist returns) */
// Check whether a key is stored
/*
 * Returns 1 when kvs_array_get finds a value for key, 0 when it does not,
 * -1 when inst or key is NULL.
 */
int kvs_array_exist(kvs_array_t *inst, char* key)
{
    if (!inst || !key) {
        return -1;
    }
    return (kvs_array_get(inst, key) != NULL) ? 1 : 0;
}
整个 kvs_array.c
文件实现的就是我们对应的操作指令。
kvstore.c
#include "kvstore.h"

#if ENABLE_ARRAY
extern kvs_array_t global_array;
#endif

/* Command keywords; the index of each entry matches the KVS_CMD_* value below. */
const char *command[] = {
    "SET", "GET", "DEL", "MOD", "EXIST"
};

/* Placeholder table, currently empty (an empty initializer is a compiler extension before C23). */
const char *response[] = {};

// Command identifiers
enum
{
    KVS_CMD_START = 0,
    KVS_CMD_SET = KVS_CMD_START, // 0
    KVS_CMD_GET,                 // 1
    KVS_CMD_DEL,                 // 2
    KVS_CMD_MOD,                 // 3
    KVS_CMD_EXIST,               // 4
    KVS_CMD_COUNT                // 5
};

/* Allocation wrapper used by the storage engines; currently forwards to malloc. */
void* kvs_malloc(size_t size)
{
    return malloc(size);
}

/* Release memory obtained from kvs_malloc; currently forwards to free. */
void kvs_free(void* ptr)
{
    free(ptr);
}
// tokens[0]: SET
// tokens[1]: Key
// tokens[2]: Value
int kvs_filter_protocal(char** tokens, int count, char* response)
{if (tokens == NULL || count == 0 || response == NULL) {return -1;}int cmd = KVS_CMD_START;for(cmd = KVS_CMD_START; cmd < KVS_CMD_COUNT; cmd++) {if(strcmp(tokens[0], command[cmd]) == 0) {break;}}int ret, length = 0;switch(cmd) {case KVS_CMD_SET:ret = kvs_array_set(&global_array, tokens[1], tokens[2]);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_GET:char* value = kvs_array_get(&global_array, tokens[1]);if (value == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", value);}break;case KVS_CMD_DEL:ret = kvs_array_del(&global_array, tokens[1]);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_MOD:ret = kvs_array_mod(&global_array, tokens[1], tokens[2]);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_EXIST:ret = kvs_array_exist(&global_array, tokens[1]);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "NOEXIST\r\n");} else if (ret > 0) {length = sprintf(response, "EXIST\r\n");}break;default:break;}return length;
}// 返回对应的token数量
// SET Key Value
// GET Key
#ifndef KVS_MAX_TOKEN
#define KVS_MAX_TOKEN 128 /* fallback; kvstore.h normally provides this */
#endif

/*
 * Split msg in place on single spaces and store a pointer to each token.
 *
 * msg    - request text; it is modified (strtok writes terminators into it)
 * tokens - output array; must have room for KVS_MAX_TOKEN pointers
 *
 * Returns the number of tokens stored (at most KVS_MAX_TOKEN; extra tokens
 * are dropped), or -1 when an argument is NULL.
 */
int kvs_split_token(char* msg, char* tokens[])
{
    if (msg == NULL || tokens == NULL) {
        return -1;
    }

    int idx = 0;
    char *token = strtok(msg, " ");
    while (token != NULL && idx < KVS_MAX_TOKEN) {
        tokens[idx++] = token;
        token = strtok(NULL, " ");
    }
    return idx;
}
/*
* @brief 协议解析
* @param msg 消息体
* @param length 消息体长度
* @param response 响应体
* @return 0 成功 -1 失败
*/
// 协议解析
int kvs_protocal(char* msg, int length, char* response)
{if (msg == NULL || length <= 0 || response == NULL) {return -1;}printf("recv: %d, %s\n", length, msg);char* tokens[KVS_MAX_TOKEN];int count = kvs_split_token(msg, tokens);if (count == -1) {return -1;}return kvs_filter_protocal(tokens, count, response);
}int init_kvengine(void)
{
#if ENABLE_ARRAYmemset(&global_array, 0, sizeof(kvs_array_t));kvs_array_create(&global_array);
#endifreturn 0;
}int main(int argc, char* argv[])
{init_kvengine();if (argc != 2) {printf("Usage: %s <port>\n", argv[0]);return -1;}if (NETWORK_TYPE == NETWORK_RECATOR) {recator_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_PROACTOR) {proactor_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_NTYCO) {ntyco_start(atoi(argv[1]), kvs_protocal);}return 0;
}
kvstore.h
/* Guard name avoids the double-underscore prefix, which is reserved for the implementation. */
#ifndef KV_STORE_H
#define KV_STORE_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Network models selectable through NETWORK_TYPE. */
#define NETWORK_RECATOR 0
#define NETWORK_PROACTOR 1
#define NETWORK_NTYCO 2

#define NETWORK_TYPE NETWORK_RECATOR

/* Capacity of the token array used when splitting a request. */
#define KVS_MAX_TOKEN 128
#define ENABLE_ARRAY 1
/* Number of slots allocated for the array store. */
#define ARRAY_SIZE 1024

/* Request handler: receives the message and its length, writes the reply into response. */
typedef int (*msg_handler)(char *msg, int length, char *response);
extern int recator_entry(unsigned short port, msg_handler handler);
extern int ntyco_start(unsigned short port, msg_handler handler);
extern int proactor_entry(unsigned short port, msg_handler handler);

#if ENABLE_ARRAY

typedef struct kvs_array_item_s {
    char *key;
    char *value;
} kvs_array_item_t;

typedef struct kvs_array_s {
    kvs_array_item_t *table; // slot table
    int idx;                 // current index
    int totalsize;           // number of slots in use
} kvs_array_t;

int kvs_array_create(kvs_array_t *inst);
void kvs_array_destroy(kvs_array_t *inst);
int kvs_array_set(kvs_array_t *inst, char* key, char* value);
char* kvs_array_get(kvs_array_t *inst, char* key);
int kvs_array_del(kvs_array_t *inst, char* key);
int kvs_array_mod(kvs_array_t *inst, char* key, char* value);
int kvs_array_exist(kvs_array_t *inst, char* key);

#endif

void kvs_free(void* ptr);
void* kvs_malloc(size_t size);

#endif
以上是基于数组来进行实现的,我们可以来看一下测试效果:
关于makefile
# Build the kvstore server from the network front ends and the array engine.
CC = gcc
TARGET = kvstore
SRC = kvstore.c reactor.c proactor.c ntyco.c kvs_array.c
INCLUDES = -I ./NtyCo/core/
LIBS = -L ./NtyCo/ -luring -lntyco

.PHONY: all clean

all:
	$(CC) -o $(TARGET) $(SRC) $(INCLUDES) $(LIBS)

clean:
	rm -f $(TARGET)
测试用例的实现
当前我们是使用网络助手来进行测试的,每次只能手动发送一条指令,接下来我们就需要来创建测试用例,目标就是:
- TCP 客户端,建立连接;
- 发送协议;
- 接收服务端返回的数据;
- 预期数据与服务端返回的数据进行对比。
testcase.c
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#define TESTCASE_MAX_MSG_LEN 1024
// Send a message
/*
 * Send len bytes of msg on connfd, retrying after short writes.
 * Returns the number of bytes sent, or -1 (after perror) when send fails.
 */
int send_msg(int connfd, char* msg, int len)
{
    int sent = 0;
    while (sent < len) {
        int ret = (int)send(connfd, msg + sent, (size_t)(len - sent), 0);
        if (ret < 0) {
            perror("send");
            return -1;
        }
        if (ret == 0) {
            break; /* nothing accepted; report what went out so far */
        }
        sent += ret;
    }
    return sent;
}
// Receive a message
int recv_msg(int connfd, char* msg, int len)
{
    /* One recv call: returns the byte count it reports, or -1 after
     * printing the error when it fails. */
    int received = recv(connfd, msg, len, 0);
    if (received >= 0) {
        return received;
    }
    perror("recv");
    return -1;
}
// Test case
void testcase(int connfd, char* msg, char* pattern, char* casename)
{if (connfd < 0 || msg == NULL || pattern == NULL || casename == NULL) {return;}// 发送消息send_msg(connfd, msg, strlen(msg));// 接收消息char result[TESTCASE_MAX_MSG_LEN] = {0};recv_msg(connfd, result, TESTCASE_MAX_MSG_LEN);if (strcmp(pattern, result) == 0) {printf("testcase %s pass\n", casename);} else {printf("testcase %s fail, pattern: %s, result: %s\n", casename, pattern, result);exit(1);}
}// 连接tcp服务器
/*
 * Open a TCP connection to ip:port.
 * Returns the connected socket, or -1 when the address cannot be parsed or
 * socket/connect fails; the socket is closed on every failure path.
 */
int conncet_tcpserver(const char *ip, unsigned int port)
{
    if (ip == NULL) {
        return -1;
    }

    int connfd = socket(AF_INET, SOCK_STREAM, 0);
    if (connfd < 0) {
        perror("socket");
        return -1;
    }

    struct sockaddr_in server_addr;
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons((unsigned short)port);
    if (inet_pton(AF_INET, ip, &server_addr.sin_addr) != 1) {
        printf("invalid ip: %s\n", ip);
        close(connfd);
        return -1;
    }

    if (connect(connfd, (struct sockaddr *)&server_addr, sizeof(server_addr)) < 0) {
        perror("connect");
        close(connfd);
        return -1;
    }
    return connfd;
}

/* Usage: <program> <ip> <port>. Runs the array-store scenario against a running server. */
int main(int argc, char *argv[])
{
    if (argc != 3) {
        printf("Usage: %s <ip> <port>\n", argv[0]);
        return -1;
    }

    // connect to the TCP server
    int connfd = conncet_tcpserver(argv[1], atoi(argv[2]));
    if (connfd < 0) {
        return -1;
    }

    testcase(connfd, "SET Teacher King", "OK\r\n", "SET-Teacher");
    testcase(connfd, "GET Teacher", "King\r\n", "GET-Teacher");
    testcase(connfd, "MOD Teacher Mark", "OK\r\n", "MOD-Teacher");
    testcase(connfd, "GET Teacher", "Mark\r\n", "GET-Teacher");
    testcase(connfd, "EXIST Teacher", "EXIST\r\n", "EXIST-Teacher");
    testcase(connfd, "DEL Teacher", "OK\r\n", "DEL-Teacher");
    testcase(connfd, "GET Teacher", "NOEXIST\r\n", "GET-Teacher");
    testcase(connfd, "MOD Teacher Mark", "NOEXIST\r\n", "MOD-Teacher");
    testcase(connfd, "EXIST Teacher", "NOEXIST\r\n", "EXIST-Teacher");

    close(connfd);
    return 0;
}
运行以后,我们可以看见对应的效果:
红黑树
上面我们使用数组来实现了 KV 存储引擎的键值对存储,但是实际开发中是很少会使用到数组结构的,接下来我们使用红黑树来实现存储。红黑树是一棵自平衡的二叉搜索树,它的查找和插入速度都非常快,时间复杂度可以达到 $O(\log N)$。
整体来说他就是如下的这样一个结构:
目前整体的架构都已经搭建完毕,如果要实现红黑树模块,只需要将对应的代码添加进去即可,为了跟数组进行区分,我们对红黑树进行以下设置:
/* Command keywords; the index of each entry matches the KVS_CMD_* value below. */
const char *command[] = {
    // array
    "SET", "GET", "DEL", "MOD", "EXIST",
    // rbtree
    "RSET", "RGET", "RDEL", "RMOD", "REXIST"
};

/* Placeholder table, currently empty (an empty initializer is a compiler extension before C23). */
const char *response[] = {};

// Command identifiers
enum
{
    KVS_CMD_START = 0,
    // array
    KVS_CMD_SET = KVS_CMD_START, // 0
    KVS_CMD_GET,                 // 1
    KVS_CMD_DEL,                 // 2
    KVS_CMD_MOD,                 // 3
    KVS_CMD_EXIST,               // 4
    // rbtree
    KVS_CMD_RSET,                // 5
    KVS_CMD_RGET,                // 6
    KVS_CMD_RDEL,                // 7
    KVS_CMD_RMOD,                // 8
    KVS_CMD_REXIST,              // 9
    KVS_CMD_COUNT                // 10
};
kvstore.h
/* Guard name avoids the double-underscore prefix, which is reserved for the implementation. */
#ifndef KV_STORE_H
#define KV_STORE_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Network models selectable through NETWORK_TYPE. */
#define NETWORK_RECATOR 0
#define NETWORK_PROACTOR 1
#define NETWORK_NTYCO 2

#define NETWORK_TYPE NETWORK_NTYCO

/* Capacity of the token array used when splitting a request. */
#define KVS_MAX_TOKEN 128
#define ENABLE_ARRAY 0
#define ENABLE_RBTREE 1
/* Number of slots allocated for the array store. */
#define ARRAY_SIZE 1024

/* Request handler: receives the message and its length, writes the reply into response. */
typedef int (*msg_handler)(char *msg, int length, char *response);
extern int recator_entry(unsigned short port, msg_handler handler);
extern int ntyco_start(unsigned short port, msg_handler handler);
extern int proactor_entry(unsigned short port, msg_handler handler);

#if ENABLE_ARRAY

typedef struct kvs_array_item_s {
    char *key;
    char *value;
} kvs_array_item_t;

typedef struct kvs_array_s {
    kvs_array_item_t *table; // slot table
    int idx;                 // current index
    int totalsize;           // number of slots in use
} kvs_array_t;

int kvs_array_create(kvs_array_t *inst);
void kvs_array_destroy(kvs_array_t *inst);
int kvs_array_set(kvs_array_t *inst, char* key, char* value);
char* kvs_array_get(kvs_array_t *inst, char* key);
int kvs_array_del(kvs_array_t *inst, char* key);
int kvs_array_mod(kvs_array_t *inst, char* key, char* value);
int kvs_array_exist(kvs_array_t *inst, char* key);

#endif

#if ENABLE_RBTREE

#define RED 1
#define BLACK 2

/* 1: keys are C strings; 0: keys are ints. */
#define ENABLE_TYPE_CHAR 1

#if ENABLE_TYPE_CHAR
typedef char* KEY_TYPE;
#else
typedef int KEY_TYPE;
#endif

typedef struct _rbtree_node
{
    unsigned char color;
    struct _rbtree_node *right;
    struct _rbtree_node *left;
    struct _rbtree_node *parent;
    KEY_TYPE key;
    void *value;
} rbtree_node;

typedef struct _rbtree
{
    rbtree_node *root;
    rbtree_node *nil; /* sentinel node used in place of NULL links */
} rbtree;

typedef struct _rbtree kvs_rbtree_t;

int kvs_rbtree_create(kvs_rbtree_t *rbtree);
void kvs_rbtree_destroy(kvs_rbtree_t *rbtree);
int kvs_rbtree_set(kvs_rbtree_t *rbtree, char* key, char* value);
char* kvs_rbtree_get(kvs_rbtree_t *rbtree, char* key);
int kvs_rbtree_del(kvs_rbtree_t *rbtree, char* key);
int kvs_rbtree_mod(kvs_rbtree_t *rbtree, char* key, char* value);
int kvs_rbtree_exist(kvs_rbtree_t *rbtree, char* key);

#endif

void kvs_free(void* ptr);
void* kvs_malloc(size_t size);

#endif
kvstore.c
#include "kvstore.h"

#if ENABLE_ARRAY
extern kvs_array_t global_array;
#endif

#if ENABLE_RBTREE
extern kvs_rbtree_t global_rbtree;
#endif

/* Command keywords; the index of each entry matches the KVS_CMD_* value below. */
const char *command[] = {
    // array
    "SET", "GET", "DEL", "MOD", "EXIST",
    // rbtree
    "RSET", "RGET", "RDEL", "RMOD", "REXIST"
};

/* Placeholder table, currently empty (an empty initializer is a compiler extension before C23). */
const char *response[] = {};

// Command identifiers
enum
{
    KVS_CMD_START = 0,
    // array
    KVS_CMD_SET = KVS_CMD_START, // 0
    KVS_CMD_GET,                 // 1
    KVS_CMD_DEL,                 // 2
    KVS_CMD_MOD,                 // 3
    KVS_CMD_EXIST,               // 4
    // rbtree
    KVS_CMD_RSET,                // 5
    KVS_CMD_RGET,                // 6
    KVS_CMD_RDEL,                // 7
    KVS_CMD_RMOD,                // 8
    KVS_CMD_REXIST,              // 9
    KVS_CMD_COUNT                // 10
};

/* Allocation wrapper used by the storage engines; currently forwards to malloc. */
void* kvs_malloc(size_t size)
{
    return malloc(size);
}

/* Release memory obtained from kvs_malloc; currently forwards to free. */
void kvs_free(void* ptr)
{
    free(ptr);
}
// tokens[0]: SET
// tokens[1]: Key
// tokens[2]: Value
int kvs_filter_protocal(char** tokens, int count, char* response)
{if (tokens == NULL || count == 0 || response == NULL) {return -1;}int cmd = KVS_CMD_START;for(cmd = KVS_CMD_START; cmd < KVS_CMD_COUNT; cmd++) {if(strcmp(tokens[0], command[cmd]) == 0) {break;}}int ret, length = 0;char* key = tokens[1];char* value = tokens[2];switch(cmd) {
#if ENABLE_ARRAYcase KVS_CMD_SET:ret = kvs_array_set(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_GET:char* value = kvs_array_get(&global_rbtree, key);if (value == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", value);}break;case KVS_CMD_DEL:ret = kvs_array_del(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_MOD:ret = kvs_array_mod(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_EXIST:ret = kvs_array_exist(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "NOEXIST\r\n");} else if (ret > 0) {length = sprintf(response, "EXIST\r\n");}break;
#endif
#if ENABLE_RBTREE// rbtreecase KVS_CMD_RSET:ret = kvs_rbtree_set(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_RGET:char* result = kvs_rbtree_get(&global_rbtree, key);if (result == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", result);}break;case KVS_CMD_RDEL:ret = kvs_rbtree_del(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_RMOD:ret = kvs_rbtree_mod(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_REXIST:ret = kvs_rbtree_exist(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "EXIST\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;
#endifdefault:break;}return length;
}// 返回对应的token数量
// SET Key Value
// GET Key
#ifndef KVS_MAX_TOKEN
#define KVS_MAX_TOKEN 128 /* fallback; kvstore.h normally provides this */
#endif

/*
 * Tokenize msg in place, using a single space as the separator.
 *
 * msg    - request text; strtok overwrites separators, so it is modified
 * tokens - receives one pointer per token; needs KVS_MAX_TOKEN entries
 *
 * Returns how many tokens were stored, capped at KVS_MAX_TOKEN so the
 * caller's array cannot be overrun, or -1 when an argument is NULL.
 */
int kvs_split_token(char* msg, char* tokens[])
{
    if (msg == NULL || tokens == NULL) {
        return -1;
    }

    int stored = 0;
    for (char *piece = strtok(msg, " ");
         piece != NULL && stored < KVS_MAX_TOKEN;
         piece = strtok(NULL, " ")) {
        tokens[stored++] = piece;
    }
    return stored;
}
/*
* @brief 协议解析
* @param msg 消息体
* @param length 消息体长度
* @param response 响应体
* @return 0 成功 -1 失败
*/
// 协议解析
int kvs_protocal(char* msg, int length, char* response)
{if (msg == NULL || length <= 0 || response == NULL) {return -1;}// printf("recv: %d, %s\n", length, msg);char* tokens[KVS_MAX_TOKEN];int count = kvs_split_token(msg, tokens);if (count == -1) {return -1;}return kvs_filter_protocal(tokens, count, response);
}int init_kvengine(void)
{
#if ENABLE_ARRAYmemset(&global_array, 0, sizeof(kvs_array_t));kvs_array_create(&global_array);
#endif#if ENABLE_RBTREEmemset(&global_rbtree, 0, sizeof(kvs_rbtree_t));kvs_rbtree_create(&global_rbtree);
#endifreturn 0;
}void destory_kvengine(void)
{
#if ENABLE_ARRAYkvs_array_destroy(&global_array);
#endif#if ENABLE_RBTREEkvs_rbtree_destroy(&global_rbtree);
#endifreturn ;
}int main(int argc, char* argv[])
{init_kvengine();if (argc != 2) {printf("Usage: %s <port>\n", argv[0]);return -1;}if (NETWORK_TYPE == NETWORK_RECATOR) {recator_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_PROACTOR) {proactor_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_NTYCO) {ntyco_start(atoi(argv[1]), kvs_protocal);}// 销毁kv引擎destory_kvengine();return 0;
}
kvs_rbtree.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "kvstore.h"

/* Three-way key comparison: negative, zero or positive like strcmp. */
static int rbtree_key_cmp(KEY_TYPE a, KEY_TYPE b)
{
#if ENABLE_TYPE_CHAR
    return strcmp(a, b);
#else
    return (a > b) - (a < b);
#endif
}

/* Leftmost node of the subtree rooted at x (smallest key). */
rbtree_node *rbtree_mini(rbtree *T, rbtree_node *x)
{
    while (x->left != T->nil) {
        x = x->left;
    }
    return x;
}

/* Rightmost node of the subtree rooted at x (largest key). */
rbtree_node *rbtree_maxi(rbtree *T, rbtree_node *x)
{
    while (x->right != T->nil) {
        x = x->right;
    }
    return x;
}

/* In-order successor of x, or T->nil when x holds the largest key. */
rbtree_node *rbtree_successor(rbtree *T, rbtree_node *x)
{
    rbtree_node *y = x->parent;

    if (x->right != T->nil) {
        return rbtree_mini(T, x->right);
    }

    /* climb until we leave a left subtree */
    while ((y != T->nil) && (x == y->right)) {
        x = y;
        y = y->parent;
    }
    return y;
}

/* Rotate left around x: x's right child y takes x's place, x becomes y's left child. */
void rbtree_left_rotate(rbtree *T, rbtree_node *x)
{
    rbtree_node *y = x->right;

    x->right = y->left;             // 1 1
    if (y->left != T->nil) {        // 1 2
        y->left->parent = x;
    }

    y->parent = x->parent;          // 1 3
    if (x->parent == T->nil) {      // 1 4
        T->root = y;
    } else if (x == x->parent->left) {
        x->parent->left = y;
    } else {
        x->parent->right = y;
    }

    y->left = x;                    // 1 5
    x->parent = y;                  // 1 6
}

/* Mirror image of rbtree_left_rotate: y's left child x takes y's place. */
void rbtree_right_rotate(rbtree *T, rbtree_node *y)
{
    rbtree_node *x = y->left;

    y->left = x->right;
    if (x->right != T->nil) {
        x->right->parent = y;
    }

    x->parent = y->parent;
    if (y->parent == T->nil) {
        T->root = x;
    } else if (y == y->parent->right) {
        y->parent->right = x;
    } else {
        y->parent->left = x;
    }

    x->right = y;
    y->parent = x;
}

/* Restore the red-black colouring after the red node z has been linked in. */
void rbtree_insert_fixup(rbtree *T, rbtree_node *z)
{
    while (z->parent->color == RED) { // z is RED here
        if (z->parent == z->parent->parent->left) {
            rbtree_node *y = z->parent->parent->right; /* uncle */
            if (y->color == RED) {
                z->parent->color = BLACK;
                y->color = BLACK;
                z->parent->parent->color = RED;
                z = z->parent->parent; // z stays RED
            } else {
                if (z == z->parent->right) {
                    z = z->parent;
                    rbtree_left_rotate(T, z);
                }
                z->parent->color = BLACK;
                z->parent->parent->color = RED;
                rbtree_right_rotate(T, z->parent->parent);
            }
        } else {
            rbtree_node *y = z->parent->parent->left; /* uncle */
            if (y->color == RED) {
                z->parent->color = BLACK;
                y->color = BLACK;
                z->parent->parent->color = RED;
                z = z->parent->parent; // z stays RED
            } else {
                if (z == z->parent->left) {
                    z = z->parent;
                    rbtree_right_rotate(T, z);
                }
                z->parent->color = BLACK;
                z->parent->parent->color = RED;
                rbtree_left_rotate(T, z->parent->parent);
            }
        }
    }
    T->root->color = BLACK;
}

/*
 * Link node z into the tree by key and rebalance.
 * When a node with an equal key already exists the tree is left unchanged
 * and z is NOT linked in; the caller keeps ownership of z in that case.
 */
void rbtree_insert(rbtree *T, rbtree_node *z)
{
    rbtree_node *y = T->nil;
    rbtree_node *x = T->root;

    while (x != T->nil) {
        y = x;
        int order = rbtree_key_cmp(z->key, x->key);
        if (order < 0) {
            x = x->left;
        } else if (order > 0) {
            x = x->right;
        } else { // key already present
            return;
        }
    }

    z->parent = y;
    if (y == T->nil) {
        T->root = z;
    } else if (rbtree_key_cmp(z->key, y->key) < 0) {
        y->left = z;
    } else {
        y->right = z;
    }

    z->left = T->nil;
    z->right = T->nil;
    z->color = RED;

    rbtree_insert_fixup(T, z);
}

/* Restore the red-black colouring after a black node was unlinked; x is the node that moved up. */
void rbtree_delete_fixup(rbtree *T, rbtree_node *x)
{
    while ((x != T->root) && (x->color == BLACK)) {
        if (x == x->parent->left) {
            rbtree_node *w = x->parent->right; /* sibling */
            if (w->color == RED) {
                w->color = BLACK;
                x->parent->color = RED;
                rbtree_left_rotate(T, x->parent);
                w = x->parent->right;
            }
            if ((w->left->color == BLACK) && (w->right->color == BLACK)) {
                w->color = RED;
                x = x->parent;
            } else {
                if (w->right->color == BLACK) {
                    w->left->color = BLACK;
                    w->color = RED;
                    rbtree_right_rotate(T, w);
                    w = x->parent->right;
                }
                w->color = x->parent->color;
                x->parent->color = BLACK;
                w->right->color = BLACK;
                rbtree_left_rotate(T, x->parent);
                x = T->root;
            }
        } else {
            rbtree_node *w = x->parent->left; /* sibling */
            if (w->color == RED) {
                w->color = BLACK;
                x->parent->color = RED;
                rbtree_right_rotate(T, x->parent);
                w = x->parent->left;
            }
            if ((w->left->color == BLACK) && (w->right->color == BLACK)) {
                w->color = RED;
                x = x->parent;
            } else {
                if (w->left->color == BLACK) {
                    w->right->color = BLACK;
                    w->color = RED;
                    rbtree_left_rotate(T, w);
                    w = x->parent->left;
                }
                w->color = x->parent->color;
                x->parent->color = BLACK;
                w->left->color = BLACK;
                rbtree_right_rotate(T, x->parent);
                x = T->root;
            }
        }
    }
    x->color = BLACK;
}

/*
 * Remove the entry held by z from the tree.
 *
 * Returns the node that was physically unlinked. When z has two children
 * this is z's successor: with string keys its key/value are swapped with
 * z's, so the returned node always carries the removed key and value.
 * The caller releases the returned node.
 */
rbtree_node *rbtree_delete(rbtree *T, rbtree_node *z)
{
    rbtree_node *y = T->nil;
    rbtree_node *x = T->nil;

    if ((z->left == T->nil) || (z->right == T->nil)) {
        y = z;
    } else {
        y = rbtree_successor(T, z);
    }

    if (y->left != T->nil) {
        x = y->left;
    } else if (y->right != T->nil) {
        x = y->right;
    }

    /* x may be the sentinel; its parent link is set on purpose for the fixup */
    x->parent = y->parent;
    if (y->parent == T->nil) {
        T->root = x;
    } else if (y == y->parent->left) {
        y->parent->left = x;
    } else {
        y->parent->right = x;
    }

    if (y != z) {
#if ENABLE_TYPE_CHAR
        void *tmp = z->key;
        z->key = y->key;
        y->key = tmp;

        tmp = z->value;
        z->value = y->value;
        y->value = tmp;
#else
        z->key = y->key;
        z->value = y->value;
#endif
    }

    if (y->color == BLACK) {
        rbtree_delete_fixup(T, x);
    }
    return y;
}

/* Find the node holding key; returns T->nil (never NULL) when the key is absent. */
rbtree_node *rbtree_search(rbtree *T, KEY_TYPE key)
{
    rbtree_node *node = T->root;

    while (node != T->nil) {
        int order = rbtree_key_cmp(key, node->key);
        if (order < 0) {
            node = node->left;
        } else if (order > 0) {
            node = node->right;
        } else {
            return node;
        }
    }
    return T->nil;
}

/* Print every node of the subtree rooted at node in ascending key order. */
void rbtree_traversal(rbtree *T, rbtree_node *node)
{
    if (node != T->nil) {
        /* visit the left subtree too, otherwise smaller keys are never printed */
        rbtree_traversal(T, node->left);
#if ENABLE_TYPE_CHAR
        printf("key:%s, value:%s\n", node->key, (char *)node->value);
#else
        printf("key:%d, color:%d\n", node->key, node->color);
#endif
        rbtree_traversal(T, node->right);
    }
}

typedef struct _rbtree kvs_rbtree_t;

/* Red-black tree store instance; defined here and referenced by the protocol layer. */
kvs_rbtree_t global_rbtree;
// 5 + 2
int kvs_rbtree_create(kvs_rbtree_t *inst)
{if (inst == NULL){return -1;}// 初始化红黑树, 构建根结点inst->nil = (rbtree_node *)kvs_malloc(sizeof(rbtree_node));inst->nil->color = BLACK;inst->root = inst->nil;return 0;
}void kvs_rbtree_destroy(kvs_rbtree_t *inst)
{if (inst == NULL){return ;}rbtree_node *node = NULL;while (!(node = inst->root)){rbtree_node *mini = rbtree_mini(inst, node);rbtree_node *cur = rbtree_delete(inst, mini);kvs_free(cur);}kvs_free(inst->nil);
}int kvs_rbtree_set(kvs_rbtree_t *inst, char *key, char *value)
{if (inst == NULL || key == NULL || value == NULL){return -1;}rbtree_node *node = (rbtree_node*)kvs_malloc(sizeof(rbtree_node));if (!node) return -2;node->key = kvs_malloc(strlen(key) + 1);if (!node->key) return -2;memset(node->key, 0, strlen(key) + 1);strcpy(node->key, key);node->value = kvs_malloc(strlen(value) + 1);if (!node->value) return -2;memset(node->value, 0, strlen(value) + 1);strcpy(node->value, value);rbtree_insert(inst, node);return 0;
}char* kvs_rbtree_get(kvs_rbtree_t *inst, char *key)
{if (inst == NULL || key == NULL){return NULL;}rbtree_node *node = rbtree_search(inst, key);if (!node) return NULL;if (node == inst->nil) return NULL;return node->value;
}int kvs_rbtree_del(kvs_rbtree_t *inst, char *key)
{if (inst == NULL || key == NULL){return -1;}rbtree_node *node = rbtree_search(inst, key);if (!node) return 1;rbtree_node *cur = rbtree_delete(inst, node);kvs_free(cur);return 0;
}int kvs_rbtree_mod(kvs_rbtree_t *inst, char *key, char *value)
{if (inst == NULL || key == NULL || value == NULL){return -1;}rbtree_node *node = rbtree_search(inst, key);if (!node) return 1;if (node == inst->nil) return 1;kvs_free(node->value);node->value = kvs_malloc(strlen(value) + 1);if (!node->value) return -2;memset(node->value, 0, strlen(value) + 1);strcpy(node->value, value);return 0;
}int kvs_rbtree_exist(kvs_rbtree_t *inst, char *key)
{if (inst == NULL || key == NULL){return -1;}rbtree_node *node = rbtree_search(inst, key);if (!node) return 1;if (node == inst->nil) return 1;return 0;
}
整个红黑树的存储结构实现如上所示,我们可以来测试一下对应的效果:
/*
 * Exercise the red-black tree engine over an established connection.
 * The R-prefixed commands are the ones routed to the rbtree store; the
 * unprefixed SET/GET/... belong to the array store.
 */
void testcase_rbtree(int connfd)
{
    testcase(connfd, "RSET Teacher King", "OK\r\n", "RSET-Teacher");
    testcase(connfd, "RGET Teacher", "King\r\n", "RGET-Teacher");
    testcase(connfd, "RMOD Teacher Mark", "OK\r\n", "RMOD-Teacher");
    testcase(connfd, "RGET Teacher", "Mark\r\n", "RGET-Teacher");
    testcase(connfd, "REXIST Teacher", "EXIST\r\n", "REXIST-Teacher");
    testcase(connfd, "RDEL Teacher", "OK\r\n", "RDEL-Teacher");
    testcase(connfd, "RGET Teacher", "NOEXIST\r\n", "RGET-Teacher");
    testcase(connfd, "RMOD Teacher Mark", "NOEXIST\r\n", "RMOD-Teacher");
    testcase(connfd, "REXIST Teacher", "NOEXIST\r\n", "REXIST-Teacher");
}
测试结果如下:
hash
接下来我们使用 hash 的数据结构来进行实现,hash 的插入以及查找的效率是非常高的,可以达到 O(1) 的时间复杂度,他的结构如下所示:
当前使用的 hash 采用的是拉链法:当出现 hash 冲突时,采用链表的方式将对应的节点链接在相应槽位的后面。
kvstore.h
#if ENABLE_HASH

#define MAX_KEY_LEN 128
#define MAX_VALUE_LEN 512
#define MAX_TABLE_SIZE 1024

/* 1: nodes hold pointers to key/value strings; 0: nodes embed fixed-size buffers. */
#define ENABLE_KEY_POINTER 1

/* One entry of a hash chain. */
typedef struct hashnode_s {
#if ENABLE_KEY_POINTER
    char *key;
    char *value;
#else
    char key[MAX_KEY_LEN];
    char value[MAX_VALUE_LEN];
#endif
    struct hashnode_s *next; /* next entry in the same chain */
} hashnode_t;

typedef struct hashtable_s {
    hashnode_t **nodes; /* array of chain heads (changed from * to **) */
    int max_slots;
    int count;
} hashtable_t;

typedef struct hashtable_s kvs_hash_t;

int kvs_hash_create(kvs_hash_t *hash);
void kvs_hash_destory(kvs_hash_t *hash);
int kvs_hash_set(hashtable_t *hash, char *key, char *value);
char * kvs_hash_get(kvs_hash_t *hash, char *key);
int kvs_hash_mod(kvs_hash_t *hash, char *key, char *value);
int kvs_hash_del(kvs_hash_t *hash, char *key);
int kvs_hash_exist(kvs_hash_t *hash, char *key);

#endif
kvstore.c
#include "kvstore.h"

#if ENABLE_ARRAY
extern kvs_array_t global_array;
#endif

#if ENABLE_RBTREE
extern kvs_rbtree_t global_rbtree;
#endif

#if ENABLE_HASH
extern kvs_hash_t global_hash;
#endif

/* Command keywords; the index of each entry matches the KVS_CMD_* value below. */
const char *command[] = {
    // array
    "SET", "GET", "DEL", "MOD", "EXIST",
    // rbtree
    "RSET", "RGET", "RDEL", "RMOD", "REXIST",
    // hash
    "HSET", "HGET", "HDEL", "HMOD", "HEXIST"
};

/* Placeholder table, currently empty (an empty initializer is a compiler extension before C23). */
const char *response[] = {};

// Command identifiers
enum
{
    KVS_CMD_START = 0,
    // array
    KVS_CMD_SET = KVS_CMD_START, // 0
    KVS_CMD_GET,                 // 1
    KVS_CMD_DEL,                 // 2
    KVS_CMD_MOD,                 // 3
    KVS_CMD_EXIST,               // 4
    // rbtree
    KVS_CMD_RSET,                // 5
    KVS_CMD_RGET,                // 6
    KVS_CMD_RDEL,                // 7
    KVS_CMD_RMOD,                // 8
    KVS_CMD_REXIST,              // 9
    // hash
    KVS_CMD_HSET,                // 10
    KVS_CMD_HGET,                // 11
    KVS_CMD_HDEL,                // 12
    KVS_CMD_HMOD,                // 13
    KVS_CMD_HEXIST,              // 14
    KVS_CMD_COUNT                // 15
};

/* Allocation wrapper used by the storage engines; currently forwards to malloc. */
void* kvs_malloc(size_t size)
{
    return malloc(size);
}

/* Release memory obtained from kvs_malloc; currently forwards to free. */
void kvs_free(void* ptr)
{
    /* a void function must not return an expression in C */
    free(ptr);
}
// tokens[0]: SET
// tokens[1]: Key
// tokens[2]: Value
int kvs_filter_protocal(char** tokens, int count, char* response)
{if (tokens[0] == NULL || count == 0 || response == NULL) {return -1;}int cmd = KVS_CMD_START;for(cmd = KVS_CMD_START; cmd < KVS_CMD_COUNT; cmd++) {if(strcmp(tokens[0], command[cmd]) == 0) {break;}}int ret, length = 0;char* key = tokens[1];char* value = tokens[2];switch(cmd) {
#if ENABLE_ARRAYcase KVS_CMD_SET:ret = kvs_array_set(&global_array, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_GET:{char* result = kvs_array_get(&global_array, key);if (result == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", result);}break;}case KVS_CMD_DEL:ret = kvs_array_del(&global_array, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_MOD:ret = kvs_array_mod(&global_array, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_EXIST:ret = kvs_array_exist(&global_array, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "EXIST\r\n");} else if (ret > 0) {length = sprintf(response, "NOEXIST\r\n");}break;
#endif
#if ENABLE_RBTREE// rbtreecase KVS_CMD_RSET:// printf("1\n");ret = kvs_rbtree_set(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_RGET:{char* result = kvs_rbtree_get(&global_rbtree, key);if (result == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", result);}break;}case KVS_CMD_RDEL:ret = kvs_rbtree_del(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_RMOD:ret = kvs_rbtree_mod(&global_rbtree, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_REXIST:ret = kvs_rbtree_exist(&global_rbtree, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "EXIST\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;
#endif
#if ENABLE_HASHcase KVS_CMD_HSET:ret = kvs_hash_set(&global_hash, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "EXIST\r\n");}break;case KVS_CMD_HGET:{char* result = kvs_hash_get(&global_hash, key);if (result == NULL) {length = sprintf(response, "NOEXIST\r\n");} else {length = sprintf(response, "%s\r\n", result);}break;}case KVS_CMD_HDEL:ret = kvs_hash_del(&global_hash, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_HMOD:ret = kvs_hash_mod(&global_hash, key, value);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "OK\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;case KVS_CMD_HEXIST:ret = kvs_hash_exist(&global_hash, key);if (ret < 0) {length = sprintf(response, "ERROR\r\n");} else if (ret == 0) {length = sprintf(response, "EXIST\r\n");} else {length = sprintf(response, "NOEXIST\r\n");}break;
#endifdefault:assert(0);}return length;
}// 返回对应的token数量
// Examples of requests handled here:
//   SET Key Value
//   GET Key
#ifndef KVS_MAX_TOKEN
/* Fallback only: the project header is expected to define the real limit. */
#define KVS_MAX_TOKEN 128
#endif

/*
 * Split a request into space-separated tokens.
 *
 * msg is tokenised in place (strtok overwrites each separator with '\0'),
 * so every entry stored in tokens[] points into msg itself.
 *
 * @param msg    NUL-terminated request text; modified by this call
 * @param tokens output array with room for at least KVS_MAX_TOKEN entries
 * @return number of tokens stored (0..KVS_MAX_TOKEN), or -1 if msg or
 *         tokens is NULL
 *
 * At most KVS_MAX_TOKEN tokens are stored; any further words are ignored so
 * that an over-long request cannot write past the end of the caller's array.
 * NOTE: strtok keeps hidden static state, so this function is not reentrant.
 */
int kvs_split_token(char* msg, char* tokens[])
{
    if (msg == NULL || tokens == NULL) {
        return -1;
    }

    int idx = 0;
    for (char* token = strtok(msg, " ");
         token != NULL && idx < KVS_MAX_TOKEN;
         token = strtok(NULL, " ")) {
        tokens[idx++] = token;
    }
    return idx;
}
/*
* @brief 协议解析
* @param msg 消息体
* @param length 消息体长度
* @param response 响应体
* @return 0 成功 -1 失败
*/
// 协议解析
int kvs_protocal(char* msg, int length, char* response)
{if (msg == NULL || length <= 0 || response == NULL) {return -1;}// printf("recv: %d, %s\n", length, msg);char* tokens[KVS_MAX_TOKEN] = {0};int count = kvs_split_token(msg, tokens);if (count == -1) {return -1;}return kvs_filter_protocal(tokens, count, response);
}int init_kvengine(void)
{
#if ENABLE_ARRAYmemset(&global_array, 0, sizeof(kvs_array_t));kvs_array_create(&global_array);
#endif#if ENABLE_RBTREEmemset(&global_rbtree, 0, sizeof(kvs_rbtree_t));kvs_rbtree_create(&global_rbtree);
#endif#if ENABLE_HASHmemset(&global_hash, 0, sizeof(kvs_hash_t));kvs_hash_create(&global_hash);
#endifreturn 0;
}void destory_kvengine(void)
{
#if ENABLE_ARRAYkvs_array_destory(&global_array);
#endif#if ENABLE_RBTREEkvs_rbtree_destory(&global_rbtree);
#endif#if ENABLE_HASHkvs_hash_destory(&global_hash);
#endifreturn ;
}int main(int argc, char* argv[])
{init_kvengine();if (argc != 2) {printf("Usage: %s <port>\n", argv[0]);return -1;}if (NETWORK_TYPE == NETWORK_RECATOR) {reactor_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_PROACTOR) {proactor_entry(atoi(argv[1]), kvs_protocal);} else if (NETWORK_TYPE == NETWORK_NTYCO) {ntyco_start(atoi(argv[1]), kvs_protocal);}// 销毁kv引擎destory_kvengine();return 0;
}
kv_hash.c
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "kvstore.h"// Key, Value -->
// Modify
kvs_hash_t global_hash;//Connection
// 'C' + 'o' + 'n'
static int _hash(char *key, int size) {if (!key) return -1;int sum = 0;int i = 0;while (key[i] != 0) {sum += key[i];i ++;}return sum % size;
}hashnode_t *_create_node(char *key, char *value) {hashnode_t *node = (hashnode_t*)kvs_malloc(sizeof(hashnode_t));if (!node) return NULL;#if ENABLE_KEY_POINTERchar *kcopy = kvs_malloc(strlen(key) + 1);if (kcopy == NULL) return NULL;memset(kcopy, 0, strlen(key) + 1);strncpy(kcopy, key, strlen(key));node->key = kcopy;char *kvalue = kvs_malloc(strlen(value) + 1);if (kvalue == NULL) { kvs_free(kvalue);return NULL;}memset(kvalue, 0, strlen(value) + 1);strncpy(kvalue, value, strlen(value));node->value = kvalue;#elsestrncpy(node->key, key, MAX_KEY_LEN);strncpy(node->value, value, MAX_VALUE_LEN);
#endifnode->next = NULL;return node;
}int kvs_hash_create(kvs_hash_t *hash) {if (!hash) return -1;hash->nodes = (hashnode_t**)kvs_malloc(sizeof(hashnode_t*) * MAX_TABLE_SIZE);if (!hash->nodes) return -1;hash->max_slots = MAX_TABLE_SIZE;hash->count = 0; return 0;
}void kvs_hash_destory(kvs_hash_t *hash) {if (!hash) return;int i = 0;for (i = 0;i < hash->max_slots; i++) {hashnode_t *node = hash->nodes[i];while (node != NULL) { // errorhashnode_t *tmp = node;node = node->next;hash->nodes[i] = node;kvs_free(tmp);}}kvs_free(hash->nodes);
}int kvs_hash_set(kvs_hash_t *hash, char *key, char *value) {if (!hash || !key || !value) return -1;int idx = _hash(key, MAX_TABLE_SIZE);hashnode_t *node = hash->nodes[idx];
#if 1while (node != NULL) {if (strcmp(node->key, key) == 0) { // existreturn 1;}node = node->next;}
#endifhashnode_t *new_node = _create_node(key, value);new_node->next = hash->nodes[idx];hash->nodes[idx] = new_node;hash->count ++;return 0;
}char * kvs_hash_get(kvs_hash_t *hash, char *key) {if (!hash || !key) return NULL;int idx = _hash(key, MAX_TABLE_SIZE);hashnode_t *node = hash->nodes[idx];while (node != NULL) {if (strcmp(node->key, key) == 0) {return node->value;}node = node->next;}return NULL;
}int kvs_hash_mod(kvs_hash_t *hash, char *key, char *value) {if (!hash || !key) return -1;int idx = _hash(key, MAX_TABLE_SIZE);hashnode_t *node = hash->nodes[idx];while (node != NULL) {if (strcmp(node->key, key) == 0) {break;}node = node->next;}if (node == NULL) {return 1;}// node --> kvs_free(node->value);char *kvalue = kvs_malloc(strlen(value) + 1);if (kvalue == NULL) return -2;memset(kvalue, 0, strlen(value) + 1);strncpy(kvalue, value, strlen(value));node->value = kvalue;return 0;
}int kvs_hash_count(kvs_hash_t *hash) {return hash->count;
}int kvs_hash_del(kvs_hash_t *hash, char *key) {if (!hash || !key) return -2;int idx = _hash(key, MAX_TABLE_SIZE);hashnode_t *head = hash->nodes[idx];if (head == NULL) return -1; // noexist// head nodeif (strcmp(head->key, key) == 0) {hashnode_t *tmp = head->next;hash->nodes[idx] = tmp;kvs_free(head);hash->count --;return 0;}hashnode_t *cur = head;while (cur->next != NULL) {if (strcmp(cur->next->key, key) == 0) break; // search nodecur = cur->next;}if (cur->next == NULL) {return -1;}hashnode_t *tmp = cur->next;cur->next = tmp->next;
#if ENABLE_KEY_POINTERkvs_free(tmp->key);kvs_free(tmp->value);
#endifkvs_free(tmp);hash->count --;return 0;
}int kvs_hash_exist(kvs_hash_t *hash, char *key) {char *value = kvs_hash_get(hash, key);if (!value) return 1;return 0;
}
测试结果如下:
跳表
跳表本质上是一个有序链表。红黑树每次比较都能排除一半的节点,这启发我们,要是每次都能找到链表最中间的节点,不就可以实现 $O(\log N)$ 的查找时间复杂度了嘛。于是如下图所示,我们不妨规定跳表的每个节点都有一组指针,跳表还有一个额外的空节点作为“跳表头”,那么每次都从顶层依次向底层进行“跳”,就可以实现“每次比较都能排除剩下一半的节点”。但是还有个大问题,那就是上述理想跳表需要插入/删除一个元素时,元素的调整会非常麻烦,甚至还需要遍历整个链表来调整所有节点的指向。
所以在实际应用中,不会直接使用上述理想跳表的结构。而是在每次插入一个新元素时,按照一定概率计算其高度。统计学证明,当存放元素足够多的时候,该实际跳表性能无限趋近于理想跳表
同样,整体框架跟上面的 hash、rbtree 的实现也是一样的,代码的大体框架如下。这儿并没有完整地进行实现,待博主研究明白跳表以后再进行实现:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include "kvstore.h"

#define MAX_LEVEL 6
#define ENABLE_TYPE_CHAR 1

/*
 * Key/value type switch. Everything that depends on the key type (printf
 * format, header sentinel, comparison) is selected by ENABLE_TYPE_CHAR so
 * the two configurations cannot drift apart.
 */
#if ENABLE_TYPE_CHAR
typedef char* KEY_TYPE;
#define SKIP_KEY_FMT "%s"
#define SKIP_KEY_SENTINEL "-1"
#else
typedef int KEY_TYPE;
#define SKIP_KEY_FMT "%d"
#define SKIP_KEY_SENTINEL (-1)
#endif

/* One skip-list element; forward[i] is the successor on level i. */
typedef struct Node {
    KEY_TYPE key;
    KEY_TYPE value;
    struct Node** forward;
} Node;

/* level is the highest level currently in use; header is a sentinel node. */
typedef struct SkipList {
    int level;
    Node* header;
} SkipList;

/* Three-way key comparison: <0, 0 or >0, giving ascending order in both modes. */
static int compareKeys(KEY_TYPE a, KEY_TYPE b) {
#if ENABLE_TYPE_CHAR
    return strcmp(a, b);
#else
    return (a > b) - (a < b);
#endif
}

/*
 * Allocates a node with level + 1 forward pointers, all initialised to NULL.
 * key and value are stored as given (string keys are NOT copied; the caller
 * keeps ownership and must keep them alive).
 * Returns NULL if level is negative or memory could not be allocated.
 */
Node* createNode(int level, KEY_TYPE key, KEY_TYPE value) {
    if (level < 0) return NULL;

    Node* newNode = malloc(sizeof *newNode);
    if (newNode == NULL) return NULL;

    newNode->forward = calloc((size_t)level + 1, sizeof *newNode->forward);
    if (newNode->forward == NULL) {
        free(newNode);
        return NULL;
    }

    newNode->key = key;
    newNode->value = value;
    return newNode;
}

/*
 * Allocates an empty skip list whose header spans all MAX_LEVEL + 1 levels.
 * Returns NULL if memory could not be allocated.
 */
SkipList* createSkipList(void) {
    SkipList* skipList = malloc(sizeof *skipList);
    if (skipList == NULL) return NULL;

    skipList->level = 0;
    skipList->header = createNode(MAX_LEVEL, SKIP_KEY_SENTINEL, SKIP_KEY_SENTINEL);
    if (skipList->header == NULL) {
        free(skipList);
        return NULL;
    }

    for (int i = 0; i <= MAX_LEVEL; ++i) {
        skipList->header->forward[i] = NULL;
    }
    return skipList;
}

/* Picks a level in [0, MAX_LEVEL]; each extra level is taken while rand() < RAND_MAX / 2. */
int randomLevel(void) {
    int level = 0;
    while (rand() < RAND_MAX / 2 && level < MAX_LEVEL)
        level++;
    return level;
}

/*
 * Inserts key -> value, keeping keys in ascending order.
 * Returns true if the key was inserted; false if it already exists, an
 * argument is invalid, or memory could not be allocated (list unchanged).
 */
bool insert(SkipList* skipList, KEY_TYPE key, KEY_TYPE value) {
    if (skipList == NULL || skipList->header == NULL) return false;
#if ENABLE_TYPE_CHAR
    if (key == NULL) return false;
#endif

    /* update[i] is the last node on level i whose key is smaller than key. */
    Node* update[MAX_LEVEL + 1];
    Node* current = skipList->header;
    for (int i = skipList->level; i >= 0; --i) {
        while (current->forward[i] != NULL &&
               compareKeys(current->forward[i]->key, key) < 0) {
            current = current->forward[i];
        }
        update[i] = current;
    }

    current = current->forward[0];
    if (current != NULL && compareKeys(current->key, key) == 0) {
        printf("Key " SKIP_KEY_FMT " already exists\n", key);
        return false;
    }

    int level = randomLevel();
    /* Allocate before touching the list so a failure leaves it unchanged. */
    Node* newNode = createNode(level, key, value);
    if (newNode == NULL) return false;

    if (level > skipList->level) {
        /* Levels that were unused so far start from the header. */
        for (int i = skipList->level + 1; i <= level; ++i)
            update[i] = skipList->header;
        skipList->level = level;
    }

    for (int i = 0; i <= level; ++i) {
        newNode->forward[i] = update[i]->forward[i];
        update[i]->forward[i] = newNode;
    }

    printf("Inserted key " SKIP_KEY_FMT "\n", key);
    return true;
}

/* Prints the keys found on every level in use, lowest level first. */
void display(SkipList* skipList) {
    if (skipList == NULL || skipList->header == NULL) return;

    printf("Skip List:\n");
    for (int i = 0; i <= skipList->level; ++i) {
        Node* node = skipList->header->forward[i];
        printf("Level %d: ", i);
        while (node != NULL) {
            printf(SKIP_KEY_FMT " ", node->key);
            node = node->forward[i];
        }
        printf("\n");
    }
}

/*
 * Looks key up and prints the outcome.
 * Returns true if the key is present, false otherwise (or on invalid arguments).
 */
bool search(SkipList* skipList, KEY_TYPE key) {
    if (skipList == NULL || skipList->header == NULL) return false;
#if ENABLE_TYPE_CHAR
    if (key == NULL) return false;
#endif

    Node* current = skipList->header;
    for (int i = skipList->level; i >= 0; --i) {
        while (current->forward[i] != NULL &&
               compareKeys(current->forward[i]->key, key) < 0) {
            current = current->forward[i];
        }
    }

    current = current->forward[0];
    if (current != NULL && compareKeys(current->key, key) == 0) {
        printf("Key " SKIP_KEY_FMT " found with value " SKIP_KEY_FMT "\n", key, current->value);
        return true;
    }

    printf("Key " SKIP_KEY_FMT " not found\n", key);
    return false;
}

typedef struct SkipList kvs_skiplist_t;
kvs_skiplist_t global_skiplist;
// 5 + 2
/*
 * Skip-list adapter for the "5 + 2" engine interface
 * (set / get / mod / del / exist + create / destory).
 *
 * These are placeholders: the adapter is not implemented yet. Each function
 * returns an explicit failure value (-1, or NULL for get) so that a caller
 * never reads the indeterminate result of a non-void function that falls off
 * its end.
 */
int kvs_skiplist_create(kvs_skiplist_t *skiplist) {
    (void)skiplist;
    return -1; /* not implemented */
}

void kvs_skiplist_destory(kvs_skiplist_t *skiplist) {
    (void)skiplist; /* nothing to release yet */
}

int kvs_skiplist_set(kvs_skiplist_t *skiplist, char *key, char *value) {
    (void)skiplist;
    (void)key;
    (void)value;
    return -1; /* not implemented */
}

char * kvs_skiplist_get(kvs_skiplist_t *skiplist, char *key) {
    (void)skiplist;
    (void)key;
    return NULL; /* not implemented */
}

int kvs_skiplist_mod(kvs_skiplist_t *skiplist, char *key, char *value) {
    (void)skiplist;
    (void)key;
    (void)value;
    return -1; /* not implemented */
}

int kvs_skiplist_del(kvs_skiplist_t *skiplist, char *key) {
    (void)skiplist;
    (void)key;
    return -1; /* not implemented */
}

int kvs_skiplist_exist(kvs_skiplist_t *skiplist, char *key) {
    (void)skiplist;
    (void)key;
    return -1; /* not implemented */
}
性能测试
array
针对于实现的数组、rbtree、hash,我们来进行一下性能测试。首先是在同一条连接上循环 10w 轮请求(每轮 9 条指令,共 90w 次请求),我们来看一下对应的 qps。下面是数组的测试用例,其他的进行一下修改即可:
/*
 * Throughput test for the array engine.
 *
 * Runs 100000 rounds over connfd; every round issues the same nine
 * request/expected-reply pairs through testcase(). Afterwards it prints the
 * elapsed time in milliseconds and the resulting requests per second.
 */
void testcase_array_10w(int connfd)
{
    enum { ROUNDS = 100000, REQUESTS_PER_ROUND = 9 };

    /* { request, expected reply, case label } -- issued in this order each round. */
    static char *const steps[REQUESTS_PER_ROUND][3] = {
        {"SET Teacher King", "OK\r\n",      "SET-Teacher"},
        {"GET Teacher",      "King\r\n",    "GET-Teacher"},
        {"MOD Teacher Mark", "OK\r\n",      "MOD-Teacher"},
        {"GET Teacher",      "Mark\r\n",    "GET-Teacher"},
        {"EXIST Teacher",    "EXIST\r\n",   "EXIST-Teacher"},
        {"DEL Teacher",      "OK\r\n",      "DEL-Teacher"},
        {"GET Teacher",      "NOEXIST\r\n", "GET-Teacher"},
        {"MOD Teacher Mark", "NOEXIST\r\n", "MOD-Teacher"},
        {"EXIST Teacher",    "NOEXIST\r\n", "EXIST-Teacher"},
    };

    struct timeval tv_begin;
    gettimeofday(&tv_begin, NULL);

    for (int round = 0; round < ROUNDS; round++) {
        for (int step = 0; step < REQUESTS_PER_ROUND; step++) {
            testcase(connfd, steps[step][0], steps[step][1], steps[step][2]);
        }
    }

    struct timeval tv_end;
    gettimeofday(&tv_end, NULL);

    int time_used = TIME_SUB_MS(tv_end, tv_begin); // elapsed milliseconds
    printf("testcase_10w cost %d ms, %d qps/s\n", time_used,
           ROUNDS * REQUESTS_PER_ROUND * 1000 / time_used);
}
可以看见,测试结果如下,array 对应的 qps 保持在 14500 qps/s 左右进行波动:
rbtree
可以看见,测试结果如下,rbtree 对应的 qps 依然是保持在 14500 qps/s 左右进行波动:
hash
可以看见,测试结果如下,hash 对应的 qps 依然是保持在 14800 qps/s 左右进行波动:
其实三者在处理效率上并没有太大的差别。上面采取的是 reactor 进行测试的,如果有兴趣也可以采用协程和 proactor 进行一下测试,proactor 其实在效率上是略微快一点儿的。
项目总结
以上就是整个 KV 存储引擎的实现,它主要是仿照 Redis 进行设计的,也是将数据存储在内存当中。但是 Redis 是一个非常大的类别的数据库,可以存储各种各样的东西,而我们实现的这个 KV 存储引擎可以只针对某些数据的存储,这就意味着可以把性能优化到极致。
整个项目的整体架构也如下图所示:
- 网络层:网络层使用了三种方式来进行实现,reactor,io_uring,Ntyco,但是都进行了封装,封装以后,其实对外界来说,是看不出来是使用哪一个来进行实现的,外界感知不到。
- 协议层:协议层就是仿照 Redis 的协议进行设计,虽然已经有了 TCP 协议,但是收发数据的协议规则依然需要我们自己去进行制定。
- 核心层:核心层其实就是 main 函数的入口,kvstore 就放在这儿进行实现,他就包括实现了与网络层进行互通,协议的制定,后续跟接口处进行适配,当然也包括了一些规则的制定等等。
- 接口适配层:当前采用的就是一个 5 + 2 的工作模式,
(set + get + mod + del + exist + create + destory)
的规则来进行接口设计,后续任何数据结构的添加都可以参考这种方式来进行设计。
- kv引擎层:kv引擎层就是核心的数据结构,包括 array、rbtree、hash、skiplist 等,接口层对应的接口的具体实现以及数据结构的具体实现都是在 kv 引擎层的。
优化点
- 当前项目在内存申请方面是直接采取 malloc 函数进行操作的,我们可以针对于当前 kv 引擎自己去实现一个内存池结构进行适配,在申请内存的过程中从内存池当中进行申请,这样可以减少内存碎片的产生,同时也可以提高处理的效率。
- 数据持久化:当前是将数据保存在内存当中的,程序一旦崩溃或者设备断电,数据也就没了。我们也可以像 Redis 一样去制定一个自己的数据持久化策略,将一些数据保存在磁盘当中去。
- 主从复制的策略:当前数据都是保存在一台服务器上的,这台服务器出问题了数据也就没了。我们是不是也可以采用主从的保存方式:主数据库出现问题了,从库接管接下来的操作,这样就可以保证数据的安全性。