获取ftp目录及子目录下的文件并下载下来
/*
利用AI写了一个在fedora系统下的c++程序,
实现功能为,先获取ftp指定目录及子目录下的文件名称,然后把本地目录中没有的文件下载下来。
# 安装依赖
sudo dnf install libcurl-devel
# 编译
g++ -o ftp_sync_curl ftp_sync_curl.cpp -lcurl -lstdc++fs -O2
*/
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <regex>
#include <set>
#include <sstream>
#include <string>
#include <system_error>
#include <vector>

#include <curl/curl.h>
using namespace std;
namespace fs = std::filesystem;
// Configuration.
// Base URL of the FTP server. Remote paths are appended to it verbatim when
// building request URLs.
// NOTE(review): because this ends with '/' and REMOTE_BASE starts with '/',
// the resulting URL contains "//" after the host; libcurl appears to treat
// that as a path relative to the server root rather than the login
// directory — confirm this is intended.
const string FTP_SERVER = "ftp://example.com/";
// Credentials passed to libcurl through CURLOPT_USERPWD.
const string FTP_USERPWD = "username:password";
// Remote directory where the recursive listing starts. Its length is used to
// strip the prefix from remote paths, and download URLs are built as
// FTP_SERVER + REMOTE_BASE + relative path, so it must keep its trailing '/'.
const string REMOTE_BASE = "/remote/path/";
// Local directory the files are mirrored into. Local paths are built as
// LOCAL_BASE + relative path and its length is used to strip the prefix from
// local paths, so it must keep its trailing '/'.
const string LOCAL_BASE = "/local/path/";
// One entry of a remote directory listing.
struct FtpFileInfo {
    // Entry name as reported by the server.
    std::string path;
    // True when the entry is a directory. Defaults to false so that a
    // default-constructed FtpFileInfo never carries an indeterminate value.
    bool is_directory = false;
};
// libcurl write callback that accumulates received data in a std::string.
//
// ptr   - start of the received chunk (size * nmemb bytes, not NUL-terminated)
// data  - destination string; the chunk is appended to it
//
// Returns the number of bytes appended, which is always the full chunk.
size_t write_callback(void* ptr, size_t size, size_t nmemb, std::string* data) {
    const size_t chunk_bytes = size * nmemb;
    const char* chunk = static_cast<const char*>(ptr);
    data->append(chunk, chunk_bytes);
    return chunk_bytes;
}
// Parses the text of a UNIX-style ("ls -l") FTP LIST response.
//
// Each line is expected to look like
//   drwxr-xr-x 2 owner group 4096 Jan 01 12:00 name
// and yields one FtpFileInfo holding the entry name and whether the first
// character marks it as a directory ('d'). Names may contain spaces.
//
// Only regular files ('-') and directories ('d') are recognised; any other
// non-empty line (for example a "total N" header or a symbolic link) is
// reported on stderr and skipped. Empty lines are skipped silently.
//
// list - raw LIST output; lines may end in "\n" or "\r\n"
// Returns the recognised entries in the order they appear.
std::vector<FtpFileInfo> parse_ftp_list(const std::string& list) {
    // Built once: constructing a std::regex is expensive and the pattern is
    // constant. Capture groups: 1 = type flag, 2 = permissions, 3 = size,
    // 4 = date, 5 = name. Owner/group accept any non-blank token and the
    // permission field accepts setuid/setgid/sticky letters.
    static const std::regex unix_pattern(
        R"(^([d-])([rwxsStT-]{9})\s+\d+\s+\S+\s+\S+\s+(\d+)\s+(\w+\s+\d+\s+[\d:]+)\s+(.+)$)");

    std::vector<FtpFileInfo> files;
    std::istringstream iss(list);
    std::string line;
    while (std::getline(iss, line)) {
        // FTP servers terminate lines with CRLF; drop the '\r' so it neither
        // blocks the match nor ends up in the file name.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        if (line.empty()) {
            continue;
        }

        std::smatch match;
        if (!std::regex_match(line, match, unix_pattern)) {
            std::cerr << "Unmatched line: " << line << std::endl;
            continue;
        }

        FtpFileInfo info;
        // str() without an index is the whole match; the individual fields
        // must be read through their capture-group index.
        info.is_directory = (match.str(1) == "d");
        info.path = match.str(5);
        files.push_back(info);
    }
    return files;
}
// Recursively collects the files found below a remote directory.
//
// curl   - libcurl easy handle; its URL, write-function and write-data
//          options are overwritten by this call
// path   - remote directory to list, appended to FTP_SERVER to form the URL;
//          the callers in this file always pass a path ending in '/'
// result - receives every file path with its first REMOTE_BASE.length()
//          characters removed
//
// If the listing request fails, the error is printed to stderr and the
// directory contributes nothing to result.
void get_remote_files(CURL* curl, const std::string& path, std::set<std::string>& result) {
    const std::string listing_url = FTP_SERVER + path;
    std::string listing;

    curl_easy_setopt(curl, CURLOPT_URL, listing_url.c_str());
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &listing);

    const CURLcode status = curl_easy_perform(curl);
    if (status != CURLE_OK) {
        std::cerr << "curl failed: " << curl_easy_strerror(status) << std::endl;
        return;
    }

    for (const FtpFileInfo& entry : parse_ftp_list(listing)) {
        // The pseudo-entries for the current and parent directory would make
        // the recursion loop forever.
        const bool is_dot_entry = (entry.path == "." || entry.path == "..");
        if (is_dot_entry) {
            continue;
        }

        const std::string child = path + entry.path;
        if (!entry.is_directory) {
            result.insert(child.substr(REMOTE_BASE.length()));
            continue;
        }
        // Descend into the subdirectory, keeping the trailing '/'.
        get_remote_files(curl, child + "/", result);
    }
}
// Collects the regular files below LOCAL_BASE.
//
// Returns their paths with the first LOCAL_BASE.length() characters removed
// and '/' as the separator. Returns an empty set when LOCAL_BASE is not an
// existing directory, so that a first sync into a directory that has not been
// created yet downloads everything instead of failing.
//
// Throws std::filesystem::filesystem_error if the directory walk itself fails.
std::set<std::string> get_local_files() {
    std::set<std::string> local_files;

    // Iterating a missing directory would throw; treat it as "no local files".
    std::error_code ec;
    if (!fs::is_directory(LOCAL_BASE, ec)) {
        return local_files;
    }

    for (const auto& entry : fs::recursive_directory_iterator(LOCAL_BASE)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        // generic_string() uses '/' as the directory separator on every
        // platform. Unlike a blanket '\\' -> '/' replacement it does not
        // rewrite backslashes that are part of a POSIX file name.
        local_files.insert(entry.path().generic_string().substr(LOCAL_BASE.length()));
    }
    return local_files;
}
// libcurl write callback used by download_file: writes the received chunk to
// the std::ofstream registered through CURLOPT_WRITEDATA.
// Returns the number of bytes written, or 0 when the stream reports an error;
// a short count makes libcurl abort the transfer with a write error.
static size_t write_to_file_callback(char* ptr, size_t size, size_t nmemb, void* userdata) {
    auto* out = static_cast<std::ofstream*>(userdata);
    const size_t total = size * nmemb;
    out->write(ptr, static_cast<std::streamsize>(total));
    return out->good() ? total : 0;
}

// Downloads a single file from the FTP server.
//
// curl        - libcurl easy handle; its URL, write-function and write-data
//               options are overwritten by this call
// remote_path - file path relative to REMOTE_BASE
// local_path  - destination path; missing parent directories are created
//
// The data is first written to "<local_path>.part" and renamed to local_path
// only after the transfer and the final flush succeeded. A failed or
// interrupted download therefore never leaves a truncated file under the
// final name, where a later run would mistake it for a complete copy.
//
// Returns true on success. On failure a message is printed to stderr, the
// partial file is removed and false is returned.
// Throws std::filesystem::filesystem_error if the parent directory cannot be
// created.
bool download_file(CURL* curl, const std::string& remote_path, const std::string& local_path) {
    const fs::path target{local_path};

    // A bare file name has an empty parent path, which must not be passed to
    // create_directories.
    const fs::path local_dir = target.parent_path();
    if (!local_dir.empty() && !fs::exists(local_dir)) {
        fs::create_directories(local_dir);
    }

    fs::path partial = target;
    partial += ".part";

    std::ofstream ofs(partial, std::ios::binary);
    if (!ofs) {
        std::cerr << "Failed to create: " << local_path << std::endl;
        return false;
    }

    const std::string url = FTP_SERVER + REMOTE_BASE + remote_path;
    curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
    // The string-appending callback used for listings must not be used here:
    // the user pointer is an std::ofstream, not an std::string.
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_to_file_callback);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ofs);

    const CURLcode res = curl_easy_perform(curl);
    // close() flushes buffered data; a failure here sets failbit.
    ofs.close();

    std::error_code ec;
    if (res != CURLE_OK) {
        std::cerr << "Download failed: " << curl_easy_strerror(res) << std::endl;
        fs::remove(partial, ec);
        return false;
    }
    if (ofs.fail()) {
        std::cerr << "Failed to write: " << local_path << std::endl;
        fs::remove(partial, ec);
        return false;
    }

    fs::rename(partial, target, ec);
    if (ec) {
        std::cerr << "Failed to create: " << local_path << std::endl;
        fs::remove(partial, ec);
        return false;
    }
    return true;
}
// Entry point: lists the files below REMOTE_BASE on the FTP server, lists the
// files below LOCAL_BASE, and downloads every remote file that has no local
// counterpart.
//
// Returns 0 when every missing file was downloaded, and 1 when libcurl could
// not be initialised, a download failed, or an exception was caught.
int main() {
    CURL* curl = curl_easy_init();
    if (!curl) {
        std::cerr << "Failed to initialize libcurl" << std::endl;
        return 1;
    }

    // Options shared by every request made through this handle.
    curl_easy_setopt(curl, CURLOPT_USERPWD, FTP_USERPWD.c_str());
    curl_easy_setopt(curl, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_SINGLECWD);
    curl_easy_setopt(curl, CURLOPT_FTP_USE_EPSV, 0L);  // disable EPSV

    int exit_code = 0;
    try {
        // Remote file list.
        std::set<std::string> remote_files;
        get_remote_files(curl, REMOTE_BASE, remote_files);
        std::cout << "Found " << remote_files.size() << " remote files" << std::endl;

        // Local file list.
        const std::set<std::string> local_files = get_local_files();
        std::cout << "Found " << local_files.size() << " local files" << std::endl;

        // Download whatever is missing locally.
        for (const auto& file : remote_files) {
            if (local_files.count(file) != 0) {
                continue;
            }
            std::cout << "Downloading: " << file << "..." << std::flush;
            if (download_file(curl, file, LOCAL_BASE + file)) {
                std::cout << "OK" << std::endl;
            } else {
                // Terminate the progress line and remember the failure so the
                // exit status reflects it; keep going with the other files.
                std::cout << "FAILED" << std::endl;
                exit_code = 1;
            }
        }
    }
    catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        exit_code = 1;
    }

    curl_easy_cleanup(curl);
    return exit_code;
}