关于文件分片的介绍和应用
文件分片,顾名思义,就是将一个大文件分割成多个小的文件块(chunk)。每个文件块都是原始文件的一部分,并可以通过特定的方式将这些小文件块重新组装成原始文件。
1. 基本原理:
文件分片从底层来看,主要是对文件进行字节级别的读取和分割。可以将文件看作一个巨大的字节数组,分片的过程就是按照一定的规则将这个字节数组切割成多个小数组,然后将每个小数组写入到不同的文件中。
- 定义分片大小 (Chunk Size): 这是核心参数。文件会按这个大小被切割成若干分片 (最后一个分片可能小于这个大小)。
- 读取文件: 以二进制方式读取文件。
- 分割数据: 根据 `Chunk Size`,将读取到的文件内容分割成多个字节数组。
- 写入分片文件: 每个字节数组 (chunk) 被写入到单独的文件中。通常会为每个分片文件按序号命名,以便后续可以按顺序组装它们。
- 元数据: 在分片的同时,通常会生成一个元数据文件,其中包含原始文件名、分片数量、每个分片的顺序和大小等信息。这个元数据文件是后续重新组装文件的关键。
2. 关键技术:
- 文件读取: 使用 `InputStream` 或 `FileChannel` 等进行二进制文件读取。`FileChannel` 通常提供更高的性能。
- 字节数组操作: 使用 `byte[]` 来存储和操作文件数据。
- 文件写入: 使用 `OutputStream` 或 `FileChannel` 将字节数组写入到各个分片文件中。同样地,`FileChannel` 通常有更好的性能。
- 偏移量 (Offset): 在读取和写入过程中,需要记录每个分片的起始位置 (偏移量),以便正确地分割和组装文件。
3. 参考代码 (来源于练习项目)
package com.frontend.file;import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Comparator;
import java.util.stream.Stream;

/**
 * Reassembles a file from the chunk files stored in one download folder.
 *
 * <p>Chunks are read from {@code <temp download root>/<inputFolder>} and the
 * merged result is written to {@code <user file root>/<outputFolder>/<fileName>}.
 * Both roots are hard-coded in this class. Chunks are concatenated in
 * ascending order of their file names (plain string comparison), so the
 * names must sort in the intended chunk order.
 */
public class FileMerger {

    /** Root directory that holds one sub-folder of chunk files per download. */
    private static final String tempFilePath = "D:/data/project/com.frontEndProject/temp/download";

    /** Root directory that receives the merged files. */
    private static final String userFilePath = "D:/data/project/com.frontEndProject/userFile";

    private final String inputFolder;
    private final String outputFolder;
    private final String fileName;

    /**
     * @param inputFolder  sub-folder of the temp download root containing the chunks
     * @param outputFolder sub-folder of the user file root that receives the merged file
     * @param fileName     name of the merged file
     */
    public FileMerger(String inputFolder, String outputFolder, String fileName) {
        this.inputFolder = inputFolder;
        this.outputFolder = outputFolder;
        this.fileName = fileName;
    }

    /**
     * Concatenates every regular file in the chunk folder into the target file.
     *
     * <p>Missing chunk and output directories are created. An existing target
     * file is truncated and rewritten. Entries of the chunk folder that are not
     * regular files (for example sub-directories) are ignored.
     *
     * @return the path of the merged file
     * @throws IOException          if a directory cannot be created or listed, or the
     *                              target file cannot be opened
     * @throws UncheckedIOException if reading a chunk or appending it to the target fails
     */
    public Path mergeFiles() throws IOException {
        Path chunkPath = Paths.get(tempFilePath + "/" + inputFolder);
        Path parentDir = Paths.get(userFilePath + "/" + outputFolder);
        Path saveFilePath = Paths.get(userFilePath + "/" + outputFolder + "/" + fileName);

        if (!Files.exists(chunkPath)) {
            Files.createDirectories(chunkPath);
        }
        if (!Files.exists(parentDir)) {
            Files.createDirectories(parentDir);
        }

        // Files.newOutputStream creates the file when absent and truncates it otherwise,
        // so no separate createFile step is required.
        try (Stream<Path> entries = Files.list(chunkPath);
             OutputStream out = new BufferedOutputStream(Files.newOutputStream(saveFilePath))) {
            entries.filter(Files::isRegularFile)
                    .sorted(Comparator.comparing(p -> p.getFileName().toString()))
                    .forEach(chunk -> appendChunk(chunk, out));
        }
        return saveFilePath;
    }

    /** Appends the full content of one chunk file to the merged output. */
    private static void appendChunk(Path chunk, OutputStream out) {
        try {
            Files.copy(chunk, out);
        } catch (IOException e) {
            // forEach cannot propagate checked exceptions; keep the cause and name the chunk.
            throw new UncheckedIOException("Failed to append chunk " + chunk, e);
        }
    }
}
package com.frontend.file;import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.NoSuchAlgorithmException;
import java.util.Comparator;
import java.util.stream.Stream;

/**
 * Splits a source file into fixed-size chunk files.
 *
 * <p>Chunks are written to {@code <temp upload root>/<outputFolder>}; the root is
 * hard-coded in this class. Each chunk is named
 * {@code <SHA-256 of the source file><6-digit 1-based index>.temp}.
 */
public class FileSplitter {

    /** Root directory that holds one sub-folder of chunk files per upload. */
    private static final String tempFilePath = "D:/data/project/com.frontEndProject/temp/upload";

    private final File sourceFile;
    private final String outputFolder;
    private final int chunkSize;

    /**
     * @param sourceFile   file to split
     * @param outputFolder sub-folder of the temp upload root that receives the chunks
     * @param chunkSize    size in bytes of every chunk except possibly the last
     * @throws IllegalArgumentException if {@code chunkSize} is not positive
     */
    public FileSplitter(File sourceFile, String outputFolder, int chunkSize) {
        // A zero-length buffer makes read() return 0 forever (endless empty chunks);
        // a negative size fails only later with NegativeArraySizeException.
        if (chunkSize <= 0) {
            throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
        }
        this.sourceFile = sourceFile;
        this.outputFolder = outputFolder;
        this.chunkSize = chunkSize;
    }

    /**
     * Writes the chunks of the source file and lists the output folder.
     *
     * <p>A chunk file that already exists with the expected size is left
     * untouched; any other chunk is (re)written.
     *
     * @return every entry of the output folder sorted by file name; this is a
     *         directory listing, so it also contains entries that were in the
     *         folder before this call. The caller must close the stream.
     * @throws IOException          if the source cannot be read or a chunk cannot be written
     * @throws UncheckedIOException if the output folder cannot be listed
     * @throws IllegalStateException if SHA-256 is unavailable in this runtime
     */
    public Stream<Path> splitFile() throws IOException {
        Path outputPath = Paths.get(tempFilePath + "/" + outputFolder);
        if (!Files.exists(outputPath)) {
            Files.createDirectories(outputPath);
        }

        String fileHash;
        try {
            fileHash = FileHash.calculateHash(sourceFile, "SHA-256");
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("SHA-256 digest is not available", e);
        }

        try (InputStream in = new BufferedInputStream(new FileInputStream(sourceFile))) {
            byte[] buffer = new byte[chunkSize];
            int index = 0;
            int filled;
            while ((filled = fillBuffer(in, buffer)) > 0) {
                String chunkFileName = String.format("%s%06d.temp", fileHash, ++index);
                Path chunkFile = outputPath.resolve(chunkFileName);
                // Reuse a chunk from an earlier run only when it is complete.
                if (Files.exists(chunkFile) && Files.size(chunkFile) == filled) {
                    continue;
                }
                try (OutputStream out = Files.newOutputStream(chunkFile)) {
                    out.write(buffer, 0, filled);
                }
            }
        }

        try {
            return Files.list(outputPath)
                    .sorted(Comparator.comparing(p -> p.getFileName().toString()));
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to list chunk folder " + outputPath, e);
        }
    }

    /**
     * Reads until the buffer is full or the stream ends.
     *
     * <p>A single {@code read} call may return fewer bytes than requested, which
     * would otherwise produce chunks shorter than {@code chunkSize} mid-file.
     *
     * @return the number of bytes stored in the buffer; 0 at end of stream
     */
    private static int fillBuffer(InputStream in, byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < buffer.length) {
            int n = in.read(buffer, filled, buffer.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return filled;
    }
}
package com.frontend.file;import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * Helpers for computing and checking hex-encoded file digests.
 */
public class FileHash {

    /** Lower-case hex digits used to encode digest bytes. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Computes the digest of a file's content.
     *
     * @param file      file to read
     * @param algorithm {@link MessageDigest} algorithm name, e.g. {@code "SHA-256"}
     * @return the digest as a lower-case hex string, two characters per byte
     * @throws NoSuchAlgorithmException if the algorithm is not available
     * @throws IOException              if the file cannot be opened or read
     */
    public static String calculateHash(File file, String algorithm) throws NoSuchAlgorithmException, IOException {
        MessageDigest messageDigest = MessageDigest.getInstance(algorithm);
        try (FileInputStream in = new FileInputStream(file)) {
            byte[] buffer = new byte[8192];
            int readBytes;
            while ((readBytes = in.read(buffer)) != -1) {
                messageDigest.update(buffer, 0, readBytes);
            }
        }
        return toHex(messageDigest.digest());
    }

    /**
     * Checks whether the file at the given path has the given SHA-256 digest.
     *
     * @param fileHash           expected digest as a hex string; letter case is ignored
     * @param saveRecordFilePath path of the file to check
     * @return {@code true} if the path is an existing regular file whose SHA-256
     *         matches; {@code false} if it does not match or the path is missing
     *         or not a regular file
     * @throws IOException              if the file cannot be read
     * @throws NoSuchAlgorithmException if SHA-256 is not available
     */
    public static boolean VerifyFileHash(String fileHash, String saveRecordFilePath) throws IOException, NoSuchAlgorithmException {
        File file = new File(saveRecordFilePath);
        // isFile() also rejects directories, which cannot be opened for reading.
        if (!file.isFile()) {
            return false;
        }
        String actualHash = calculateHash(file, "SHA-256");
        // Hex digests are case-insensitive; accept upper-case input as well.
        return actualHash.equalsIgnoreCase(fileHash);
    }

    /**
     * Prints the length of the SHA-256 hex digest of a file.
     *
     * @param args optional: the path of the file to hash; a fixed default path is used when absent
     */
    public static void main(String[] args) throws NoSuchAlgorithmException, IOException {
        Path filePath = args.length > 0
                ? Paths.get(args[0])
                : Paths.get("D:\\code\\Java\\frontEndProject\\src\\main\\java\\com\\frontend\\controller\\EditorialMaterialController.java");
        String result = calculateHash(filePath.toFile(), "SHA-256");
        System.out.println(result.length());
    }

    /** Encodes bytes as lower-case hex, two characters per byte. */
    private static String toHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            hex.append(HEX_DIGITS[(b >> 4) & 0x0f]).append(HEX_DIGITS[b & 0x0f]);
        }
        return hex.toString();
    }
}