java实现多线程分片下载超大文件,支持HTTPS。
这段时间研究了下文件备份,用了几个软件都觉得不太理想,还是自己写的好用。主要功能是:
1、待备份的文件服务器:提供WEB服务,并写一个页面列出要备份的文件列表和下载地址。
2、备份程序,定时读取URL页面,解析得到备份文件列表和地址,并下载保存。
这样就可以实现任何要备份的文件都可以使用WEB列表模式下载备份。
直接上代码。downx.java负责下载文件:
package mycmf.down;import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.concurrent.*;
import java.math.BigInteger;

/**
 * Multi-threaded HTTP(S) downloader that fetches a file in fixed-size byte ranges.
 *
 * <p>The file size is read with a HEAD request, every range ("part") is downloaded
 * into its own temporary file by a pool of worker threads, and the parts are then
 * concatenated into the destination file.
 *
 * <p>The configuration lives in static fields, so it is shared by all instances and
 * two downloads must not run at the same time in one JVM.
 */
public class downx {

    /** Size of one part in bytes (default 10 MB). */
    static int PART_SIZE = 10 * 1024 * 1024;
    /** Number of parts of the download in progress; only used for progress output. */
    static int PART_COUNT = 0;
    /** URL of the file being downloaded. */
    static String FILE_URL = "";
    /** Path of the merged destination file. */
    static String FILE_PATH = "";
    /** Path prefix of the temporary part files ({@code TEMP_PATH + ".part" + index}). */
    static String TEMP_PATH = "";
    /** Pause in milliseconds after submitting a part; values <= 10 select the built-in defaults. */
    static int SLEEP = 10;

    /** Number of parts downloaded in parallel. */
    private static final int DOWNLOAD_THREADS = 4;

    /** Manual test entry point: downloads one hard-coded file and prints the elapsed time. */
    public static void main(String[] args) {
        downx DW = new downx();
        String FILEURL = "http://10.10.34.211:8886/mycmf1/do_video/202509/20250923_003537.mp4";
        String FILEPATH = "G:\\backup_test2\\20250923_003537.mp4";
        String TEMPPATH = "G:\\temp\\20250923_003537.mp4";
        long startTime = System.currentTimeMillis();
        String ree = DW.runDown(FILEURL, FILEPATH, TEMPPATH);
        long endTime = System.currentTimeMillis();
        long secs = (endTime - startTime) / 1000;
        System.out.println(FILEURL + "下载耗时:" + secs + "秒");
        System.out.println(ree);
    }

    /** Sets the pause (ms) applied after each part is submitted. */
    public void setSLEEP(int v) {
        SLEEP = v;
    }

    /** Sets the part size in bytes. */
    public void setPARTSIZE(int v) {
        PART_SIZE = v;
    }

    /**
     * Downloads {@code FileUrl} into {@code saveDir} using {@code tempDir} as the prefix
     * of the temporary part files.
     *
     * @param FileUrl URL of the file to download
     * @param saveDir path of the destination file (despite the name, a file path)
     * @param tempDir path prefix for the temporary part files
     * @return {@code "PASS:..."} on success, otherwise a string starting with {@code "ERR:"}
     */
    public String runDown(String FileUrl, String saveDir, String tempDir) {
        FILE_URL = FileUrl;
        FILE_PATH = saveDir;
        TEMP_PATH = tempDir;
        if (PART_SIZE < 1) {
            return "ERR:PART_SIZE must be positive but is " + PART_SIZE;
        }

        int partCount = 0;
        ExecutorService executor = null;
        try {
            long fileSize = getFileSize(FILE_URL);
            if (fileSize < 1) {
                return "ERR:文件不存在";
            }
            long parts = (fileSize + PART_SIZE - 1) / PART_SIZE;
            if (parts > Integer.MAX_VALUE) {
                return "ERR:too many parts (" + parts + "), increase PART_SIZE";
            }
            partCount = (int) parts;
            PART_COUNT = partCount;
            System.out.println("文件Byte:" + fileSize);
            System.out.println("所有文件分片:" + partCount);

            ensureParentDirectory(FILE_PATH);
            ensureParentDirectory(TEMP_PATH);

            executor = Executors.newFixedThreadPool(DOWNLOAD_THREADS);
            DownloadTask[] tasks = new DownloadTask[partCount];
            for (int i = 0; i < partCount; i++) {
                long[] range = partRange(i, PART_SIZE, fileSize);
                BigInteger startByte = BigInteger.valueOf(range[0]);
                BigInteger endByte = BigInteger.valueOf(range[1]);
                // Positive for the last part; only kept so the progress line stays unchanged.
                int result = Long.compare(range[0] + PART_SIZE, fileSize - 1);
                System.out.println(FILE_URL + " 创建分片下载线程:" + i + " / " + partCount
                        + " Range=" + startByte + " - " + endByte + " : " + result);
                tasks[i] = new DownloadTask(FILE_URL, FILE_PATH, TEMP_PATH, startByte, endByte, i);
                executor.submit(tasks[i]);
                Thread.sleep(submitPauseMillis(fileSize));
            }
            executor.shutdown();
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);

            // A single broken part would silently corrupt the merged file, so stop here.
            for (DownloadTask task : tasks) {
                IOException failure = task.getFailure();
                if (failure != null) {
                    throw new IOException("part " + task.partIndex + " failed: " + failure, failure);
                }
            }

            System.out.println("所有文件分片下载完成,正在合并文件...");
            try {
                mergeFileParts(partCount);
            } catch (IOException e) {
                // Do not leave a half-merged file behind: callers skip files that already exist.
                new File(FILE_PATH).delete();
                throw e;
            }
            System.out.println("文件合并完成,下载成功!");
            return "PASS:下载成功!";
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            if (executor != null) {
                executor.shutdownNow();
            }
            deletePartFiles(partCount);
            return "ERR:download interrupted";
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
            if (executor != null) {
                executor.shutdownNow();
            }
            deletePartFiles(partCount);
            return "ERR:" + e;
        } finally {
            // Worker threads are non-daemon; an executor left running keeps the JVM alive.
            if (executor != null && !executor.isShutdown()) {
                executor.shutdownNow();
            }
        }
    }

    /**
     * Computes the inclusive byte range of one part.
     *
     * <p>The multiplication is done in {@code long}: {@code partIndex * partSize} as an
     * {@code int} product overflows once the offset passes 2 GB.
     *
     * @param partIndex zero-based part number
     * @param partSize  part size in bytes
     * @param fileSize  total file size in bytes
     * @return {@code {firstByte, lastByte}}, both inclusive
     */
    static long[] partRange(int partIndex, int partSize, long fileSize) {
        long start = (long) partIndex * partSize;
        long end = Math.min(start + partSize, fileSize) - 1;
        return new long[] {start, end};
    }

    /** Returns the pause applied after submitting a part: SLEEP when > 10, else 500 ms (multi-part) or 100 ms. */
    private static long submitPauseMillis(long fileSize) {
        if (SLEEP > 10) {
            return SLEEP;
        }
        return fileSize > PART_SIZE ? 500 : 100;
    }

    /** Creates the parent directory of {@code path} when it does not exist yet. */
    private static void ensureParentDirectory(String path) throws IOException {
        File parent = new File(path).getAbsoluteFile().getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("cannot create directory " + parent);
        }
    }

    /** Best-effort removal of the temporary part files after a failed download. */
    private static void deletePartFiles(int partCount) {
        for (int i = 0; i < partCount; i++) {
            new File(TEMP_PATH + ".part" + i).delete();
        }
    }

    /**
     * Reads the file size with a HEAD request.
     *
     * @return the Content-Length, or -1 when the server does not answer with a 2xx status
     *         or sends no length
     */
    private static long getFileSize(String fileUrl) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(fileUrl).openConnection();
        try {
            connection.setRequestMethod("HEAD");
            connection.connect();
            if (connection.getResponseCode() / 100 != 2) {
                return -1;
            }
            return connection.getContentLengthLong();
        } finally {
            connection.disconnect();
        }
    }

    /** Downloads one byte range into {@code tempFile + ".part" + partIndex}. */
    static class DownloadTask implements Runnable {
        private final String fileUrl;
        /** Not used by the task; kept so the constructor signature stays unchanged. */
        private final String destFilePath;
        private final String tempFilePath;
        private final BigInteger startByte;
        private final BigInteger endByte;
        private final int partIndex;
        /** Set when the download failed; read by the coordinator after the pool has terminated. */
        private volatile IOException failure;

        public DownloadTask(String fileUrl, String destFilePath, String tempFile,
                BigInteger startByte, BigInteger endByte, int partIndex) {
            this.fileUrl = fileUrl;
            this.destFilePath = destFilePath;
            this.tempFilePath = tempFile;
            this.startByte = startByte;
            this.endByte = endByte;
            this.partIndex = partIndex;
        }

        /** Returns the error that aborted this part, or {@code null} when it completed. */
        IOException getFailure() {
            return failure;
        }

        @Override
        public void run() {
            try {
                download();
            } catch (IOException e) {
                failure = e;
                e.printStackTrace();
            } catch (RuntimeException e) {
                failure = new IOException(e);
                e.printStackTrace();
            }
        }

        /** Requests the range and verifies that exactly the requested bytes were received. */
        private void download() throws IOException {
            long expected = endByte.subtract(startByte).longValue() + 1;
            String partPath = tempFilePath + ".part" + partIndex;
            HttpURLConnection connection = (HttpURLConnection) new URL(fileUrl).openConnection();
            try {
                connection.setRequestProperty("Range", "bytes=" + startByte + "-" + endByte);
                connection.connect();
                int status = connection.getResponseCode();
                // 200 means the server ignored the Range header and sends the whole file;
                // that is only usable for a part that starts at byte 0.
                boolean wholeFileFromStart =
                        status == HttpURLConnection.HTTP_OK && startByte.signum() == 0;
                if (status != HttpURLConnection.HTTP_PARTIAL && !wholeFileFromStart) {
                    throw new IOException("unexpected HTTP status " + status
                            + " for range " + startByte + "-" + endByte);
                }

                long written = 0;
                try (InputStream inputStream = connection.getInputStream();
                        RandomAccessFile raf = new RandomAccessFile(partPath, "rw")) {
                    raf.setLength(0); // "rw" does not truncate an existing file
                    byte[] buffer = new byte[8192];
                    int bytesRead;
                    while ((bytesRead = inputStream.read(buffer)) != -1) {
                        raf.write(buffer, 0, bytesRead);
                        written += bytesRead;
                        if (written > expected) {
                            throw new IOException("server sent more than the requested "
                                    + expected + " bytes for part " + partIndex);
                        }
                    }
                }
                if (written != expected) {
                    throw new IOException("part " + partIndex + " is incomplete: got "
                            + written + " of " + expected + " bytes");
                }
                System.out.println(partPath + " 分片 " + partIndex + " / " + PART_COUNT + " 下载完成!");
            } finally {
                connection.disconnect();
            }
        }
    }

    /**
     * Concatenates the part files {@code 0..partCount-1} into {@code FILE_PATH} and
     * deletes each part after it has been appended.
     */
    private static void mergeFileParts(int partCount) throws IOException {
        try (RandomAccessFile mergedFile = new RandomAccessFile(FILE_PATH, "rw")) {
            mergedFile.setLength(0); // drop stale content of a pre-existing file
            byte[] buffer = new byte[1024 * 1024 * 2];
            for (int i = 0; i < partCount; i++) {
                File part = new File(TEMP_PATH + ".part" + i);
                try (RandomAccessFile partFile = new RandomAccessFile(part, "r")) {
                    int bytesRead;
                    while ((bytesRead = partFile.read(buffer)) != -1) {
                        mergedFile.write(buffer, 0, bytesRead);
                    }
                }
                part.delete();
            }
        }
    }
}
index.java负责抓取网页,找到需要下载的文件列表:
package mycmf.down;import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.security.SecureRandom;import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.security.cert.CertificateException;
import javax.security.cert.X509Certificate;import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;public class index {static String tempDir = "G:/temp";static int SLEEPP = 0 ; //11为最快static int PARTSIZE = 0 ; //10 * 1024 * 1024 ; // 10MB//下载URL下所有链接里的文件public static void main(String[] args) { //index INDX = new index();String urll = "http://10.10.34.211:8886/mycmf3/baklist/2XXX.jsp";urll = "https://www.bzbzyy.com/mycmf3/baklist/2.jsp";String rootDir = "G:/backup_test2/" ;String cfgfile = ""; try{ cfgfile = args[0]; }catch(Exception e){ }if(cfgfile.length()>5) {String cfg = getTextFull("utf-8", cfgfile);tempDir = getMYXML(cfg , "tempdir" , "<>").trim();urll = getMYXML(cfg , "bakurl" , "<>").trim();rootDir = getMYXML(cfg , "locfod" , "<>").trim();String sep = getMYXML(cfg , "partsep" , "<>").trim();String psz = getMYXML(cfg , "partsize" , "<>").trim();try{SLEEPP = Integer.parseInt(sep) ; }catch(Exception e){ }try{PARTSIZE = Integer.parseInt(psz) ; }catch(Exception e){ }try{ makeDir(tempDir); }catch(Exception e){ }try{ makeDir(rootDir); }catch(Exception e){ }System.out.println("读取到配置:" + cfg);}if(tempDir.length()>4 && urll.length()>4 && rootDir.length()>4) {System.out.println("开始下载备份:");System.out.println("tempdir:" + tempDir);System.out.println("bakurl:" + urll);System.out.println("locfod:" + rootDir);System.out.println("partsep:" + SLEEPP);System.out.println("partsize:" + PARTSIZE);System.out.println("-----------------------------------");String htm = getUrlHtml("UTF-8",urll);//System.out.println(htm);String ree = getLink(urll , rootDir);System.out.println(ree);}else{System.out.println("缺少参数:");System.out.println("tempdir:" + tempDir);System.out.println("bakurl:" + urll);System.out.println("locfod:" + rootDir);System.out.println("partsep:" + SLEEPP);System.out.println("partsize:" + PARTSIZE);System.out.println("-----------------------------------");}}public static String getLink(String urll , String fodname ) {String htm = getUrlHtml("UTF-8",urll).trim();if(htm.length()>10) {String saveDir = fodname ;if(fodname.trim().length()>0) 
{ try{ makeDir(saveDir); }catch(Exception e){ } }Document doc = Jsoup.parse(htm,"utf-8");Elements links = doc.select("a");for (Element link : links) {String href = link.attr("href").trim();//System.out.println(href);if(href.length()>3) {String ext = ""; if(href.indexOf(".")>0) { ext = href.substring( href.lastIndexOf(".")+1 , href.length()); }if(ext.length()<6 && ext.length()>1) { //是文件,则下载String fileName = href.substring(href.lastIndexOf("/")+1 , href.length());String saveFile = saveDir + "/" + fileName ;String tempFile = tempDir + "/" + fileName + "-" + System.currentTimeMillis() ;if(!file_exists(saveFile)){System.out.println(href + " > " + saveFile);downx DWN = new downx();if(SLEEPP > 10) { DWN.setSLEEP(SLEEPP); }if(PARTSIZE > 1024) { DWN.setPARTSIZE(PARTSIZE); }String ree = DWN.runDown(href , saveFile , tempFile ); }}else { //是目录System.out.println(href + " [目录] " );String fodd = link.text().trim();if(fodd.length()>0) {getLink(href , saveDir+"/"+fodd );}}}}}else{return "没有采集到下载文件";}return htm ;}////////////////////////////////////////////////////////////////以下为工具类//////////////////////////////////////////////////////////////public static String getUrlHtml(String charset,String url){String PageURL = url;String urlSource=PageURL;StringBuffer htmlBuffer=new StringBuffer();String returnStr=null;turnOffCertificateValidation(); //支持httpstry{ URL su = new URL (urlSource);URLConnection conn = su.openConnection();if(charset==null){ charset = ""; }if(charset.length()<1){ charset = "GBK" ; }InputStream imageSource=new URL(urlSource).openStream();int ch;while((ch=imageSource.read())>-1){htmlBuffer.append((char)ch);}imageSource.close();returnStr= new String(htmlBuffer);returnStr=new String(returnStr.getBytes("ISO8859_1"), charset );}catch(Exception e){ }if(returnStr!=null){return returnStr ;}else{return "empty" ;}} public static void turnOffCertificateValidation() {try {TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() {public 
java.security.cert.X509Certificate[] getAcceptedIssuers1() {return null;}public void checkClientTrusted(X509Certificate[] certs, String authType) {}public void checkServerTrusted(X509Certificate[] certs, String authType) {}@Overridepublic void checkClientTrusted(java.security.cert.X509Certificate[] arg0, String arg1)throws java.security.cert.CertificateException {// TODO Auto-generated method stub}@Overridepublic void checkServerTrusted(java.security.cert.X509Certificate[] arg0, String arg1)throws java.security.cert.CertificateException {// TODO Auto-generated method stub}@Overridepublic java.security.cert.X509Certificate[] getAcceptedIssuers() {// TODO Auto-generated method stubreturn null;}}};SSLContext sc = SSLContext.getInstance("SSL");sc.init(null, trustAllCerts, new SecureRandom());HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());} catch (Exception e) {e.printStackTrace();}} public static boolean makeDir(String dirs) throws Exception {boolean result = false;try {File fi = new File(dirs);//创建目录result = fi.mkdirs();} catch (Exception e) {result = false;//System.err.println(e.getMessage());}return result;}//TODO UB.file_exists 判断文件或文件夹是否存在public static boolean file_exists(String thefile){//boolean exists = (new File("filename")).exists();boolean exists = false ;try{ exists = (new File(thefile)).exists(); } catch (Exception e) { }return exists ;}//TODO UB.getText 读指定编码的文件public static String getTextFull(String code ,String Dir){if(code.length()<1){ code = "UTF-8";}//System.out.println(code+"="+Dir);//System.out.println("[getText(code,Dir):" + code + "]" + Dir) ;String str = "";if(file_exists(Dir)){try{File f = new File(Dir);InputStreamReader read = new InputStreamReader (new FileInputStream(f),code);BufferedReader reader=new BufferedReader(read); String line; StringBuffer content=new StringBuffer((int)f.length()); while ((line = reader.readLine()) != null) { //System.out.println(line);content.append(line).append("\n");//content.append(line);} 
read.close();reader.close();if(content.length()>0){ content.deleteCharAt(content.length()-1); }str = content.toString();}catch(Exception e){ }} return str ;} public static String getMYXML(String XMLINE,String KEY,String M){String t = "";String e1 = "<" +KEY+">";String e2 = "</"+KEY+">";if(M.length()==2 && M.equals("[]")){e1 = "[" + KEY + "]";e2 = "[/" + KEY + "]";}if(M.length()==2 && M.equals("<>")){e1 = "<" + KEY + ">";e2 = "</" + KEY + ">";}if(M.length()==2 && M.equals("{}")){e1 = "{" + KEY + "}";e2 = "{/" + KEY + "}";}try{if(XMLINE.length()>5 && KEY.length()>0 && XMLINE.length()> (e1.length() + e2.length())){String e = "";if(XMLINE.indexOf(e1)>=0){e = XMLINE.substring( XMLINE.indexOf(e1) + e1.length() , XMLINE.length());if(e.indexOf(e2)>=0){e = e.substring(0 ,e.indexOf(e2));t = e ; }} }}catch(Exception e) { }return t;} public String dealNull(String str) {String returnstr = null;if(str == null){returnstr = "";}else{returnstr = str;}return returnstr;}public String replace(String str,String substr,String restr){String[] tmp = split(str,substr);String returnstr = null;if(tmp.length!=0) {returnstr = tmp[0];for(int i = 0 ; i < tmp.length - 1 ; i++)returnstr =dealNull(returnstr) + restr +tmp[i+1];} return dealNull(returnstr);}//TODO split();//TODO 自定义split(),可劈任意字符。public String[] split(String source,String div){int arynum = 0,intIdx=0,intIdex=0,div_length = div.length(); if(source.compareTo("")!=0){ if(source.indexOf(div)!=-1){ intIdx = source.indexOf(div); for(int intCount =1 ; ; intCount++){ if(source.indexOf(div,intIdx+div_length)!=-1){intIdx= source.indexOf(div,intIdx+div_length);arynum = intCount;}else {arynum+=2;break;}} }else arynum =1;}else arynum = 0;intIdx=0;intIdex=0;String[] returnStr = new String[arynum];if(source.compareTo("")!=0){if(source.indexOf(div)!=-1){intIdx = source.indexOf(div);returnStr[0]= source.substring(0,intIdx);for(int intCount =1 ; ; 
intCount++){if(source.indexOf(div,intIdx+div_length)!=-1){intIdex=source.indexOf(div,intIdx+div_length);returnStr[intCount] = source.substring(intIdx+div_length,intIdex);intIdx = source.indexOf(div,intIdx+div_length);}else {returnStr[intCount] = source.substring(intIdx+div_length,source.length());break;}}}else {returnStr[0] = source.substring(0,source.length());return returnStr;}}else {return returnStr;} return returnStr; }}
bat运行脚本:
rem Put every jar under .\lib in front of the existing class path.
set CLASSPATH=./lib/*;%CLASSPATH%
rem Run the crawler with the configuration file of one backup source.
java mycmf.down.index _oa_mycmf_cfg.txt
rem Keep the console window open so the output can be read.
PAUSE
某个URL备份源配置文件:
OA服务器代码备份
<tempdir>G:/temp/mycmf</tempdir>
<bakurl>http://10.10.34.211:8886/mycmf3/baklist/1.jsp</bakurl>
<locfod>G:/backup_test2/mycmf</locfod>
<partsep>11</partsep> 11为最快
<partsize>1024</partsize> 分片大小(字节),必须大于1024才会生效,否则使用默认的10MB
脚本和java执行文件存放路径:

需要下载哪些URL,增加相应的配置文件和运行脚本即可。
