使用Java定时爬取CSDN博客并自动邮件推送
目录
技术栈
核心代码实现
1. 邮件发送任务类
2. CSDN博客爬取任务
3. 主程序入口
4. 运行效果
关键技术点
1. Jsoup网页解析
2. QQ邮箱SMTP配置
3. 多线程邮件发送
4. 定时任务调度
本篇博客将介绍如何使用Java实现一个自动化系统,定时爬取CSDN博客内容,并通过QQ邮箱自动发送到指定邮箱。这个系统结合了多线程、定时任务、网络爬虫和邮件发送等技术。
技术栈
- Jsoup: 用于网页内容爬取和解析
- Hutool: 提供邮件发送工具类
- Java多线程: 实现异步邮件发送
- Java定时任务: 实现定时爬取功能
- QQ邮箱SMTP服务: 用于发送邮件
核心代码实现
1. 邮件发送任务类
package com.javamail1;import java.security.GeneralSecurityException;import com.sun.mail.util.MailSSLSocketFactory;import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.extra.mail.MailAccount;
import cn.hutool.extra.mail.MailUtil;public class MailRunnable implements Runnable {String title;String link;String content;String date;String num;public MailRunnable(String title, String link, String content, String date, String num) {this.title = title;this.link = link;this.content = content;this.date = date;this.num = num;}@Overridepublic void run() {// TODO Auto-generated method stubSystem.out.println(Thread.currentThread().getName() + "准备发送邮件....");try {// 构建一个邮件的MailAccount对象, 构建邮件账户配置MailAccount mailAccount = new MailAccount();mailAccount.setSslEnable(true);MailSSLSocketFactory mss = new MailSSLSocketFactory("TLSv1.2");// 信任所有的hostmss.setTrustAllHosts(true);mailAccount.setCustomProperty("mail.smtp.ssl.socketFactory", mss);// 设置QQ邮箱SMTP配置mailAccount.setHost("smtp.qq.com");mailAccount.setPort(465);mailAccount.setAuth(true);mailAccount.setFrom("qq邮箱");mailAccount.setUser("发送者的qq号码");mailAccount.setPass(" "); // 临时授权密码// 发送带附件的HTML邮件MailUtil.send(mailAccount, CollUtil.newArrayList("接收者的qq号码"), "CSDN一叶的博客系列", "<h2>博客信息推送</h2>" +"<p><strong>标题:</strong> " + this.title + "</p>" +"<p><strong>链接:</strong> <a href='" + this.link + "'>" + this.link + "</a></p>" +"<p><strong>内容摘要:</strong> " + this.content + "</p>" +"<p><strong>发布时间:</strong> " + this.date + "</p>" +"<p><strong>阅读量:</strong> " + this.num + "</p>", true, FileUtil.file("d:/c1.jpg"));System.out.println("发送邮件成功....");} catch (GeneralSecurityException e) {// TODO Auto-generated catch blocke.printStackTrace();}}}
2. CSDN博客爬取任务
package com.javamail1;import java.util.TimerTask;import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;public class CsdnTask extends TimerTask {static int i = 0;static Elements elements;static {try {// TODO Auto-generated method stubString url = "https://blog.csdn.net/jdsjlzx/article/list/1";// 1.第一步要求判断能不能去爬取这个链接Connection conn = Jsoup.connect(url);System.out.println(conn);// 2.获取这个文档对象Document doc = conn.get();// System.out.println(doc);// 3.获取整合文档doc, 用CSS选择器获取博客列表elements = doc.select(".article-list .article-item-box");} catch (Exception e) {e.printStackTrace();}}@Overridepublic void run() {try {// 获取单条博客信息Element e = elements.get(i);String title = e.select("a").text();String link = e.select("a").attr("href");String content = e.select(".content").text();String date = e.select(".date").text();String num = e.select(".read-num").first().text();System.out.println("一条博客的信息为:"+title+","+link+","+content+","+date+","+num);// 启动线程发送邮件MailRunnable m = new MailRunnable(title,link,content,date,num);new Thread(m).start();i++;}catch (Exception e) {}}}
3. 主程序入口
package com.javamail1;import java.util.Random;
import java.util.Timer;public class Test {public static void main(String[] args) {// 创建定时器,随机间隔执行爬取任务Timer t = new Timer();t.schedule(new CsdnTask(), 1 * 1000, new Random().nextInt(80000));}}
其中用到的Maven依赖有:
<!-- Hutool: mail helper classes (MailAccount, MailUtil) and collection/file utilities -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.8.38</version>
</dependency>

<!-- Jsoup: HTML fetching and parsing -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.18.1</version>
</dependency>

<!-- JavaMail: provides com.sun.mail.util.MailSSLSocketFactory -->
<dependency>
    <groupId>com.sun.mail</groupId>
    <artifactId>javax.mail</artifactId>
    <version>1.6.2</version>
</dependency>
4. 运行效果
关键技术点
1. Jsoup网页解析
// Use CSS selectors to locate elements precisely.
// Every match of ".article-list .article-item-box" is handled as one blog entry.
Elements elements = doc.select(".article-list .article-item-box");
// e is a single entry taken from elements; the title is the text of its <a> elements.
String title = e.select("a").text();
// The link is read from the href attribute of the entry's <a> element.
String link = e.select("a").attr("href");
2. QQ邮箱SMTP配置
// SSL connection settings
mailAccount.setSslEnable(true);
MailSSLSocketFactory mss = new MailSSLSocketFactory("TLSv1.2");
// NOTE(review): trusting all hosts presumably relaxes server verification —
// confirm whether this is actually required for smtp.qq.com.
mss.setTrustAllHosts(true);
// Mail server settings
mailAccount.setHost("smtp.qq.com");
mailAccount.setPort(465);
mailAccount.setAuth(true);
3. 多线程邮件发送
// 使用Runnable接口实现多线程
public class MailRunnable implements Runnable {@Overridepublic void run() {// 邮件发送逻辑}
}// 启动线程
MailRunnable m = new MailRunnable(title,link,content,date,num);
new Thread(m).start();
4. 定时任务调度
// Schedule the crawl task with java.util.Timer.
Timer t = new Timer();
// Timer.schedule rejects period <= 0 and nextInt(80000) can return 0,
// so the random period is kept in [1000, 80000) ms.
t.schedule(new CsdnTask(), 1 * 1000, 1000L + new Random().nextInt(79000));