再来,一次内存溢出
问题描述
事情总是很突然,线上卡死,很慢,白屏。
其他两台服务器,内存正常,但CPU同样在飙升。
解决步骤
工具分析(老套路)
嗨,又不是没遇到过,轻车熟路。先找hprof文件,用工具分析一把。
最终都定位到了项目中的4处代码。
就这,也太easy了吧。
分析代码
异常线程的堆栈如下
java stack of pool-72-thread-6at java.util.concurrent.Executors$RunnableAdapter.call() (line: 511)at java.util.concurrent.FutureTask.run() (line: 266)at java.util.concurrent.ThreadPoolExecutor.runWorker(java.util.concurrent.ThreadPoolExecutor$Worker) (line: 1142)at java.util.concurrent.ThreadPoolExecutor$Worker.run() (line: 617)at java.lang.Thread.run() (line: 745)java stack of pool-72-thread-3at java.util.concurrent.FutureTask.run() (line: 266)at java.util.concurrent.ThreadPoolExecutor.runWorker(java.util.concurrent.ThreadPoolExecutor$Worker) (line: 1142)at java.util.concurrent.ThreadPoolExecutor$Worker.run() (line: 617)at java.lang.Thread.run() (line: 745)java stack of catalina-exec-8at com.chaboshi.tools.backend.util.RedisClient.get(java.lang.String, com.chaboshi.tools.backend.util.CacheCallback, int) (line: 78)at com.chaboshi.web.qa.service.auditv2.InsuranceCityAuditV2ServiceImpl.getDetectionHistoryCompareData(java.lang.String) (line: 447)at com.chaboshi.web.qa.service.auditv2.InsuranceCityAuditV2ServiceImpl.historyDifferentList(java.lang.String) (line: 428)at com.chaboshi.web.qa.controllers.auditv2.InsuranceCityAuditV2Controller.historyDifferentList(java.lang.String) (line: 697)at sun.reflect.GeneratedMethodAccessor2466.invoke(java.lang.Object, java.lang.Object[ ])at sun.reflect.DelegatingMethodAccessorImpl.invoke(java.lang.Object, java.lang.Object[ ]) (line: 43)at java.lang.reflect.Method.invoke(java.lang.Object, java.lang.Object[ ]) (line: 498)at com.chaboshi.wf.mvc.action.MethodAction.invoke() (line: 196)at com.chaboshi.wf.mvc.Dispatcher.service(com.chaboshi.wf.mvc.BeatContext) (line: 34)at com.chaboshi.wf.mvc.WFBootstrap.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse, javax.servlet.FilterChain) (line: 64)at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 181)at org.apache.catalina.core.ApplicationFilterChain.doFilter(javax.servlet.ServletRequest, 
javax.servlet.ServletResponse) (line: 156)at com.chaboshi.web.qa.filter.DMaskingFilter.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse, javax.servlet.FilterChain) (line: 93)at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 181)at org.apache.catalina.core.ApplicationFilterChain.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 156)at org.apache.catalina.core.StandardWrapperValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 168)at org.apache.catalina.core.StandardContextValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 90)at org.apache.catalina.authenticator.AuthenticatorBase.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 483)at org.apache.catalina.core.StandardHostValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 130)at org.apache.catalina.valves.ErrorReportValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 93)at org.apache.catalina.valves.AbstractAccessLogValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 679)at org.apache.catalina.core.StandardEngineValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 74)at org.apache.catalina.connector.CoyoteAdapter.service(org.apache.coyote.Request, org.apache.coyote.Response) (line: 346)at org.apache.coyote.http11.Http11Processor.service(org.apache.tomcat.util.net.SocketWrapperBase) (line: 617)at org.apache.coyote.AbstractProcessorLight.process(org.apache.tomcat.util.net.SocketWrapperBase, org.apache.tomcat.util.net.SocketEvent) (line: 63)at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(org.apache.tomcat.util.net.SocketWrapperBase, 
org.apache.tomcat.util.net.SocketEvent) (line: 934)at org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun() (line: 1698)at org.apache.tomcat.util.net.SocketProcessorBase.run() (line: 52)at org.apache.tomcat.util.threads.ThreadPoolExecutor.runWorker(org.apache.tomcat.util.threads.ThreadPoolExecutor$Worker) (line: 1191)at org.apache.tomcat.util.threads.ThreadPoolExecutor$Worker.run() (line: 659)at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run() (line: 63)at java.lang.Thread.run() (line: 745)java stack of catalina-exec-7at com.chaboshi.web.qa.service.impl.bi.DetectionAuditStaServiceImpl$$Lambda$863.get()at com.chaboshi.tools.backend.util.RedisClient.get(java.lang.String, com.chaboshi.tools.backend.util.CacheCallback, int) (line: 88)at com.chaboshi.web.qa.service.impl.bi.DetectionAuditStaServiceImpl.timeoutList(com.alibaba.fastjson.JSONObject) (line: 74)at com.chaboshi.web.qa.controllers.bi.DetectionAuditStaController.timeoutListMonth(com.alibaba.fastjson.JSONObject) (line: 61)at sun.reflect.NativeMethodAccessorImpl.invoke0(java.lang.reflect.Method, java.lang.Object, java.lang.Object[ ])at sun.reflect.NativeMethodAccessorImpl.invoke(java.lang.Object, java.lang.Object[ ]) (line: 62)at sun.reflect.DelegatingMethodAccessorImpl.invoke(java.lang.Object, java.lang.Object[ ]) (line: 43)at java.lang.reflect.Method.invoke(java.lang.Object, java.lang.Object[ ]) (line: 498)at com.chaboshi.wf.mvc.action.MethodAction.invoke() (line: 196)at com.chaboshi.wf.mvc.Dispatcher.service(com.chaboshi.wf.mvc.BeatContext) (line: 34)at com.chaboshi.wf.mvc.WFBootstrap.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse, javax.servlet.FilterChain) (line: 64)at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 181)at org.apache.catalina.core.ApplicationFilterChain.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 156)at 
com.chaboshi.web.qa.filter.DMaskingFilter.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse, javax.servlet.FilterChain) (line: 93)at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 181)at org.apache.catalina.core.ApplicationFilterChain.doFilter(javax.servlet.ServletRequest, javax.servlet.ServletResponse) (line: 156)at org.apache.catalina.core.StandardWrapperValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 168)at org.apache.catalina.core.StandardContextValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 90)at org.apache.catalina.authenticator.AuthenticatorBase.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 483)at org.apache.catalina.core.StandardHostValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 130)at org.apache.catalina.valves.ErrorReportValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 93)at org.apache.catalina.valves.AbstractAccessLogValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 679)at org.apache.catalina.core.StandardEngineValve.invoke(org.apache.catalina.connector.Request, org.apache.catalina.connector.Response) (line: 74)at org.apache.catalina.connector.CoyoteAdapter.service(org.apache.coyote.Request, org.apache.coyote.Response) (line: 346)at org.apache.coyote.http11.Http11Processor.service(org.apache.tomcat.util.net.SocketWrapperBase) (line: 617)at org.apache.coyote.AbstractProcessorLight.process(org.apache.tomcat.util.net.SocketWrapperBase, org.apache.tomcat.util.net.SocketEvent) (line: 63)at org.apache.coyote.AbstractProtocol$ConnectionHandler.process(org.apache.tomcat.util.net.SocketWrapperBase, org.apache.tomcat.util.net.SocketEvent) (line: 934)at 
org.apache.tomcat.util.net.NioEndpoint$SocketProcessor.doRun() (line: 1698)at org.apache.tomcat.util.net.SocketProcessorBase.run() (line: 52)at org.apache.tomcat.util.threads.ThreadPoolExecutor.runWorker(org.apache.tomcat.util.threads.ThreadPoolExecutor$Worker) (line: 1191)at org.apache.tomcat.util.threads.ThreadPoolExecutor$Worker.run() (line: 659)at org.apache.tomcat.util.threads.TaskThread$WrappingRunnable.run() (line: 63)at java.lang.Thread.run() (line: 745)
问题项 | 原因 | 调用情况 |
审核页面-历史检测差异 | 慢方法,业务需求 | 早9到晚5,一直会调用;调用很频繁; |
消息消费1 | 开启线程池,进行消费。慢方法 | 早9到晚5,均匀调用。 |
消息消费2 | 开启线程池,进行消费 | 早9到晚5,均匀调用。 |
检测审核看板 | 慢方法,业务需求 | 当天仅2次调用,时间很接近 |
这个是答案吗,我有所怀疑。
问题是,调用很均匀,为什么是这个时刻,变量到底是什么呢?
我思考很久,没有答案,又是一天过去了。
跟领导和同事交流后,他们建议我试试从网关请求中找找答案。好,那就试试。
网关日志
对,我又花半个下午,寻找这个变量。
如上,仅拉取10分钟的网关请求,找到耗时大于1s的请求,排个序看看。
/detectionContract/list | 13.983s | 订单管理-》合同管理 |
/detectionOM/cityDetail | 17.785s、9.151s | 统计-》检测运营看板,城市详情接口 |
接口1:正常,入参有时间范围和具体参数,就是很慢;原因不明
接口2:巨慢的一个方法,观察了一下。正常都是30s+,甚至有50s+的
接近答案了,但这是答案吗?
如何处理
2处activemq消息 | 1、与服务反馈,调整为走消息总线(已验证) 2、去掉用线程池的异步执行 | |
历史检测差异 | 看了执行时间,有些也是巨慢,50s+(已跟产品反馈) | |
检测审核看板 | 加下日志耗时,继续观察 | |
运营看板-城市详情 | 方法也有调用,但是单次都是10s+,也有50s的(跟产品反馈) |
事后反思
1、工具很重要,如JProfiler
2、基础设施很重要,如配置溢出的快照、Prometheus的监控、网关侧日志
3、需要花时间
4、找不到没关系,需要和自己和解。问题不一定都有答案,至少目前是
5、记录问题也很重要,也许下一次就会用上,先报个案
想要抓住这个鬼,挺难。就像总有案件无法侦破一样,我突然对此很敬畏。
可能多方面因素导致这个结果,但目前原因不明。