垃圾回收算法(GC Algorithm)基石:标记-清除、复制、标记-整理
目录
摘要
第一章:垃圾回收基础概念与算法分类
1.1 GC算法核心概念体系
第二章:标记-清除算法(Mark-Sweep)深度解析
2.1 算法流程与实现细节
2.2 算法特性分析与优化策略
第三章:复制算法(Copying)原理与实践
3.1 算法核心流程与实现
3.2 算法特性与适用场景
第四章:标记-整理算法(Mark-Compact)综合解决方案
4.1 算法流程与内存整理策略
4.2 算法比较与综合评估
总结与展望
核心要点总结
未来发展趋势
参考链接
摘要
垃圾回收(Garbage Collection)是现代编程语言内存管理的核心技术。本文深入解析三大基础GC算法——标记-清除、复制、标记-整理的实现原理、性能特性和适用场景,通过算法流程图、复杂度分析和实战案例,揭示不同算法在吞吐量、暂停时间、内存效率等方面的权衡取舍,为理解现代垃圾收集器奠定坚实基础。
第一章:垃圾回收基础概念与算法分类
1.1 GC算法核心概念体系

垃圾回收核心术语解析:
/**
 * Basic garbage-collection concepts: reachability analysis, the two families of
 * garbage identification, and the metrics used to evaluate a collector.
 *
 * <p>The classes are teaching models. Objects are tracked by identity and the
 * reference graph is registered explicitly through {@code addReference}.
 */
public class GCBasicConcepts {

    /** 1. Reachability analysis over an explicitly registered reference graph. */
    public class ReachabilityAnalyzer {
        // Identity-based collections: a collector reasons about object identity,
        // never about equals()/hashCode().
        private final Set<Object> gcRoots =
                Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
        private final Map<Object, List<Object>> references = new IdentityHashMap<>();

        /** Registers a GC root, the starting point of every trace. Null is ignored. */
        public void registerGCRoot(Object root) {
            if (root != null) {
                gcRoots.add(root);
            }
        }

        /** Records that {@code from} holds a reference to {@code to}. Nulls are ignored. */
        public void addReference(Object from, Object to) {
            if (from == null || to == null) {
                return;
            }
            references.computeIfAbsent(from, key -> new ArrayList<>()).add(to);
        }

        /** Returns true when {@code obj} can be reached from at least one GC root. */
        public boolean isReachable(Object obj) {
            return obj != null && findReachableObjects().contains(obj);
        }

        /**
         * Traverses the reference graph from the GC roots.
         *
         * @return a fresh identity-based set holding every reachable object
         */
        public Set<Object> findReachableObjects() {
            Set<Object> reachable =
                    Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
            // Explicit work list instead of recursion: long reference chains must
            // not overflow the call stack.
            Deque<Object> pending = new ArrayDeque<>();
            for (Object root : gcRoots) {
                if (reachable.add(root)) {
                    pending.push(root);
                }
            }
            while (!pending.isEmpty()) {
                Object current = pending.pop();
                List<Object> targets = references.get(current);
                if (targets == null) {
                    continue;
                }
                for (Object target : targets) {
                    if (reachable.add(target)) {
                        pending.push(target);
                    }
                }
            }
            return reachable;
        }
    }

    /** 2. Comparison of the two garbage identification strategies. */
    public class GarbageIdentification {

        /** Strategy 1: reference counting. */
        public class ReferenceCounting {
            private int count = 0;
            private boolean released = false;

            public void addReference() {
                count++;
            }

            /**
             * Drops one reference and reclaims the object when the count reaches zero.
             *
             * @throws IllegalStateException if no reference is currently held
             */
            public void removeReference() {
                if (count == 0) {
                    throw new IllegalStateException("reference count is already zero");
                }
                count--;
                if (count == 0) {
                    cleanup(); // reclaimed immediately once the count hits zero
                }
            }

            /** Returns true once the count has dropped to zero and the object was reclaimed. */
            public boolean isReleased() {
                return released;
            }

            private void cleanup() {
                released = true;
            }

            /** Demonstrates the weakness of reference counting: cycles are never reclaimed. */
            public void circularReferenceProblem() {
                class Node {
                    Node reference;
                }
                Node a = new Node();
                Node b = new Node();
                a.reference = b; // a -> b
                b.reference = a; // b -> a
                // Even when neither a nor b is reachable from a root any more, each
                // still holds one reference to the other, so neither count reaches zero.
            }
        }

        /** Strategy 2: tracing collection. Everything not reachable from a root is garbage. */
        public class TracingGC {
            private final ReachabilityAnalyzer analyzer;

            public TracingGC() {
                this.analyzer = new ReachabilityAnalyzer();
            }

            public TracingGC(ReachabilityAnalyzer analyzer) {
                this.analyzer = Objects.requireNonNull(analyzer, "analyzer");
            }

            /** Returns the analyzer that holds the roots and the reference graph. */
            public ReachabilityAnalyzer getAnalyzer() {
                return analyzer;
            }

            /**
             * @param allObjects every object currently allocated
             * @return the members of {@code allObjects} that no GC root can reach
             */
            public Set<Object> findGarbage(Set<Object> allObjects) {
                Set<Object> reachable = analyzer.findReachableObjects();
                Set<Object> garbage =
                        Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
                for (Object candidate : allObjects) {
                    if (!reachable.contains(candidate)) {
                        garbage.add(candidate);
                    }
                }
                return garbage;
            }
        }
    }

    /** 3. Metrics for evaluating collector performance. */
    public class GCPerformanceMetrics {

        /** Throughput: the share of total run time spent in application code. */
        public class ThroughputMetric {
            private long totalRuntime = 0;
            private long totalGCTime = 0;

            /** Adds one measurement interval. {@code gcTime} is the part of {@code runtime} spent in GC. */
            public void record(long runtime, long gcTime) {
                if (runtime < 0 || gcTime < 0 || gcTime > runtime) {
                    throw new IllegalArgumentException(
                            "invalid interval: runtime=" + runtime + ", gcTime=" + gcTime);
                }
                totalRuntime += runtime;
                totalGCTime += gcTime;
            }

            /**
             * @return application time divided by total time, or 0.0 when nothing has
             *     been recorded (the unguarded division would yield NaN)
             */
            public double getThroughput() {
                if (totalRuntime == 0) {
                    return 0.0;
                }
                return (double) (totalRuntime - totalGCTime) / totalRuntime;
            }
            // Goal: keep the GC share small, i.e. throughput typically above 95%.
        }

        /** Pause time: how long application threads are stopped by a collection. */
        public class PauseTimeMetric {
            private final List<Long> pauseTimes = new ArrayList<>();

            public void recordPause(long pauseTime) {
                pauseTimes.add(pauseTime);
            }

            public long getMaxPauseTime() {
                return pauseTimes.stream().mapToLong(Long::longValue).max().orElse(0L);
            }

            public double getAveragePauseTime() {
                return pauseTimes.stream().mapToLong(Long::longValue).average().orElse(0.0);
            }
            // Goal: pauses as short and as predictable as possible.
        }

        /** Footprint: how much of the heap holds useful data. */
        public class MemoryEfficiency {
            private long heapSize;
            private long usedMemory;

            public void update(long heapSize, long usedMemory) {
                this.heapSize = heapSize;
                this.usedMemory = usedMemory;
            }

            /** @return used memory divided by heap size, or 0.0 for an empty heap */
            public double getMemoryUtilization() {
                if (heapSize == 0) {
                    return 0.0;
                }
                return (double) usedMemory / heapSize;
            }
            // Goal: maximise utilisation within a bounded amount of memory.
        }
    }
}
第二章:标记-清除算法(Mark-Sweep)深度解析
2.1 算法流程与实现细节

标记-清除算法完整实现:
/*** 标记-清除算法详细实现*/
public class MarkSweepGC {// 堆内存管理public class HeapMemory {private final byte[] memory; // 连续内存块private final int objectHeaderSize = 8; // 对象头大小private final FreeList freeList; // 空闲内存链表// 对象头结构:标记位 + 大小信息private class ObjectHeader {boolean marked; // 标记位(存活/垃圾)int size; // 对象大小int type; // 对象类型}public HeapMemory(int size) {this.memory = new byte[size];this.freeList = new FreeList(0, size); // 初始整个堆为空闲}// 对象分配public Integer allocate(int size) {// 在空闲链表中寻找合适的内存块FreeBlock block = freeList.findFit(size + objectHeaderSize);if (block == null) {return null; // 分配失败}// 设置对象头setObjectHeader(block.address, size, false);// 分割剩余空间(如有)freeList.splitBlock(block, size + objectHeaderSize);return block.address + objectHeaderSize; // 返回对象数据区地址}}// 标记阶段实现public class MarkPhase {private final Set<Object> gcRoots;private final HeapMemory heap;public void mark() {// 从每个GC Root开始深度优先遍历for (Object root : gcRoots) {markRecursive(root);}}private void markRecursive(Object obj) {if (obj == null || isMarked(obj)) {return; // 已标记或空对象}// 标记当前对象setMarked(obj, true);// 递归标记引用对象for (Object reference : getReferences(obj)) {markRecursive(reference);}}// 标记栈实现(避免递归深度过大)public void markWithStack() {Stack<Object> stack = new Stack<>();// 初始将GC Roots入栈for (Object root : gcRoots) {if (!isMarked(root)) {setMarked(root, true);stack.push(root);}}// 栈式遍历while (!stack.isEmpty()) {Object current = stack.pop();for (Object ref : getReferences(current)) {if (ref != null && !isMarked(ref)) {setMarked(ref, true);stack.push(ref);}}}}}// 清除阶段实现public class SweepPhase {private final HeapMemory heap;private final FreeList freeList;public void sweep() {int address = 0;// 线性扫描整个堆内存while (address < heap.size()) {ObjectHeader header = heap.getHeader(address);if (header.marked) {// 存活对象:清除标记位(为下次GC准备)header.marked = false;address += header.size + heap.objectHeaderSize;} else {// 垃圾对象:合并连续空闲块int garbageSize = header.size + heap.objectHeaderSize;freeList.addFreeBlock(address, garbageSize);address += garbageSize;}}// 
合并相邻空闲块(减少碎片)freeList.coalesce();}}// 空闲链表管理public class FreeList {private FreeBlock head;class FreeBlock {int address;int size;FreeBlock next;FreeBlock(int address, int size) {this.address = address;this.size = size;}}// 首次适应算法(First Fit)public FreeBlock findFit(int requiredSize) {FreeBlock current = head;FreeBlock prev = null;while (current != null) {if (current.size >= requiredSize) {// 找到合适块if (prev != null) {prev.next = current.next;} else {head = current.next;}return current;}prev = current;current = current.next;}return null; // 没有合适块}// 分割内存块public void splitBlock(FreeBlock block, int usedSize) {if (block.size > usedSize) {// 剩余空间足够,分割出新空闲块FreeBlock newBlock = new FreeBlock(block.address + usedSize, block.size - usedSize);addFreeBlock(newBlock);}}// 合并相邻空闲块public void coalesce() {FreeBlock current = head;while (current != null && current.next != null) {if (current.address + current.size == current.next.address) {// 合并相邻块current.size += current.next.size;current.next = current.next.next;} else {current = current.next;}}}}
}
2.2 算法特性分析与优化策略
/**
 * Performance characteristics of mark-sweep and the usual ways to optimise it.
 */
public class MarkSweepAnalysis {

    /** Complexity analysis. */
    public class ComplexityAnalysis {

        public void timeComplexity() {
            // Mark phase:  O(L) - L is the size of the live object graph
            // Sweep phase: O(H) - H is the heap size
            // Total:       O(L + H)
            // In practice the cost grows with both the live set and the heap size.
        }

        public void spaceComplexity() {
            // Extra space required:
            // - mark bit: one bit per object (stored in the header)
            // - recursion stack / mark stack: O(L) in the worst case
            // - free list: O(F), F being the number of free blocks
        }
    }

    /** Strengths and weaknesses. */
    public class ProsAndCons {

        public void advantages() {
            // 1. Relatively simple to implement
            // 2. Objects never move (good for large objects)
            // 3. Small space overhead (only a mark bit)
            // 4. Works with non-contiguous memory
        }

        public void disadvantages() {
            // 1. Severe fragmentation
            // 2. Slow allocation (the free list has to be searched)
            // 3. The sweep has to visit the whole heap
            // 4. Unsuitable for real-time systems (pause times are unpredictable)
        }
    }

    /** Optimisation strategies. */
    public class OptimizationStrategies {

        /** Optimisation 1: lazy sweeping. */
        public class LazySweeping {
            private final Set<Integer> garbageBlocks = new HashSet<>();

            public void delayedSweep() {
                // Sweep only part of the heap, and only when an allocation fails.
                // This shortens the individual GC pause.
            }
        }

        /** Optimisation 2: segregated free lists, one list of free blocks per size class. */
        public class SegregatedFreeList {
            private static final int MIN_SIZE_CLASS = 16;

            // Size class -> addresses of free blocks of exactly that size.
            private final Map<Integer, Deque<Integer>> sizeClasses = new HashMap<>();

            /** Creates the size classes. Calling it again keeps the free blocks already registered. */
            public void initSizeClasses() {
                sizeClasses.putIfAbsent(16, new ArrayDeque<Integer>()); // 16-byte blocks
                sizeClasses.putIfAbsent(32, new ArrayDeque<Integer>()); // 32-byte blocks
                sizeClasses.putIfAbsent(64, new ArrayDeque<Integer>()); // 64-byte blocks
                // ... further size classes
            }

            /**
             * Hands a free block of {@code sizeClass} bytes at {@code address} to the allocator.
             *
             * @throws IllegalArgumentException if the size class was never initialised
             */
            public void addFreeBlock(int sizeClass, int address) {
                Deque<Integer> blocks = sizeClasses.get(sizeClass);
                if (blocks == null) {
                    throw new IllegalArgumentException("unknown size class: " + sizeClass);
                }
                blocks.add(address);
            }

            /**
             * Allocates from the smallest size class that fits {@code size}.
             *
             * @return the block address, or null when that class does not exist or is empty
             * @throws IllegalArgumentException if {@code size} is not positive
             */
            public Integer allocate(int size) {
                if (size <= 0) {
                    throw new IllegalArgumentException("size must be positive: " + size);
                }
                Deque<Integer> blocks = sizeClasses.get(roundUpToNearestPowerOfTwo(size));
                // Missing class (request too large, or classes not initialised) and an
                // exhausted class both report failure instead of throwing.
                return blocks == null ? null : blocks.poll();
            }

            private int roundUpToNearestPowerOfTwo(int size) {
                if (size <= MIN_SIZE_CLASS) {
                    return MIN_SIZE_CLASS;
                }
                int highestBit = Integer.highestOneBit(size);
                return highestBit == size ? size : highestBit << 1;
            }
        }

        /** Optimisation 3: mark bitmap kept outside the objects. */
        public class MarkBitmap {
            private final BitSet markBits;
            private final int heapSize;
            private final int objectAlignment;

            /**
             * @throws IllegalArgumentException if {@code heapSize} is negative or
             *     {@code alignment} is not positive
             */
            public MarkBitmap(int heapSize, int alignment) {
                if (heapSize < 0 || alignment <= 0) {
                    throw new IllegalArgumentException(
                            "invalid bitmap geometry: heapSize=" + heapSize + ", alignment=" + alignment);
                }
                this.heapSize = heapSize;
                this.objectAlignment = alignment;
                // Round up so a trailing partial slot still gets its bit.
                this.markBits = new BitSet((heapSize + alignment - 1) / alignment);
            }

            public void setMarked(int address) {
                markBits.set(bitIndex(address));
            }

            public boolean isMarked(int address) {
                return markBits.get(bitIndex(address));
            }

            /** Clears every mark bit in preparation for the next cycle. */
            public void clearAll() {
                markBits.clear();
            }

            private int bitIndex(int address) {
                if (address < 0 || address >= heapSize) {
                    throw new IndexOutOfBoundsException(
                            "address " + address + " outside heap of size " + heapSize);
                }
                return address / objectAlignment;
            }
            // Benefit: no header space is spent on the mark, and marking is easier to parallelise.
        }
    }

    /** Where mark-sweep is used in practice. */
    public class RealWorldUsage {

        /** Case 1: conservative GC. */
        public class ConservativeGC {
            // For environments where pointers cannot be identified precisely (e.g. C/C++ bindings).
            // Every value that might be a pointer is treated as a GC root.
        }

        /** Case 2: incremental mark-sweep. */
        public class IncrementalMarkSweep {
            // GC work is split into small steps.
            // This shortens each pause and suits interactive applications.
        }
    }
}
第三章:复制算法(Copying)原理与实践
3.1 算法核心流程与实现

复制算法详细实现:
/*** 复制算法完整实现*/
public class CopyingGC {// 堆内存分区管理public class CopyingHeap {private final byte[] fromSpace;private final byte[] toSpace;private boolean usingFromSpace = true;private int fromAllocPtr = 0;private int toAllocPtr = 0;private final int spaceSize;public CopyingHeap(int totalSize) {this.spaceSize = totalSize / 2;this.fromSpace = new byte[spaceSize];this.toSpace = new byte[spaceSize];}// 获取当前使用的空间public byte[] getCurrentSpace() {return usingFromSpace ? fromSpace : toSpace;}public int getAllocPointer() {return usingFromSpace ? fromAllocPtr : toAllocPtr;}// 对象分配(指针碰撞)public Integer allocate(int size) {byte[] currentSpace = getCurrentSpace();int allocPtr = getAllocPointer();if (allocPtr + size > spaceSize) {return null; // 空间不足,触发GC}int address = allocPtr;setAllocPointer(allocPtr + size);return address;}// 执行垃圾回收public void garbageCollect() {if (usingFromSpace) {copyFromTo(fromSpace, toSpace);usingFromSpace = false;toAllocPtr = 0; // 重置分配指针} else {copyFromTo(toSpace, fromSpace);usingFromSpace = true;fromAllocPtr = 0;}}}// 对象复制与引用更新public class ObjectCopier {private final Map<Integer, Integer> forwardMap = new HashMap<>();public void copyObjects(byte[] fromSpace, byte[] toSpace) {// 第一阶段:复制所有存活对象for (Object root : gcRoots) {copyObjectRecursive(root, fromSpace, toSpace);}// 第二阶段:更新所有引用updateReferences(toSpace);}private void copyObjectRecursive(Object obj, byte[] from, byte[] to) {if (obj == null || isAlreadyCopied(obj)) {return;}// 复制对象数据int fromAddress = getAddress(obj);int size = getObjectSize(obj);int toAddress = allocateInToSpace(size);// 复制内存内容System.arraycopy(from, fromAddress, to, toAddress, size);// 记录转发地址forwardMap.put(fromAddress, toAddress);// 递归复制引用对象for (Object ref : getReferences(obj)) {copyObjectRecursive(ref, from, to);}}// 更新引用地址private void updateReferences(byte[] toSpace) {for (int i = 0; i < toAllocPtr; i += getObjectSizeAt(i)) {updateObjectReferences(toSpace, i);}}private void updateObjectReferences(byte[] space, int address) {for (Reference ref : 
getReferencesAt(space, address)) {int oldAddress = ref.getTargetAddress();if (forwardMap.containsKey(oldAddress)) {ref.setTargetAddress(forwardMap.get(oldAddress));}}}}// 分配策略优化public class AllocationOptimization {// 指针碰撞(Bump Pointer)分配public class BumpPointerAllocator {private int pointer = 0;private final int limit;public Integer allocate(int size) {if (pointer + size > limit) {return null;}int addr = pointer;pointer += size;return addr;}// 优点:分配速度极快(O(1))// 缺点:需要紧凑的内存布局}// 半区大小与存活率关系public void spaceSizingAnalysis() {// 关键公式:存活对象大小 ≤ 半区大小// 如果存活率 > 50%,复制算法效率下降double survivalRate = calculateSurvivalRate();if (survivalRate > 0.5) {System.out.println("警告:存活率过高,复制算法效率低");}}}
}
3.2 算法特性与适用场景
/**
 * Characteristics, applications and tuning options of the copying algorithm.
 */
public class CopyingAlgorithmAnalysis {

    /** Performance characteristics. */
    public class PerformanceCharacteristics {

        public void timeComplexity() {
            // Copy phase: O(L) - L is the number of live objects
            // Total work is proportional to the live set and independent of heap size.
            // Benefit: garbage is never touched, which suits high allocation/death rates.
        }

        public void spaceEfficiency() {
            // Memory utilisation: at most 50% (one semi-space is always idle).
            // There is no fragmentation; the usable space is contiguous.
            // Best fit: short-lived objects that become garbage quickly.
        }
    }

    /** Strengths and limitations. */
    public class StrengthsAndWeaknesses {

        public void strengths() {
            // 1. No fragmentation
            // 2. Very fast allocation (bump pointer)
            // 3. High throughput (only live objects are processed)
            // 4. Relatively simple to implement
        }

        public void weaknesses() {
            // 1. Low memory utilisation (50% idle)
            // 2. Efficiency drops when the survival rate is high
            // 3. Objects must move (bad for large objects)
            // 4. All application threads must be stopped
        }
    }

    /** Practical applications and variants. */
    public class PracticalApplications {

        /** Application 1: young-generation collection. */
        public class YoungGenerationGC {
            // Most JVMs use a copying collector for the young generation.
            // Assumption: young objects die quickly (typically 98% or more).

            public void minorGC() {
                // Eden + From Survivor -> To Survivor
                // Survivors age by one; old enough objects are promoted to the old generation.
            }
        }

        /** Application 2: multi-space copying. */
        public class MultiSpaceCopying {
            private final int numSpaces = 3; // three spaces used in rotation
            private int currentSpace = 0;

            public void multiSpaceGC() {
                // Copy from several source spaces into one target space.
                // This wastes less space and raises memory utilisation.
            }
        }

        /** Application 3: incremental copying. */
        public class IncrementalCopying {
            // The copy is split into many small steps.
            // This shortens each pause and suits real-time systems.
        }
    }

    /** Optimisation techniques. */
    public class OptimizationTechniques {

        /** Optimisation 1: age-based promotion. */
        public class AgeBasedPromotion {
            private final int maxTenuringThreshold = 15;
            // Identity-based: two distinct objects that are equals() must age separately.
            private final Map<Object, Integer> objectAges = new IdentityHashMap<>();
            private final Set<Object> survivorSpace =
                    Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());
            private final Set<Object> oldGeneration =
                    Collections.newSetFromMap(new IdentityHashMap<Object, Boolean>());

            /** @return true when {@code obj} has survived at least {@code maxTenuringThreshold} collections */
            public boolean shouldPromote(Object obj) {
                return objectAges.getOrDefault(obj, 0) >= maxTenuringThreshold;
            }

            /** @return true when {@code obj} has been moved to the old generation */
            public boolean isPromoted(Object obj) {
                return oldGeneration.contains(obj);
            }

            /** Handles one survived collection: either promote the object or age it by one. */
            public void handlePromotion(Object obj) {
                if (shouldPromote(obj)) {
                    promoteToOldGeneration(obj);
                } else {
                    copyToSurvivorSpace(obj);
                    objectAges.merge(obj, 1, Integer::sum);
                }
            }

            private void promoteToOldGeneration(Object obj) {
                survivorSpace.remove(obj);
                oldGeneration.add(obj);
                // Drop the age entry; otherwise the table would keep growing.
                objectAges.remove(obj);
            }

            private void copyToSurvivorSpace(Object obj) {
                survivorSpace.add(obj);
            }
        }

        /** Optimisation 2: large objects bypass the young generation. */
        public class LargeObjectHandling {
            private final int largeObjectThreshold = 1024 * 1024; // 1 MB
            // Model layout: the young generation occupies [0, youngGenerationSize) and
            // the old generation follows directly after it.
            private final int youngGenerationSize = 16 * 1024 * 1024;
            private final int oldGenerationSize = 64 * 1024 * 1024;
            private int youngTop = 0;
            private int oldTop = 0;

            /**
             * @return the allocated address, or null when the chosen generation is full
             * @throws IllegalArgumentException if {@code size} is negative
             */
            public Integer allocateObject(int size) {
                if (size < 0) {
                    throw new IllegalArgumentException("size must not be negative: " + size);
                }
                if (size > largeObjectThreshold) {
                    // Large objects go straight to the old generation and are never copied.
                    return allocateInOldGeneration(size);
                }
                // Ordinary objects are handled by the copying collector.
                return allocateInYoungGeneration(size);
            }

            private Integer allocateInOldGeneration(int size) {
                if (size > oldGenerationSize - oldTop) {
                    return null;
                }
                int address = youngGenerationSize + oldTop;
                oldTop += size;
                return address;
            }

            private Integer allocateInYoungGeneration(int size) {
                if (size > youngGenerationSize - youngTop) {
                    return null;
                }
                int address = youngTop;
                youngTop += size;
                return address;
            }
        }
    }
}
第四章:标记-整理算法(Mark-Compact)综合解决方案
4.1 算法流程与内存整理策略

标记-整理算法核心实现:
/*** 标记-整理算法详细实现*/
public class MarkCompactGC {// 三色标记法实现public class TricolorMarking {private static final int WHITE = 0; // 未访问(垃圾候选)private static final int GRAY = 1; // 已访问但引用未处理private static final int BLACK = 2; // 完全处理完成public void markWithTricolor() {// 初始所有对象为白色initializeAllWhite();// GC Roots设为灰色for (Object root : gcRoots) {setColor(root, GRAY);}// 处理灰色对象直到队列为空while (!graySet.isEmpty()) {Object current = graySet.remove();// 处理当前对象的引用for (Object ref : getReferences(current)) {if (getColor(ref) == WHITE) {setColor(ref, GRAY);graySet.add(ref);}}setColor(current, BLACK);}}}// 整理阶段核心算法public class CompactionAlgorithm {private final Map<Integer, Integer> forwardMap = new HashMap<>();// 方法1:双指针整理(Two-Finger Compaction)public void twoFingerCompaction() {int free = 0; // 空闲指针(从低地址开始)int scan = heapSize - 1; // 扫描指针(从高地址开始)// 第一阶段:计算对象新位置while (free < scan) {if (isObjectAt(free) && isMarked(free)) {// 存活对象,计算新位置int newAddr = calculateNewAddress(free);forwardMap.put(free, newAddr);free += getObjectSize(free);} else {// 找高地址的存活对象来填充while (scan > free && (!isObjectAt(scan) || !isMarked(scan))) {scan--;}if (scan > free) {// 移动高地址对象到低地址moveObject(scan, free);forwardMap.put(scan, free);free += getObjectSize(scan);scan--;}}}}// 方法2:滑动整理(Sliding Compaction)public void slidingCompaction() {int compactPointer = 0;// 计算每个存活对象的新地址for (int addr = 0; addr < heapSize; addr += getObjectSize(addr)) {if (isMarked(addr)) {forwardMap.put(addr, compactPointer);compactPointer += getObjectSize(addr);}}// 移动对象到新位置for (int addr = 0; addr < heapSize; addr += getObjectSize(addr)) {if (isMarked(addr)) {int newAddr = forwardMap.get(addr);if (newAddr != addr) {moveObject(addr, newAddr);}}}}// 方法3:线性整理(LISP2算法)public void lisp2Compaction() {// 三次遍历:计算新位置、更新引用、移动对象firstPassComputeAddresses();secondPassUpdateReferences();thirdPassMoveObjects();}}// 引用更新策略public class ReferenceUpdater {// 基于转发地址表的引用更新public void updateReferencesWithForwardMap() {for (int addr = 0; addr < heapSize; addr += getObjectSize(addr)) {if 
(isMarked(addr)) {updateObjectReferences(addr);}}}private void updateObjectReferences(int objectAddr) {for (Reference ref : getReferencesAt(objectAddr)) {int targetAddr = ref.getTargetAddress();if (forwardMap.containsKey(targetAddr)) {ref.setTargetAddress(forwardMap.get(targetAddr));}}}// 基于句柄的引用更新(避免大量指针更新)public class HandleBasedSystem {private final Map<Integer, Integer> handleTable = new HashMap<>();public void updateThroughHandles() {// 对象通过句柄间接引用// 移动对象时只需更新句柄表,引用保持不变for (Handle handle : getAllHandles()) {if (forwardMap.containsKey(handle.getObjectAddress())) {handle.setAddress(forwardMap.get(handle.getObjectAddress()));}}}}}
}
4.2 算法比较与综合评估
/**
 * Side-by-side comparison of the three basic GC algorithms and guidance on
 * choosing between them.
 */
public class GCAlgorithmComparison {

    /** Collector families a tuning decision can select. */
    public enum GcStrategy {
        INCREMENTAL,
        THROUGHPUT,
        SPACE_EFFICIENT,
        DEFAULT
    }

    /** Coarse description of what an application needs from its collector. */
    public static final class ApplicationProfile {
        private final boolean lowLatencyRequired;
        private final boolean highThroughputRequired;
        private final boolean memoryConstrained;

        public ApplicationProfile(
                boolean lowLatencyRequired, boolean highThroughputRequired, boolean memoryConstrained) {
            this.lowLatencyRequired = lowLatencyRequired;
            this.highThroughputRequired = highThroughputRequired;
            this.memoryConstrained = memoryConstrained;
        }

        public boolean isLowLatencyRequired() {
            return lowLatencyRequired;
        }

        public boolean hasHighThroughputRequirement() {
            return highThroughputRequired;
        }

        public boolean isMemoryConstrained() {
            return memoryConstrained;
        }
    }

    /** Feature comparison matrix. */
    public class AlgorithmMatrix {

        /** Prints the time, space and allocation characteristics of each algorithm. */
        public void compareAlgorithms() {
            // Time complexity
            System.out.println("时间复杂度:");
            System.out.println("标记-清除: O(L + H) - L=存活对象, H=堆大小");
            System.out.println("复制算法: O(L) - 只与存活对象相关");
            System.out.println("标记-整理: O(L + H) - 类似标记-清除但需移动");
            // Space efficiency
            System.out.println("\n空间效率:");
            System.out.println("标记-清除: 高(无闲置空间,但有碎片)");
            System.out.println("复制算法: 低(50%空间闲置,无碎片)");
            System.out.println("标记-整理: 高(无闲置空间,无碎片)");
            // Allocation efficiency
            System.out.println("\n分配效率:");
            System.out.println("标记-清除: 低(需搜索空闲链表)");
            System.out.println("复制算法: 高(指针碰撞分配)");
            System.out.println("标记-整理: 高(指针碰撞分配)");
        }
    }

    /** Which algorithm suits which scenario. */
    public class ScenarioAnalysis {

        /** Scenario 1: real-time systems (low latency). */
        public void realTimeSystem() {
            // Prefer: incremental mark-sweep
            // Avoid:  plain mark-compact (long pauses)
        }

        /** Scenario 2: big-data processing (high throughput). */
        public void bigDataProcessing() {
            // Prefer: copying (young generation)
            // Pair with: mark-compact (old generation)
        }

        /** Scenario 3: memory-constrained environments. */
        public void memoryConstrained() {
            // Prefer: mark-sweep (good space efficiency)
            // Avoid:  copying (50% of the space is idle)
        }

        /** Scenario 4: workloads dominated by large objects. */
        public void largeObjectIntensive() {
            // Prefer: mark-sweep (no cost for moving objects)
            // Avoid:  copying and mark-compact (moving is expensive)
        }
    }

    /** How modern collectors combine the basic algorithms. */
    public class ModernGCCombinations {

        /** Generational collection. */
        public class GenerationalCollection {

            /** Young generation: copying, based on the high-mortality assumption. */
            public void youngGeneration() {
                // Copying reclaims short-lived objects quickly.
            }

            /** Old generation: mark-sweep or mark-compact. */
            public void oldGeneration() {
                // Mark-sweep:   avoids the cost of moving objects, tolerates fragmentation
                // Mark-compact: no fragmentation, but longer pauses
            }
        }

        /** Hybrid algorithms. */
        public class HybridAlgorithms {

            /** Incremental mark-compact. */
            public class IncrementalMarkCompact {
                // Compaction is split into incremental steps.
                // This balances throughput against pause time.
            }

            /** Concurrent mark-sweep. */
            public class ConcurrentMarkSweep {
                // Marking runs concurrently with application threads.
                // Pauses shrink, but floating garbage may be left behind.
            }
        }
    }

    /** Performance tuning guide. */
    public class PerformanceTuningGuide {
        private ApplicationProfile profile = new ApplicationProfile(false, false, false);
        private GcStrategy selectedStrategy = GcStrategy.DEFAULT;
        private final List<String> appliedTuningSteps = new ArrayList<>();

        /** Sets the profile that {@link #selectAlgorithmBasedOnApp()} evaluates. */
        public void setApplicationProfile(ApplicationProfile profile) {
            this.profile = Objects.requireNonNull(profile, "profile");
        }

        /** @return the strategy chosen by the last {@link #selectAlgorithmBasedOnApp()} call */
        public GcStrategy getSelectedStrategy() {
            return selectedStrategy;
        }

        /** @return the tuning steps applied so far, in order */
        public List<String> getAppliedTuningSteps() {
            return Collections.unmodifiableList(appliedTuningSteps);
        }

        /**
         * Picks a collector family from the application profile. Latency is checked
         * first, then throughput, then footprint. A profile with no special
         * requirement selects {@link GcStrategy#DEFAULT} instead of keeping a
         * previous choice.
         */
        public void selectAlgorithmBasedOnApp() {
            ApplicationProfile current = analyzeApplication();
            if (current.isLowLatencyRequired()) {
                // Interactive applications: incremental GC
                configureIncrementalGC();
            } else if (current.hasHighThroughputRequirement()) {
                // Batch applications: throughput-oriented GC
                configureThroughputGC();
            } else if (current.isMemoryConstrained()) {
                // Memory-sensitive applications: space efficiency first
                configureSpaceEfficientGC();
            } else {
                selectedStrategy = GcStrategy.DEFAULT;
            }
        }

        /** Applies the key tuning steps in their usual order. */
        public void keyParameterTuning() {
            setHeapSizeBasedOnWorkingSet(); // heap size
            adjustYoungOldRatio();          // generation ratio
            configureGCTriggers();          // GC trigger thresholds
        }

        private ApplicationProfile analyzeApplication() {
            return profile;
        }

        private void configureIncrementalGC() {
            selectedStrategy = GcStrategy.INCREMENTAL;
        }

        private void configureThroughputGC() {
            selectedStrategy = GcStrategy.THROUGHPUT;
        }

        private void configureSpaceEfficientGC() {
            selectedStrategy = GcStrategy.SPACE_EFFICIENT;
        }

        private void setHeapSizeBasedOnWorkingSet() {
            appliedTuningSteps.add("heap-size");
        }

        private void adjustYoungOldRatio() {
            appliedTuningSteps.add("young-old-ratio");
        }

        private void configureGCTriggers() {
            appliedTuningSteps.add("gc-triggers");
        }
    }
}
总结与展望
核心要点总结
通过本文的深入分析,我们可以得出以下关键结论:
- 算法选择是权衡的艺术:
  - 标记-清除:空间效率高,但碎片问题严重
  - 复制算法:吞吐量高,但内存利用率低
  - 标记-整理:综合性能好,但实现复杂度高
- 实际应用中的组合策略:
  - 现代JVM采用分代收集,不同代使用不同算法
  - 年轻代适合复制算法(对象死亡率高)
  - 老年代适合标记-清除或标记-整理
- 性能优化方向:
  - 增量式GC减少暂停时间
  - 并发GC缩短停顿时间(通常以少量吞吐量为代价),并行GC提高吞吐量
  - 自适应GC根据应用特征动态调整
未来发展趋势
- Region-Based GC:如G1、ZGC将堆划分为多个Region,更精细的内存管理
- 并发与并行:最大限度减少STW时间,提高系统响应性
- AI驱动的GC:机器学习优化GC参数和策略选择
- 异构内存管理:针对NVMe、PMem等新型存储的GC优化
理解这三种基础算法是掌握现代垃圾回收技术的关键基石,它们的思想和优化策略在各种高级GC器中都有体现和应用。
参考链接
- Oracle GC调优官方指南
- GC算法可视化工具
