Java滤波去除异常峰值方法(二)
1. 基于滑动窗口的中值绝对偏差(MAD)方法
public static double[] removeContinuousOutliersMAD(double[] data, int windowSize, double threshold) {double[] filtered = Arrays.copyOf(data, data.length);for (int i = 0; i < data.length; i++) {// 计算窗口内的中位数List<Double> window = new ArrayList<>();for (int j = Math.max(0, i - windowSize/2); j <= Math.min(data.length - 1, i + windowSize/2); j++) {window.add(data[j]);}double median = getMedian(window);// 计算MAD (Median Absolute Deviation)List<Double> deviations = new ArrayList<>();for (Double value : window) {deviations.add(Math.abs(value - median));}double mad = getMedian(deviations);// 替换异常值if (mad != 0 && Math.abs(data[i] - median) > threshold * mad) {filtered[i] = median; // 或用邻域值替换}}return filtered;
}private static double getMedian(List<Double> list) {Collections.sort(list);return list.get(list.size() / 2);
}
2. 基于连续异常值计数的剔除方法
public static double[] removeContinuousOutliers(double[] data, double threshold, int maxConsecutive) {double[] filtered = Arrays.copyOf(data, data.length);double mean = calculateMean(data);double stdDev = calculateStdDev(data, mean);int consecutiveCount = 0;for (int i = 0; i < data.length; i++) {if (Math.abs(data[i] - mean) > threshold * stdDev) {consecutiveCount++;if (consecutiveCount > maxConsecutive) {// 使用前后非异常值的平均值替换double replacement = findReplacementValue(data, i);filtered[i] = replacement;}} else {consecutiveCount = 0;}}return filtered;
}private static double findReplacementValue(double[] data, int index) {// 向前找第一个非异常值double prev = 0;for (int i = index - 1; i >= 0; i--) {if (Math.abs(data[i] - calculateMean(data)) <= calculateStdDev(data, calculateMean(data))) {prev = data[i];break;}}// 向后找第一个非异常值double next = 0;for (int i = index + 1; i < data.length; i++) {if (Math.abs(data[i] - calculateMean(data)) <= calculateStdDev(data, calculateMean(data))) {next = data[i];break;}}return (prev + next) / 2.0;
}
3. 使用指数加权移动平均(EWMA)检测连续异常
public static double[] detectContinuousAnomaliesEWMA(double[] data, double lambda, double threshold) {double[] filtered = Arrays.copyOf(data, data.length);double ewma = data[0];int anomalyStreak = 0;for (int i = 1; i < data.length; i++) {ewma = lambda * data[i] + (1 - lambda) * ewma;double residual = Math.abs(data[i] - ewma);if (residual > threshold) {anomalyStreak++;if (anomalyStreak >= 3) { // 连续3个点异常// 使用EWMA值替换filtered[i] = ewma;}} else {anomalyStreak = 0;}}return filtered;
}
4. 基于变化率的连续异常检测
/**
 * Smooths consecutive samples whose change rate on both sides is unusually large.
 *
 * <p>The change rate between neighbours is the absolute difference of their values. A rate
 * is "large" when it exceeds the mean of all rates plus {@code rateThreshold} times their
 * standard deviation. An interior sample is flagged when both the rate leading into it and
 * the rate leading out of it are large. From the second consecutive flagged sample onward,
 * each flagged sample is replaced by the average of its two original neighbours. The first
 * and last samples are never replaced.
 *
 * @param data          input samples; never modified
 * @param rateThreshold multiple of the rate standard deviation added to the rate mean to
 *                      form the limit
 * @return a new array of the same length with the selected samples replaced; inputs with
 *         fewer than three samples are returned as an unchanged copy
 */
public static double[] removeContinuousSpikes(double[] data, double rateThreshold) {
    double[] filtered = Arrays.copyOf(data, data.length);
    if (data.length < 3) {
        // No interior sample exists, so there is nothing to examine.
        return filtered;
    }

    // rates[i] is the absolute change from data[i] to data[i + 1].
    double[] rates = new double[data.length - 1];
    for (int i = 0; i < rates.length; i++) {
        rates[i] = Math.abs(data[i + 1] - data[i]);
    }

    double rateMean = calculateMean(rates);
    double rateLimit = rateMean + rateThreshold * calculateStdDev(rates, rateMean);

    int spikeLength = 0;
    for (int i = 1; i < data.length - 1; i++) {
        // rates[i - 1] leads into sample i, rates[i] leads out of it.
        if (rates[i - 1] > rateLimit && rates[i] > rateLimit) {
            spikeLength++;
            if (spikeLength >= 2) {
                filtered[i] = (data[i - 1] + data[i + 1]) / 2.0;
            }
        } else {
            spikeLength = 0;
        }
    }
    return filtered;
}
辅助方法
/**
 * Computes the arithmetic mean of the given values.
 *
 * @param data values to average
 * @return the sum of the values divided by their count ({@code NaN} for an empty array)
 */
private static double calculateMean(double[] data) {
    double total = 0;
    for (int i = 0; i < data.length; i++) {
        total += data[i];
    }
    return total / data.length;
}

/**
 * Computes the population standard deviation (divisor {@code n}) around a given mean.
 *
 * @param data values to measure
 * @param mean the mean to measure deviations from
 * @return the square root of the average squared deviation from {@code mean}
 */
private static double calculateStdDev(double[] data, double mean) {
    double squaredSum = 0;
    for (int i = 0; i < data.length; i++) {
        squaredSum += Math.pow(data[i] - mean, 2);
    }
    double variance = squaredSum / data.length;
    return Math.sqrt(variance);
}
测试:
/**
 * Demo entry point: runs the MAD, consecutive-count and EWMA filters on a small sample
 * series and prints the original series followed by each filtered result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final double[] samples = {10, 10.1, 10.2, 50, 55, 52, 10.3, 10.2, 10.1, 60, 65, 10};

    // Run each filter on the same input before printing anything.
    final double[] madFiltered = removeContinuousOutliersMAD(samples, 5, 3.0);
    final double[] countFiltered = removeContinuousOutliers(samples, 2.5, 2);
    final double[] ewmaFiltered = detectContinuousAnomaliesEWMA(samples, 0.2, 3.0);

    final String[] labels = {"原始数据: ", "MAD方法: ", "连续计数方法: ", "EWMA方法: "};
    final double[][] series = {samples, madFiltered, countFiltered, ewmaFiltered};
    for (int k = 0; k < labels.length; k++) {
        System.out.println(labels[k] + Arrays.toString(series[k]));
    }
}
方法选择建议
MAD方法:对非正态分布数据更鲁棒,适合数据分布未知的情况
连续计数方法:适合已知异常值最大连续长度的情况
EWMA方法:适合时间序列数据,对缓慢变化的异常更敏感
变化率方法:适合检测数据中突然的连续跳跃
对于特别长的连续异常,可能需要结合领域知识或更复杂的算法,如基于机器学习的方法。