基于MATLAB的t-SNE算法多合成数据集降维可视化实现
1. 合成数据集生成与预处理
% Generate five synthetic 2-D datasets together with class labels.
% Each dataset is stored as datasets.<name>.X (n_samples-by-2 features) and
% datasets.<name>.Y (n_samples-by-1 numeric labels). The same data are then
% exposed as X_list{i} / y_list{i} in the fixed order:
%   1 = blobs, 2 = moons, 3 = circles, 4 = spiral, 5 = random.
% Only base MATLAB functions are used, so no external generator helpers
% (e.g. scikit-learn style make_blobs) are required.
n_samples = 1000;
rng(42); % fixed seed so every run produces the same data

datasets = struct();
n_a = floor(n_samples / 2);   % size of the first class for two-class sets
n_b = n_samples - n_a;        % size of the second class

% --- Blobs: 5 isotropic Gaussian clusters with standard deviation 1.0 ---
n_centers = 5;
blob_std  = 1.0;
centers   = -10 + 20 * rand(n_centers, 2);          % centers in [-10, 10]^2
blob_y    = mod((0:n_samples-1)', n_centers) + 1;   % labels 1..5, balanced
datasets.blobs.X = centers(blob_y, :) + blob_std * randn(n_samples, 2);
datasets.blobs.Y = blob_y;

% --- Moons: two interleaving half circles, Gaussian noise std 0.1 ---
t_a = linspace(0, pi, n_a)';
t_b = linspace(0, pi, n_b)';
moon_X = [cos(t_a),     sin(t_a); ...
          1 - cos(t_b), 0.5 - sin(t_b)];
datasets.moons.X = moon_X + 0.1 * randn(n_samples, 2);
datasets.moons.Y = [ones(n_a, 1); 2 * ones(n_b, 1)];

% --- Circles: outer radius 1, inner radius 0.5, Gaussian noise std 0.05 ---
ang_out = 2 * pi * rand(n_a, 1);
ang_in  = 2 * pi * rand(n_b, 1);
circ_X = [cos(ang_out),       sin(ang_out); ...
          0.5 * cos(ang_in),  0.5 * sin(ang_in)];
datasets.circles.X = circ_X + 0.05 * randn(n_samples, 2);
datasets.circles.Y = [ones(n_a, 1); 2 * ones(n_b, 1)];

% --- Spiral: two Archimedean arms, the second is the first rotated by pi ---
s_a = linspace(0.5, 3 * pi, n_a)';
s_b = linspace(0.5, 3 * pi, n_b)';
spiral_X = [ s_a .* cos(s_a),  s_a .* sin(s_a); ...
            -s_b .* cos(s_b), -s_b .* sin(s_b)];
datasets.spiral.X = spiral_X + 0.2 * randn(n_samples, 2);
datasets.spiral.Y = [ones(n_a, 1); 2 * ones(n_b, 1)];

% --- Random: uniform noise with no structure ---
% Labels are drawn at random, so any classifier should score near chance.
datasets.random.X = rand(n_samples, 2);
datasets.random.Y = randi(2, n_samples, 1);

% Collect features and labels. Rows are shuffled so that contiguous slices
% (e.g. 1:800 / 801:end) contain every class.
dataset_names = {'blobs', 'moons', 'circles', 'spiral', 'random'};
X_list = cell(1, numel(dataset_names));
y_list = cell(1, numel(dataset_names));
for i = 1:numel(dataset_names)
    name = dataset_names{i};
    perm = randperm(n_samples);
    datasets.(name).X = datasets.(name).X(perm, :);
    datasets.(name).Y = datasets.(name).Y(perm);
    X_list{i} = datasets.(name).X;
    y_list{i} = datasets.(name).Y;
end
2. t-SNE降维核心实现
% Shared t-SNE hyperparameters used for every dataset.
% NumIterations is kept as a field for convenience, but tsne itself has no
% such argument: the iteration cap is passed through 'Options' (MaxIter).
tsne_opts = struct( ...
    'NumDimensions', 2, ...
    'Perplexity',    30, ...
    'LearnRate',     200, ...
    'NumIterations', 1000, ...
    'Algorithm',     'barneshut'); % Barnes-Hut approximation (faster than 'exact')

tsne_iter_opts = statset('MaxIter', tsne_opts.NumIterations);

% Embed every dataset. results{i}.Y is the low-dimensional embedding and
% results{i}.Loss is the final Kullback-Leibler divergence returned by tsne.
% results{i}.P is kept as an alias of Loss for backward compatibility; it is
% NOT a joint-probability matrix.
results = cell(1, numel(X_list));
for i = 1:numel(X_list)
    [Y_i, loss_i] = tsne(X_list{i}, ...
        'NumDimensions', tsne_opts.NumDimensions, ...
        'Perplexity',    tsne_opts.Perplexity, ...
        'LearnRate',     tsne_opts.LearnRate, ...
        'Algorithm',     tsne_opts.Algorithm, ...
        'Options',       tsne_iter_opts);
    results{i} = struct('Y', Y_i, 'Loss', loss_i, 'P', loss_i);
end
3. 降维效果可视化
% Plot the 2-D t-SNE embedding of every dataset in a 2x3 grid, colored by
% class label. Expects results{i}.Y (embedding) and y_list{i} (labels) for
% i = 1..5 in the order blobs, moons, circles, spiral, random.
figure('Position',[100,100,1200,800]);

panel_titles = {'Blobs数据集', 'Moons数据集', 'Circles数据集', ...
                'Spiral数据集', 'Random数据集'};
for i = 1:numel(panel_titles)
    subplot(2,3,i);
    gscatter(results{i}.Y(:,1), results{i}.Y(:,2), y_list{i});
    title(panel_titles{i});
    % Label the axes on every panel, not only the first one.
    xlabel('维度1'); ylabel('维度2');
end

% Add the figure-level title after the panels exist.
sgtitle('t-SNE降维效果对比');

% Keep the on-screen size when printing or exporting the figure.
set(gcf,'PaperPositionMode','auto');
4. 关键参数影响分析
4.1 Perplexity参数对比
% Compare t-SNE embeddings of the moons dataset for several perplexities.
% The moons data are dataset 2 in X_list / y_list.
perplexities = [5,30,50];
moons_X = X_list{2};
moons_y = y_list{2};
figure;
for i = 1:numel(perplexities)
    % Re-seed so the panels differ only by perplexity, not by initialization.
    rng(42);
    Y = tsne(moons_X, 'Perplexity', perplexities(i));
    subplot(1, numel(perplexities), i);
    gscatter(Y(:,1), Y(:,2), moons_y);
    title(sprintf('Perplexity=%d', perplexities(i)));
end
4.2 学习率优化
% Compare t-SNE embeddings of the spiral dataset for several learning rates.
% The spiral data are dataset 4 in X_list / y_list.
learning_rates = [10,100,500];
spiral_X = X_list{4};
spiral_y = y_list{4};
figure;
for i = 1:numel(learning_rates)
    % Re-seed so the panels differ only by learning rate, not by initialization.
    rng(42);
    Y = tsne(spiral_X, 'LearnRate', learning_rates(i));
    subplot(1, numel(learning_rates), i);
    gscatter(Y(:,1), Y(:,2), spiral_y);
    title(sprintf('LR=%d', learning_rates(i)));
end
5. 降维效果量化评估
% Quantify how well each embedding keeps the classes separable by training a
% k-NN classifier on 80% of the embedded points and scoring it on the
% remaining 20%.
%
% t-SNE has no out-of-sample transform: embedding the training and test rows
% in two separate tsne calls yields two unrelated coordinate systems. The
% split is therefore done on the single embedding already stored in
% results{i}.Y, with labels taken from y_list{i}.
k = 5;
train_fraction = 0.8;
set_names = {'Blobs','Moons','Circles','Spiral','Random'};
n_sets = numel(set_names);
scores = zeros(n_sets,1);

for i = 1:n_sets
    embedding = results{i}.Y;
    labels    = y_list{i}(:);
    n_points  = size(embedding, 1);

    % Random hold-out split so both parts contain every class.
    order     = randperm(n_points);
    n_train   = round(train_fraction * n_points);
    train_idx = order(1:n_train);
    test_idx  = order(n_train+1:end);

    mdl    = fitcknn(embedding(train_idx,:), labels(train_idx), 'NumNeighbors', k);
    y_pred = predict(mdl, embedding(test_idx,:));
    scores(i) = mean(y_pred == labels(test_idx));
end

% Show the accuracies in a new figure, one bar per dataset, with the tick
% positions pinned to the bar positions so the labels line up.
figure;
bar(1:n_sets, scores);
set(gca, 'XTick', 1:n_sets, 'XTickLabel', set_names);
ylabel('分类准确率');
title('t-SNE结构保留能力评估');
参考代码 SNE降维算法 www.youwenfan.com/contentcsi/66049.html
6. 典型结果分析
| 数据集类型 | 理想特性 | t-SNE表现 |
|---|---|---|
| Blobs | 线性可分簇 | 簇间清晰分离,局部结构保留良好(嵌入中的簇间距离与簇大小不代表真实的全局结构) |
| Moons | 非线性流形 | 有效展开月牙形结构,无重叠 |
| Circles | 同心圆分布 | 精确还原环形层次,区分内外环 |
| Spiral | 复杂螺旋结构 | 中等复杂度下可分离,高密度区域存在轻微重叠 |
| Random | 无内在结构 | 整体呈随机分布;可用于检查算法是否引入伪结构(困惑度过小时可能出现虚假的小簇) |
7. 性能优化策略
7.1 PCA预处理加速
% Reduce the data with PCA (at most 50 components) before running t-SNE.
% pca_model holds the principal component coefficients; pca_scores holds the
% mean-centered data projected onto them. Using the scores avoids projecting
% uncentered data, and the component count is capped at what is available
% (the synthetic datasets here are only 2-D, so indexing 1:50 would fail).
[pca_model, pca_scores] = pca(X_list{3});
n_components = min(50, size(pca_scores, 2));
X_pca = pca_scores(:, 1:n_components);
[Y_pca,~] = tsne(X_pca);
7.2 多尺度t-SNE
% Two-stage embedding of dataset 2 (X_list{2}).
% Stage 1: embed into 3 dimensions with a small perplexity (15).
% Stage 2: embed the stage-1 output into 2 dimensions with a larger
%          perplexity (50).
% NOTE(review): this chains two ordinary tsne calls; whether it matches a
% published "multi-scale t-SNE" formulation should be confirmed.
local_tsne = tsne( ...
    X_list{2}, ...
    'Perplexity', 15, ...
    'NumDimensions', 3);
global_tsne = tsne( ...
    local_tsne, ...
    'Perplexity', 50, ...
    'NumDimensions', 2);