- 准备下载好的Argo数据(nc)
- 提取目标区域的剖面数据
- 提取目标时间范围的数据
- 筛选质量控制在阈值范围以上的剖面数据
- (nan_threshold = 0.25即要求每条剖面数据的低质量数据点小于25%)
- 提取出的剖面数据每月一个文件存储
- 每月一图生成argo位置
- 生成最后五个剖面数据图
import os
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from collections import defaultdict
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import warnings
import cartopy.crs as ccrs
import cartopy.feature as cfeaturewarnings.filterwarnings("ignore", message="Discarding nonzero nanoseconds in conversion")# ======================== 参数配置 ========================
lat_min, lat_max = 0, 60
lon_min, lon_max = 100, 160
valid_qc = [b'1', b'2']
nan_threshold = 0.25start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 12, 31)input_root = r""
nc_save_dir = r""
png_save_dir = r""
profile_fig_save_dir = r""os.makedirs(nc_save_dir, exist_ok=True)
os.makedirs(png_save_dir, exist_ok=True)
os.makedirs(profile_fig_save_dir, exist_ok=True) # 确保新文件夹存在# ======================== 准备结构体 ========================
all_nc_files = glob(os.path.join(input_root, "**", "*.nc"), recursive=True)
monthly_profiles = defaultdict(list)
yearly_profiles = defaultdict(int)
total_profiles_inspected = 0# ======================== 遍历所有 Argo 文件 ========================
for file_path in tqdm(all_nc_files, desc="🔍 正在检索剖面文件"):try:ds = xr.open_dataset(file_path)n_prof = ds.sizes["N_PROF"]total_profiles_inspected += n_proffor i in range(n_prof):# ...(此处循环内部逻辑与上一版完全相同,为节省空间已折叠)...lat = float(ds["LATITUDE"].values[i])lon = float(ds["LONGITUDE"].values[i])time_np = ds["JULD"].values[i]time_py = pd.Timestamp(time_np).to_pydatetime()if not (lat_min <= lat <= lat_max and lon_min <= lon <= lon_max):continueif not (start_date <= time_py <= end_date):continuepres_data = ds["PRES"].values[i]temp_data = ds["TEMP"].values[i]psal_data = ds["PSAL"].values[i]temp_qc = ds["TEMP_QC"].values[i]psal_qc = ds["PSAL_QC"].values[i]temp_mask = np.isin(temp_qc, valid_qc)psal_mask = np.isin(psal_qc, valid_qc)if len(temp_mask) != len(temp_data) or len(psal_mask) != len(psal_data):print(f"警告:在文件 {os.path.basename(file_path)} 中,剖面 {i} 的QC与数据长度不匹配,已跳过。")continuetemp_data[~temp_mask] = np.nanpsal_data[~psal_mask] = np.nantotal_levels = len(temp_data)if total_levels == 0:continuenan_temp_ratio = np.count_nonzero(np.isnan(temp_data)) / total_levelsnan_psal_ratio = np.count_nonzero(np.isnan(psal_data)) / total_levelsif nan_temp_ratio > nan_threshold or nan_psal_ratio > nan_threshold:continueprofile = {"LATITUDE": lat,"LONGITUDE": lon,"JULD": time_np,"PRES": pres_data,"TEMP": temp_data,"PSAL": psal_data,}month_key = time_py.strftime("%Y%m")year_key = time_py.strftime("%Y")monthly_profiles[month_key].append(profile)yearly_profiles[year_key] += 1ds.close()except Exception as e:print(f"读取或处理文件失败: {file_path},错误: {e}")# ======================== 工具函数 ========================
def pad(array_list, max_len):n = len(array_list)result = np.full((n, max_len), np.nan, dtype=np.float32)for i, arr in enumerate(array_list):clean_arr = arr[~np.isnan(arr)]result[i, :len(clean_arr)] = clean_arrreturn resultdef plot_profile_locations(lats, lons, month_str, save_dir, lon_lims, lat_lims):plt.figure(figsize=(10, 10))ax = plt.axes(projection=ccrs.PlateCarree())ax.set_extent([lon_lims[0], lon_lims[1], lat_lims[0], lat_lims[1]], crs=ccrs.PlateCarree())ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='#d9d9d9', zorder=1)ax.add_feature(cfeature.OCEAN, facecolor='#a7d8de', zorder=0)ax.add_feature(cfeature.COASTLINE, zorder=1)ax.scatter(lons, lats, s=15, c='royalblue', alpha=0.7, edgecolors='k', linewidth=0.5,transform=ccrs.PlateCarree(), zorder=2)gl = ax.gridlines(draw_labels=True, dms=False, x_inline=False, y_inline=False,color='gray', linestyle='--', alpha=0.5)gl.top_labels = Falsegl.right_labels = Falseax.set_title(f"Argo Profile Locations - {month_str}", fontsize=16)plt.tight_layout()fig_path = os.path.join(save_dir, f"profile_map_{month_str}.png")plt.savefig(fig_path, dpi=300, bbox_inches='tight')plt.close()return fig_path# --- 新增:绘制单个温盐剖面图的函数 ---
def plot_ts_profile(profile, save_path):"""为单个剖面数据绘制温盐剖面图"""# 提取数据pres = profile['PRES']temp = profile['TEMP']psal = profile['PSAL']lat, lon = profile['LATITUDE'], profile['LONGITUDE']time_str = pd.Timestamp(profile['JULD']).strftime('%Y-%m-%d %H:%M')# 创建画布和第一个坐标轴(温度)fig, ax1 = plt.subplots(figsize=(8, 10))fig.suptitle(f"Argo Profile\nDate: {time_str}, Lat: {lat:.2f}, Lon: {lon:.2f}", fontsize=16)# 绘制温度曲线ax1.set_xlabel("Temperature (°C)", color='red', fontsize=12)ax1.set_ylabel("Pressure (dbar)", fontsize=12)ax1.plot(temp, pres, '.-', color='red', label='Temperature')ax1.tick_params(axis='x', labelcolor='red')ax1.grid(linestyle='--', alpha=0.6)# 创建共享Y轴的第二个坐标轴(盐度)ax2 = ax1.twiny()ax2.set_xlabel("Salinity (PSU)", color='blue', fontsize=12)ax2.plot(psal, pres, '.-', color='blue', label='Salinity')ax2.tick_params(axis='x', labelcolor='blue')# Y轴(压力)反转,压力大的在下面ax1.invert_yaxis()# 保存图像plt.savefig(save_path, dpi=300, bbox_inches='tight')plt.close(fig)# ======================== 保存数据和图像 ========================
all_valid_profiles = [] # 创建一个列表存储所有有效剖面
print("\n📊 每月剖面数量统计与数据保存:")
for month, profiles in sorted(monthly_profiles.items()):n_prof = len(profiles)if n_prof == 0:continueall_valid_profiles.extend(profiles) # 将每月剖面添加到总列表中max_levels = 0for p in profiles:max_levels = max(max_levels, np.count_nonzero(~np.isnan(p["PRES"])))if max_levels == 0: continueds_out = xr.Dataset(# ... (Dataset创建逻辑不变) ...data_vars={"LATITUDE": ("N_PROF", [p["LATITUDE"] for p in profiles]),"LONGITUDE": ("N_PROF", [p["LONGITUDE"] for p in profiles]),"JULD": ("N_PROF", [p["JULD"] for p in profiles]),"PRES": (("N_PROF", "N_LEVELS"), pad([p["PRES"] for p in profiles], max_levels)),"TEMP": (("N_PROF", "N_LEVELS"), pad([p["TEMP"] for p in profiles], max_levels)),"PSAL": (("N_PROF", "N_LEVELS"), pad([p["PSAL"] for p in profiles], max_levels)),},coords={"profile": ("N_PROF", np.arange(n_prof)), "level": ("N_LEVELS", np.arange(max_levels))},attrs={"title": f"Filtered Argo Profiles for {month}", "date_created": datetime.now().strftime("%Y-%m-%d")})nc_path = os.path.join(nc_save_dir, f"filtered_profiles_{month}.nc")ds_out.to_netcdf(nc_path, format="NETCDF4")print(f"✅ 已保存 NetCDF:{nc_path}")lats = [p["LATITUDE"] for p in profiles]lons = [p["LONGITUDE"] for p in profiles]png_path = plot_profile_locations(lats, lons, month, png_save_dir, (lon_min, lon_max), (lat_min, lat_max))print(f"🖼️ 已保存位置图:{png_path}")print(f" {month}: {n_prof} 条剖面")# ======================== 绘制最后5条剖面图 ========================
print("\n📈 正在绘制最后的剖面图示例...")
if len(all_valid_profiles) >= 5:profiles_to_plot = all_valid_profiles[-5:]print(f" 选取最后 {len(profiles_to_plot)} 条有效剖面进行绘图。")for i, profile in enumerate(profiles_to_plot):save_path = os.path.join(profile_fig_save_dir, f"profile_example_{i+1}.png")plot_ts_profile(profile, save_path)print(f" ✅ 已保存剖面图示例:{save_path}")
elif len(all_valid_profiles) > 0:print(f" 有效剖面总数不足5条,仅绘制 {len(all_valid_profiles)} 条。")for i, profile in enumerate(all_valid_profiles):save_path = os.path.join(profile_fig_save_dir, f"profile_example_{i+1}.png")plot_ts_profile(profile, save_path)print(f" ✅ 已保存剖面图示例:{save_path}")
else:print(" 未找到任何有效剖面,无法绘制示例图。")# ======================== 最终结果汇总 ========================
total_profiles_filtered = sum(yearly_profiles.values())
pass_rate = (total_profiles_filtered / total_profiles_inspected * 100) if total_profiles_inspected > 0 else 0print("\n" + "="*55)
print("📊 最终结果汇总")
print("-"*55)
print(f"{'总计检查剖面数量'.ljust(25)}: {total_profiles_inspected}")
print(f"{'成功筛选有效剖面数量'.ljust(25)}: {total_profiles_filtered}")
print(f"{'数据筛选通过率'.ljust(25)}: {pass_rate:.2f}%")
print("")print("按年统计细分:")
if not yearly_profiles:print(" - 未筛选出任何年份的数据。")
else:for year, count in sorted(yearly_profiles.items()):print(f" - {year} 年: {count} 条剖面")
print("="*55)