json转excel xlsx文件
这里直接把 JSON 文件转成 Excel;对于过于复杂的嵌套结构,只转换顶部几层。
主要功能包括:1) 处理嵌套结构和复杂值(字典/列表);2) 支持数字和字符串键的排序;3) 特殊字段(如vv__objid和_TID_)优先排列;4) 可处理key-val格式的JSON数据。脚本通过递归处理JSON数据,保留顶层结构,并将复杂值序列化为JSON字符串,最终生成包含所有数据的Excel工作表。
使用方式为:python json_to_xlsx.py <json文件> <xlsx文件>。
import sys
import json
import csv
import io
from xlsxwriter.workbook import Workbook##
# Convert to string keeping encoding in mind...
##
def to_string(s):
    """Return ``s`` converted to a string.

    ``str(s)`` is tried first.  If that raises a ``UnicodeError`` the value
    is encoded as UTF-8 instead; any other exception propagates to the
    caller rather than being silently swallowed.
    """
    try:
        return str(s)
    except UnicodeError:
        # Change the encoding type if needed.
        return s.encode('utf-8')


def reduce_item(key, value, target=None):
    """Flatten ``value`` into a single-level mapping of string -> string.

    Lists contribute ``<key>_<index>`` entries and dicts contribute
    ``<key>_<sub_key>`` entries (just ``<sub_key>`` when ``key`` is the
    empty string); nesting is followed recursively.  Every leaf is stored
    through ``to_string``, so e.g. ``None`` is stored as ``"None"``.

    Args:
        key: Column-name prefix accumulated so far ("" at the top level).
        value: The JSON-like value to flatten.
        target: Dict that receives the flattened entries.  When omitted,
            the module-level ``reduced_item`` dict is used, which is the
            calling convention this function originally had.

    Returns:
        None. ``target`` (or the global ``reduced_item``) is mutated.
    """
    global reduced_item

    if isinstance(value, list):
        # Reduction condition 1: one entry per list position.
        for index, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(index), sub_item, target)
    elif isinstance(value, dict):
        # Reduction condition 2: one entry per dict key.
        for sub_key, sub_value in value.items():
            sub_key_text = to_string(sub_key)
            new_key = sub_key_text if key == "" else key + '_' + sub_key_text
            reduce_item(new_key, sub_value, target)
    else:
        # Base condition: a scalar leaf.  The global is only looked up
        # here so that callers relying on it see the same behaviour as
        # before for inputs without any leaves.
        if target is None:
            target = reduced_item
        target[to_string(key)] = to_string(value)


def merge_headers(header, keys):
    """Append to ``header`` every entry of ``keys`` it does not yet contain.

    First-seen order is preserved and ``header`` is modified in place.
    Entries are expected to be hashable (they are dict keys in this script).

    Args:
        header: List of column names collected so far.
        keys: Iterable of candidate column names.
    """
    # A set makes each membership test O(1) instead of rescanning the list.
    known = set(header)
    for name in keys:
        if name not in known:
            known.add(name)
            header.append(name)


def convert_dict_to_array(data):
    """Convert a non-empty dict into a list of row dicts.

    Each entry becomes one row whose ``_TID_`` field holds the original
    key.  Dict values are copied with ``_TID_`` inserted first (a
    ``_TID_`` already present in the value wins); any other value is
    wrapped as ``{'_TID_': key, 'value': value}``.  Rows are ordered
    numerically when every key can be parsed by ``int()``, otherwise by
    the natural ordering of the keys.

    Args:
        data: Any value; only non-empty dicts are converted.

    Returns:
        The list of rows, or ``data`` unchanged when it is not a
        non-empty dict.
    """
    if not isinstance(data, dict) or not data:
        return data

    def _is_int_like(candidate):
        try:
            int(candidate)
        except (ValueError, TypeError):
            return False
        return True

    if all(_is_int_like(k) for k in data):
        # Numeric keys: sort by numeric value so "10" follows "2".
        ordered_keys = sorted(data, key=int)
    else:
        # String keys: plain alphabetical sort.
        ordered_keys = sorted(data)

    rows = []
    for k in ordered_keys:
        entry = data[k]
        if isinstance(entry, dict):
            # Build a fresh dict so the input is not mutated and _TID_
            # ends up as the first field.
            row = {'_TID_': k}
            row.update(entry)
        else:
            row = {'_TID_': k, 'value': entry}
        rows.append(row)
    return rows


def serialize_complex_values(data):
    """Serialize dict/list field values of each row to compact JSON text.

    Args:
        data: Expected to be a list of rows.  Anything that is not a list
            is returned unchanged.

    Returns:
        A new list.  Dict rows are rebuilt with every dict or list field
        value replaced by ``json.dumps(value, ensure_ascii=False,
        separators=(',', ':'))``; other field values and non-dict rows
        are passed through as they are.
    """
    if not isinstance(data, list):
        return data

    def _encode(field_value):
        if isinstance(field_value, (dict, list)):
            return json.dumps(field_value, ensure_ascii=False, separators=(',', ':'))
        return field_value

    return [
        {name: _encode(field_value) for name, field_value in row.items()}
        if isinstance(row, dict) else row
        for row in data
    ]


# Columns that are always moved to the front of the sheet, in this order.
_PRIORITY_COLUMNS = ("vv__objid", "_TID_")


def _load_json(json_file_path):
    """Read and parse a JSON file (UTF-8, an optional BOM is skipped)."""
    with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
        return json.loads(fp.read())


def _rows_from_key_val(raw_data):
    """Expand ``{'key': [...], 'val': [[...], ...]}`` into a list of dicts.

    Rows shorter than the key list are padded with ``None``.
    """
    keys = raw_data['key']
    return [
        {name: (row[i] if i < len(row) else None) for i, name in enumerate(keys)}
        for row in raw_data['val']
    ]


def _order_header(header):
    """Return ``header`` with the priority columns that occur moved first."""
    leading = [name for name in _PRIORITY_COLUMNS if name in header]
    trailing = [name for name in header if name not in _PRIORITY_COLUMNS]
    return leading + trailing


def _write_workbook(xlsxfile, header, rows):
    """Write the header row and all data rows to a new worksheet."""
    # The context manager closes the workbook even if a write fails.
    with Workbook(xlsxfile) as workbook:
        worksheet = workbook.add_worksheet()
        for col, name in enumerate(header):
            worksheet.write(0, col, name)
        for row_index, row in enumerate(rows, start=1):
            for col, name in enumerate(header):
                # Rows that lack a column get an empty cell.
                worksheet.write(row_index, col, row.get(name, ""))


def main(argv=None):
    """Convert the JSON file named in ``argv[1]`` to the XLSX file ``argv[2]``.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Prints a usage message and returns when the argument count is wrong.
    Calls ``sys.exit(1)`` when the processed data is not a list or when
    no rows are left to write.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print("\nUsage: python json_to_xlsx.py <json_in_file_path> <xlsxfile>\n")
        return

    # Reading arguments
    json_file_path = argv[1]
    xlsxfile = argv[2]
    print("converting " + json_file_path + "\n")

    raw_data = _load_json(json_file_path)

    # Detect key-val formatted JSON (produced by converting from Excel)
    # and turn it into the standard list-of-dicts layout.
    if isinstance(raw_data, dict) and 'key' in raw_data and 'val' in raw_data:
        data_to_be_processed = _rows_from_key_val(raw_data)
    else:
        data_to_be_processed = raw_data

    # Convert a dict into a list of rows (numeric and string keys supported).
    data_to_be_processed = convert_dict_to_array(data_to_be_processed)
    # Serialize complex field values to strings.
    data_to_be_processed = serialize_complex_values(data_to_be_processed)

    # Make sure we ended up with a list of rows.
    if not isinstance(data_to_be_processed, list):
        print("错误:数据处理后仍不是数组格式")
        sys.exit(1)

    processed_data = []
    header = []
    for item in data_to_be_processed:
        flat_row = {}
        reduce_item("", item, flat_row)
        merge_headers(header, flat_row.keys())
        processed_data.append(flat_row)

    # Nothing to write: bail out.
    if not processed_data:
        print("错误:没有有效数据可处理,程序退出")
        sys.exit(1)

    # vv__objid goes in the first column and _TID_ after it (when present).
    header = _order_header(header)

    _write_workbook(xlsxfile, header, processed_data)
    print("Just completed writing xlsx file with %d columns" % len(header))


if __name__ == "__main__":
    main()