【Python】读取xlsb或xlsx的单一或连续单元格工具类
代码主要来自Kimi.ai,有修改。
工作表通过序号定位,序号从1开始。
运行前需要先安装openpyxl和pyxlsb两个第三方库(例如:pip install openpyxl pyxlsb)。
import openpyxl
from openpyxl.utils import range_boundaries
from pyxlsb import open_workbook as open_xlsb
class ExcelReader:
    """Read a single cell or a rectangular range from an .xlsb or .xlsx file.

    Worksheets are addressed by their 1-based position in the workbook.
    ``.xlsx`` files are loaded with openpyxl (``data_only=True``) and ``.xlsb``
    files are opened with pyxlsb.  Every result is a two-dimensional list in
    which empty (``None``) cells are replaced by ``self.nan_val``.
    """

    def __init__(self, file_path):
        """Store the workbook path and detect its format from the extension.

        :param file_path: path of the Excel file, as a string
        :raises ValueError: if the path ends in neither ``.xlsb`` nor ``.xlsx``
        """
        self.file_path = file_path
        # Compare in lower case so that e.g. "REPORT.XLSX" is accepted too.
        lowered = file_path.lower()
        self.is_xlsb = lowered.endswith('.xlsb')
        self.is_xlsx = lowered.endswith('.xlsx')
        if not self.is_xlsb and not self.is_xlsx:
            raise ValueError("不支持的文件格式,仅支持.xlsb和.xlsx文件")
        # Value substituted for empty (None) cells; assign '' for blank strings.
        self.nan_val = 0

    def _fill(self, value):
        """Return ``value``, or ``self.nan_val`` when the cell is empty."""
        return self.nan_val if value is None else value

    def _find_xlsb_cell(self, rows, row, col):
        """Return the filled value at 1-based (row, col) from pyxlsb-style rows.

        ``rows`` is an iterable of cell lists whose items expose ``.r``, ``.c``
        and ``.v``; ``.r`` and ``.c`` are treated as 0-based indices.  A missing
        row or column yields ``self.nan_val``.
        """
        for cells in rows:
            if not cells or cells[0].r != row - 1:
                continue
            for cell in cells:
                if cell.c == col - 1:
                    return self._fill(cell.v)
            # The requested row exists but has no cell in that column.
            break
        return self.nan_val

    def _collect_xlsb_range(self, rows, min_col, min_row, max_col, max_row):
        """Build the rectangle for 1-based inclusive bounds from pyxlsb-style rows.

        The grid is pre-filled with ``self.nan_val`` so the result always has
        ``max_row - min_row + 1`` rows and ``max_col - min_col + 1`` columns,
        matching the shape the .xlsx path returns.
        """
        width = max_col - min_col + 1
        height = max_row - min_row + 1
        grid = [[self.nan_val] * width for _ in range(height)]
        for cells in rows:
            if not cells:
                continue
            row_no = cells[0].r + 1  # convert the 0-based index to 1-based
            if not min_row <= row_no <= max_row:
                continue
            target = grid[row_no - min_row]
            for cell in cells:
                col_no = cell.c + 1
                if min_col <= col_no <= max_col:
                    target[col_no - min_col] = self._fill(cell.v)
        return grid

    def get_sheet_by_index(self, sheet_index):
        """Look up a worksheet by its 1-based position.

        :param sheet_index: worksheet number, starting at 1
        :return: for .xlsx the openpyxl worksheet; for .xlsb the matching entry
            of ``wb.sheets``, which the read methods pass on to ``wb.get_sheet``
        :raises ValueError: if ``sheet_index`` is out of range
        """
        if self.is_xlsb:
            with open_xlsb(self.file_path) as wb:
                sheets = list(wb.sheets)
            if sheet_index < 1 or sheet_index > len(sheets):
                raise ValueError("工作表序号超出范围")
            return sheets[sheet_index - 1]
        elif self.is_xlsx:
            wb = openpyxl.load_workbook(self.file_path, data_only=True)
            # Bound-check against the list that is actually indexed below.
            worksheets = wb.worksheets
            if sheet_index < 1 or sheet_index > len(worksheets):
                raise ValueError("工作表序号超出范围")
            return worksheets[sheet_index - 1]

    def read_cell(self, sheet_index, cell_address):
        """Read one cell.

        :param sheet_index: worksheet number, starting at 1
        :param cell_address: cell address such as 'A1'
        :return: ``[[value]]``; an empty or missing cell yields ``self.nan_val``
        :raises ValueError: if ``sheet_index`` is out of range
        """
        sheet = self.get_sheet_by_index(sheet_index)
        if self.is_xlsb:
            row, col = openpyxl.utils.coordinate_to_tuple(cell_address)
            with open_xlsb(self.file_path) as wb:
                with wb.get_sheet(sheet) as sheet_data:
                    return [[self._find_xlsb_cell(sheet_data.rows(), row, col)]]
        elif self.is_xlsx:
            return [[self._fill(sheet[cell_address].value)]]

    def read_range(self, sheet_index, range_address):
        """Read a rectangular block of cells.

        :param sheet_index: worksheet number, starting at 1
        :param range_address: bounded range address such as 'C1:F4'
        :return: list of rows (each a list of values) covering the range, with
            empty cells replaced by ``self.nan_val``
        :raises ValueError: if ``sheet_index`` is out of range
        """
        sheet = self.get_sheet_by_index(sheet_index)
        min_col, min_row, max_col, max_row = range_boundaries(range_address)
        if self.is_xlsb:
            with open_xlsb(self.file_path) as wb:
                with wb.get_sheet(sheet) as sheet_data:
                    return self._collect_xlsb_range(
                        sheet_data.rows(), min_col, min_row, max_col, max_row
                    )
        elif self.is_xlsx:
            return [
                [self._fill(value) for value in row]
                for row in sheet.iter_rows(
                    min_row=min_row,
                    max_row=max_row,
                    min_col=min_col,
                    max_col=max_col,
                    values_only=True,
                )
            ]
# Example usage
if __name__ == "__main__":
    # Replace with the path of your own workbook.
    workbook_path = "test.xlsb"
    excel = ExcelReader(workbook_path)

    # Single cell: D6 on the first worksheet.
    print("单个单元格数据:", excel.read_cell(1, "D6"))

    # Cell range: A5:C9 on the first worksheet.
    print("单元格区域数据:", excel.read_range(1, "A5:C9"))