py_innodb_page_info.py表空间分析
介绍
由《MySQL技术内幕 InnoDB存储引擎》一书的作者姜承尧开发的 `py_innodb_page_info.py` 是一款功能强大的工具,旨在深入分析MySQL表空间中的页面类型及其详细信息。该工具采用Python编写,由三个文件组成:`py_innodb_page_info.py`、`mylib.py` 和 `include.py`。为了确保工具的正常运行,这三个文件需要放置在同一个目录下。
值得注意的是,该工具需要在Python 2的环境中运行。如果在Python 3环境下使用,可能会遇到兼容性问题,导致运行出错。此外,`py_innodb_page_info.py` 工具专门设计用于解析MySQL 5.7版本的`.ibd`文件。对于MySQL 8.0及更高版本的`.ibd`文件,该工具可能无法正确解析。
include.py
#include.py
#encoding=utf-8
INNODB_PAGE_SIZE = 1024 * 16  # InnoDB page size: 16KB

# Start of the data on the page
FIL_PAGE_DATA = 38

FIL_PAGE_OFFSET = 4  # page offset inside space
FIL_PAGE_TYPE = 24  # file page type

# Types of an undo log segment
TRX_UNDO_INSERT = 1
TRX_UNDO_UPDATE = 2

# On a page of any file segment, data may be put starting from this offset
FSEG_PAGE_DATA = FIL_PAGE_DATA

# The offset of the undo log page header on pages of the undo log
TRX_UNDO_PAGE_HDR = FSEG_PAGE_DATA

# Level of the node in an index tree; the leaf level is the level 0
PAGE_LEVEL = 26

# Readable page type names, keyed by the 4-digit lowercase hex string of the
# 2-byte page type field.
innodb_page_type = {
    '0000': u'Freshly Allocated Page',
    '0002': u'Undo Log Page',
    '0003': u'File Segment inode',
    '0004': u'Insert Buffer Free List',
    '0005': u'Insert Buffer Bitmap',
    '0006': u'System Page',
    '0007': u'Transaction system Page',
    '0008': u'File Space Header',
    '0009': u'Extend Description Page',
    '000a': u'Uncompressed BLOB Page',
    '000b': u'1st compressed BLOB Page',
    '000c': u'Subsequent compressed BLOB Page',
    '45bf': u'B-tree Node',
}

# Readable page direction names, keyed by 4-digit lowercase hex string.
innodb_page_direction = {
    '0000': 'Unknown(0x0000)',
    '0001': 'Page Left',
    '0002': 'Page Right',
    '0003': 'Page Same Rec',
    '0004': 'Page Same Page',
    '0005': 'Page No Direction',
    'ffff': 'Unkown2(0xffff)',
}
mylib.py
#encoding=utf-8
import binascii
import os

import include
from include import *

VARIABLE_FIELD_COUNT = 1
NULL_FIELD_COUNT = 0


class myargv(object):
    """Holds and parses the command-line arguments of py_innodb_page_info.py.

    Attributes:
        argv: the raw argument list passed to the constructor.
        parms: dict mapping each option string (e.g. '-v') to its value;
            flag options such as '-h' and '-v' map to ''.
        tablespace: the last non-option argument, i.e. the tablespace file.
    """

    def __init__(self, argv):
        self.argv = argv
        self.parms = {}
        self.tablespace = ''

    def _print_usage(self):
        """Print the short usage hint."""
        print('Usage: python py_innodb_page_info.py [OPTIONS] tablespace_file')
        print('For more options, use python py_innodb_page_info.py -h')

    def parse_cmdline(self):
        """Parse self.argv into self.parms and self.tablespace.

        Returns:
            1 when a tablespace file was given and analysis should proceed,
            0 when only usage/help was printed (no arguments, '-h', an option
            that is missing its value, or no tablespace file).
        """
        # Skip argv[0] (the script name) so it is never taken as the
        # tablespace file when the user passes only options.
        args = list(self.argv[1:])
        if not args:
            self._print_usage()
            return 0

        show_help = False
        while args:
            arg = args[0]
            if arg.startswith('-'):
                # arg[1:2] is '' for a bare '-', which avoids an IndexError.
                flag = arg[1:2]
                if flag == 'h':
                    self.parms[arg] = ''
                    show_help = True
                    break
                if flag == 'v':
                    self.parms[arg] = ''
                    args = args[1:]
                elif len(args) < 2:
                    # Any other option consumes the next argument as its value.
                    print('Option %s requires a value' % arg)
                    self._print_usage()
                    return 0
                else:
                    self.parms[arg] = args[1]
                    args = args[2:]
            else:
                self.tablespace = arg
                args = args[1:]

        if show_help:
            print('Get InnoDB Page Info')
            print('Usage: python py_innodb_page_info.py [OPTIONS] tablespace_file\n')
            print('The following options may be given as the first argument:')
            print('-h help ')
            print('-o output put the result to file')
            print('-t number thread to anayle the tablespace file')
            print('-v verbose mode')
            return 0

        if not self.tablespace:
            self._print_usage()
            return 0
        return 1


def mach_read_from_n(page, start_offset, length):
    """Return `length` bytes of `page` from `start_offset` as lowercase hex text.

    Args:
        page: raw page content (a byte string).
        start_offset: index of the first byte to read.
        length: number of bytes to read.
    """
    ret = page[start_offset:start_offset + length]
    # binascii.hexlify works on both Python 2 and 3, unlike str.encode('hex').
    return binascii.hexlify(ret).decode('ascii')


# Main routine
def get_innodb_page_type(myargv):
    """Scan a tablespace file page by page and print page type statistics.

    Args:
        myargv: parsed command line; `myargv.tablespace` is the path of the
            .ibd file and `myargv.parms` is the option dict. When '-v' is in
            `myargv.parms`, one line is printed per page.

    Prints the total page count followed by the number of pages of each type.
    """
    verbose = '-v' in myargv.parms
    ret = {}
    # The context manager guarantees the file is closed even on error.
    with open(myargv.tablespace, 'rb') as f:
        # Page count = file size in bytes divided by the 16KB page size.
        # Floor division keeps this an int on Python 3 as well; a trailing
        # partial page, if any, is not counted.
        fsize = os.path.getsize(f.name) // INNODB_PAGE_SIZE
        for _ in range(fsize):
            # Raw bytes of the next page.
            page = f.read(INNODB_PAGE_SIZE)
            # Bytes [4, 8) of the page: the page offset.
            page_offset = mach_read_from_n(page, FIL_PAGE_OFFSET, 4)
            # Bytes [24, 26) of the page: the page type.
            page_type = mach_read_from_n(page, FIL_PAGE_TYPE, 2)
            if verbose:
                # Fall back to a generic label instead of raising KeyError
                # for page types missing from innodb_page_type.
                type_name = innodb_page_type.get(
                    page_type, 'Unknown Page Type (0x%s)' % page_type)
                if page_type == '45bf':
                    # B-tree node: bytes [64, 66) of the page hold its level.
                    page_level = mach_read_from_n(
                        page, FIL_PAGE_DATA + PAGE_LEVEL, 2)
                    print("page offset %s, page type <%s>, page level <%s>"
                          % (page_offset, type_name, page_level))
                else:
                    print("page offset %s, page type <%s>"
                          % (page_offset, type_name))
            # Count pages per type.
            ret[page_type] = ret.get(page_type, 0) + 1

    print("Total number of page: %d:" % fsize)
    for seen_type in ret:
        type_name = innodb_page_type.get(
            seen_type, 'Unknown Page Type (0x%s)' % seen_type)
        print("%s: %s" % (type_name, ret[seen_type]))
py_innodb_page_info.py
#! /usr/bin/env python
#encoding=utf-8
import mylib
from sys import argv
from mylib import myargv# main函数
if __name__ == '__main__':
    # Bind the instance to its own name so the imported myargv class is not
    # shadowed by its instance.
    cmdline = myargv(argv)
    # parse_cmdline() returns 0 when it only printed usage/help; any other
    # result means the tablespace file should be analysed.
    if cmdline.parse_cmdline() != 0:
        mylib.get_innodb_page_type(cmdline)
演示
python2 py_innodb_page_info.py -v demo1/example_table.ibd
page offset 00000000, page type <File Space Header>
page offset 00000001, page type <Insert Buffer Bitmap>
page offset 00000002, page type <File Segment inode>
page offset 00000003, page type <B-tree Node>, page level <0000> //数据页;page level 为0000表示叶子层,数值越大表示离叶子层越远(例如在两层的B+树中,0001表示根层)
page offset 00000004, page type <B-tree Node>, page level <0000>
page offset 00000005, page type <B-tree Node>, page level <0000>
page offset 00000006, page type <B-tree Node>, page level <0000>
page offset 00000007, page type <B-tree Node>, page level <0000>
page offset 00000008, page type <B-tree Node>, page level <0000>
page offset 00000000, page type <Freshly Allocated Page>
page offset 00000000, page type <Freshly Allocated Page>
Total number of page: 11: //总共分配的页数
Freshly Allocated Page: 2 //可用的数据页
Insert Buffer Bitmap: 1 //插入缓冲页
File Space Header: 1 //文件空间头
B-tree Node: 6 //数据页
File Segment inode: 1 //文件段inode