Python 操作 Hive
安装依赖库
pip install thrift pure-sasl thrift_sasl future pyhive
导包
from pyhive import hive
from TCLIService.ttypes import TOperationState
连接Hive服务器
def get_hive_connection(host='localhost', port=10000, username='your_username', database='default'):
    """Open a PyHive connection and report the outcome on stdout.

    Args:
        host: Host name or address passed to ``hive.Connection``.
        port: Port passed to ``hive.Connection``.
        username: User name passed to ``hive.Connection``.
        database: Database name passed to ``hive.Connection``.

    Returns:
        The object returned by ``hive.Connection`` on success, or ``None``
        when any exception is raised while connecting (the error is printed,
        not re-raised).
    """
    connection_args = {
        'host': host,
        'port': port,
        'username': username,
        'database': database,
    }
    try:
        hive_conn = hive.Connection(**connection_args)
        print(f"成功连接到Hive服务器: {host}:{port}")
        return hive_conn
    except Exception as exc:
        # Best-effort helper: callers check for None instead of catching.
        print(f"连接失败: {exc}")
        return None
# Connect with explicit settings and show the resulting connection object
# (None is printed if the connection attempt failed).
conn = get_hive_connection(
    host='36.41.67.11',
    port=10000,
    username='root',
    database='test',
)
print(conn)
成功连接到Hive服务器: 36.41.67.11:10000
<pyhive.hive.Connection object at 0x0000023723785C40>
执行查询并返回结果
def execute_query(connection, query):
    """Run a query on an open connection and return every result row.

    Args:
        connection: Object providing ``cursor()``; a falsy value is treated
            as "no connection available".
        query: Query text passed unchanged to ``cursor.execute``.

    Returns:
        The value of ``cursor.fetchall()`` on success, or an empty list when
        there is no connection or any step raises (the error is printed).
    """
    if not connection:
        print("没有可用的连接")
        return []

    # Bind the name before the try block: if connection.cursor() itself
    # raises, the finally clause must not hit an unbound local.
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute(query)
        status = cursor.poll().operationState
        if status == TOperationState.FINISHED_STATE:
            print("查询成功执行")
        else:
            print(f"查询状态: {status}")
        return cursor.fetchall()
    except Exception as e:
        print(f"查询执行失败: {e}")
        return []
    finally:
        # Only close a cursor that was actually created.
        if cursor is not None:
            cursor.close()
# Run the per-gender count query and print each returned row on its own line.
sql = 'select gender, count(1) num from student group by gender'
result = execute_query(conn, sql)

print("\n查询结果:")
for row in result:
    print(row)
查询成功执行

查询结果:
('女', 9)
('男', 11)