import duckdb
# Query the Lance dataset directly with DuckDB.
# NOTE(review): `dataset` is referenced by name inside the SQL but is not
# defined in this snippet -- presumably a Lance dataset object created
# earlier; confirm it is in scope before this statement runs.
duck_df = duckdb.query("""
SELECT
    category,
    COUNT(*) as count,
    AVG(price) as avg_price,
    MAX(price) as max_price
FROM dataset
WHERE a > 20
GROUP BY category
ORDER BY avg_price DESC
""")
print(type(duck_df))  # <class '_duckdb.DuckDBPyRelation'>
# Not recommended: to_df() loads the entire result into memory, so use it
# only on a final (small) result.
result = duck_df.to_df()
type(result)  # pandas.core.frame.DataFrame
duck_df.show()  # print the relation (original note: "pyarrow-style show")
# 支持 with...as...
# View-style queries via CTEs (WITH ... AS ...) are supported and can be
# chained: v1 filters the `duck_df` relation referenced by name in the SQL,
# and v2 filters v1 further.
duckdb.query("""
with v1 as (
    select * from duck_df where count > 15
),
v2 as (
    select * from v1 where avg_price > 400
)
select * from v2
""").show()
# 支持临时视图
# Create (or replace) the view v_tmp1 over the rows of duck_df whose category
# is 'A'. NOTE(review): the original note calls this a "temporary view", but
# the statement is a plain CREATE OR REPLACE VIEW -- confirm the intent.
create_view_sql = "create or replace view v_tmp1 as select * from duck_df where category = 'A'"
duckdb.sql(create_view_sql)

# Read the view back, appending a constant column f1 = 1, and print it.
view_with_flag = duckdb.query("select *,1 as f1 from v_tmp1")
view_with_flag.show()