Google BigQuery
要从 GBQ 读取数据或向其写入数据,需要安装额外的依赖项:
$ pip install google-cloud-bigquery
读取
我们可以像这样将查询结果加载到 DataFrame 中:
from_arrow · 需启用 pyarrow 功能 · 需启用 fsspec 功能
import polars as pl
from google.cloud import bigquery
client = bigquery.Client()

# SQL to run: names from the public USA names table where state = "TX",
# capped at 100 rows.
QUERY = (
    'SELECT name '
    'FROM `bigquery-public-data.usa_names.usa_1910_2013` '
    'WHERE state = "TX" '
    'LIMIT 100'
)

# Submitting the query is the API request; result() waits for it to finish.
query_job = client.query(QUERY)
rows = query_job.result()

# Convert the result rows to Arrow, then build the Polars DataFrame from it.
arrow_table = rows.to_arrow()
df = pl.from_arrow(arrow_table)
写入
import io

from google.cloud import bigquery

client = bigquery.Client()

# `df` is the Polars DataFrame to upload (e.g. the one built in the read
# example).
# Write DataFrame to stream as parquet file; does not hit disk
with io.BytesIO() as stream:
    df.write_parquet(stream)
    # Rewind so the upload reads the buffer from the beginning.
    stream.seek(0)
    parquet_options = bigquery.ParquetOptions()
    # Ask BigQuery to apply schema inference to Parquet LIST columns.
    parquet_options.enable_list_inference = True
    job = client.load_table_from_file(
        stream,
        destination='tablename',
        project='projectname',
        job_config=bigquery.LoadJobConfig(
            source_format=bigquery.SourceFormat.PARQUET,
            parquet_options=parquet_options,
        ),
    )
job.result()  # Waits for the job to complete