我目前的做法是:先用 DataFrame 创建一个临时表,再把它合并(MERGE)到目标表中,以此更新 Snowflake 表里的数据。有没有更高效的实现方式?比如不借助临时表,直接把 DataFrame 合并到 Snowflake 表中?
因为我需要对好几张各有几千行数据的表执行这个操作。
我的代码:
import os
from urllib.parse import quote_plus

import pandas as pd
from snowflake.connector.pandas_tools import pd_writer
from sqlalchemy import create_engine
from sqlalchemy import text
# Upsert a pandas DataFrame into an existing Snowflake table by loading it
# into a staging table and running a single MERGE against the target.

# The password is URL-escaped so that characters such as '@', '%' or '/'
# cannot corrupt the connection URL.
engine = create_engine(
    'snowflake://{user}:{password}@{account_identifier}/{database_name}/{schema_name}?warehouse={warehouse_name}&role={role_name}'.format(
        user='user',
        password=quote_plus(os.environ['SNOWFLAKE_PASSWORD']),
        account_identifier='account_identifier',
        database_name='DB_NAME',
        # Must match the schema used by to_sql() and DROP TABLE below, because
        # the MERGE statement refers to both tables without a schema prefix.
        schema_name='SCHEMA_NAME',
        warehouse_name='WH',
        role_name='ADMIN',
    )
)
conn = engine.connect()
temp_table_name = 'source_table'
df = pd.DataFrame({'id': [1, 2, 3], 'description': ['a', 'b', 'c']})

try:
    try:
        # Create the staging table in Snowflake and bulk-load the DataFrame.
        res_sql = df.to_sql(
            temp_table_name.lower(),
            engine,
            if_exists='replace',
            index=False,
            method=pd_writer,
            schema='SCHEMA_NAME',
        )

        # Merge the staging table into the existing table. A SQLAlchemy
        # Connection has no cursor(); statements go through execute(), and the
        # begin() block commits on success and rolls back on error.
        with conn.begin():
            conn.execute(
                text(
                    '''
MERGE INTO target_table USING source_table
ON target_table.id = source_table.id
WHEN MATCHED THEN
UPDATE SET target_table.description = source_table.description
WHEN NOT MATCHED THEN
INSERT (ID, description) VALUES (source_table.id, source_table.description);
'''
                )
            )
    finally:
        # Drop the staging table even when the load or the merge failed, so a
        # failed run does not leave it behind.
        with conn.begin():
            conn.execute(
                text("DROP TABLE IF EXISTS DB_NAME.SCHEMA_NAME.source_table")
            )
finally:
    # Release the Snowflake session and the engine's connection pool.
    conn.close()
    engine.dispose()