不同主机上的Pyspark DDL SQL查询

问题描述 投票:0回答:1

我对 Python 和整个 Glue 脚本都是新手。我想在另一台数据库服务器上执行删除查询，但不确定自己哪里做错了。


def remove_from_db(driver, url, dbtable, user, password):
    """Attempt to issue a hard-coded DELETE through Spark's JDBC data source.

    Args:
        driver: Value passed as the JDBC ``driver`` option.
        url: Value passed as the JDBC ``url`` option.
        dbtable: Value passed as the JDBC ``dbtable`` option.
        user: Value passed as the JDBC ``user`` option.
        password: Value passed as the JDBC ``password`` option.

    NOTE(review): this is the snippet the question is asking about; it is
    kept unchanged here. See the inline notes for the suspected problems.
    """
    # The SQL text is fixed; the table it names does not come from `dbtable`.
    query = "delete from abctable where end_date >='1-11-2023'"
    
    # NOTE(review): `spark` is not assigned inside this function, so it has to
    # exist in an enclosing scope.
    # NOTE(review): in the PySpark API `write` is exposed on DataFrame and
    # `load()` on the reader, so this chain presumably fails before any SQL
    # reaches the database -- confirm against the PySpark reference.
    # NOTE(review): both "dbtable" and "query" are set below; the Spark JDBC
    # documentation says these two options cannot be combined -- confirm.
    spark.write.format("jdbc") \
        .option("driver", driver) \
        .option("url", url) \
        .option("dbtable", dbtable) \
        .option("user", user) \
        .option("password", password) \
        .option("query", query) \
        .load()
python mysql pyspark aws-glue
1个回答
0
投票
from pyspark.sql import SparkSession

def remove_from_db(driver, url, dbtable, user, password):
    """Delete rows with ``end_date >= '2023-01-11'`` from ``dbtable`` over JDBC.

    Spark's JDBC data source cannot run this statement: its ``query`` option
    is parenthesized and used as a subquery in a ``SELECT ... FROM`` clause,
    and it may not be combined with ``dbtable``. The DELETE is therefore sent
    over a plain ``java.sql`` connection opened inside the Spark driver JVM,
    which reuses the JDBC driver jar already available to the job.

    Args:
        driver: Fully qualified JDBC driver class name
            (for example ``com.mysql.cj.jdbc.Driver``).
        url: JDBC connection URL of the target database server.
        dbtable: Table to delete from, optionally schema-qualified
            (``schema.table``).
        user: Database user name.
        password: Database password.

    Returns:
        The number of rows removed, as reported by the JDBC driver.

    Raises:
        ValueError: If ``dbtable`` is not a plain (optionally
            schema-qualified) identifier.
    """
    import re

    # Table names cannot be bound as statement parameters, so refuse anything
    # that is not a simple identifier before splicing it into the SQL text.
    identifier = r"[A-Za-z_][A-Za-z0-9_$]*"
    if not isinstance(dbtable, str) or not re.fullmatch(
        rf"{identifier}(\.{identifier})?", dbtable
    ):
        raise ValueError(f"Unsafe table name: {dbtable!r}")

    query = f"delete from {dbtable} where end_date >= '2023-01-11'"

    # getOrCreate() hands back the session the job already owns; it is
    # deliberately not stopped here so later steps of the job keep working.
    spark = SparkSession.builder.appName("DeleteOperation").getOrCreate()
    jvm = spark.sparkContext._jvm

    # Loading the class makes the driver register itself with DriverManager.
    jvm.java.lang.Class.forName(driver)
    connection = jvm.java.sql.DriverManager.getConnection(url, user, password)
    try:
        statement = connection.createStatement()
        try:
            return statement.executeUpdate(query)
        finally:
            statement.close()
    finally:
        connection.close()

# Sample invocation; every argument is a placeholder to be replaced.
remove_from_db(
    driver="your_driver",
    url="your_url",
    dbtable="your_table",
    user="your_user",
    password="your_password",
)

请确保用户具有对指定表执行删除操作所需的权限。此外，执行删除操作时要小心，因为它们可能会从表中永久删除数据。

© www.soinside.com 2019 - 2024. All rights reserved.