如何通过DuckDB访问模拟的s3存储桶

问题描述 投票:0回答:1

我有一个使用 AWS S3、DuckDB API 的无服务器 Python 代码。

代码工作正常,重点是这段代码的单元测试。

我正在使用unittest和moto python框架来测试这段代码。

问题是将 duckdb 连接配置为指向模拟的 s3 存储桶。

这是duckdb配置的代码片段:

            self.db_conn.execute(query=f"SET s3_region='{os.environ['AWS_REGION']}';")
            self.db_conn.execute(query=f"SET s3_access_key_id='{self.tenant_ctx_aws_credentials['Credentials']['AccessKeyId']}';")
            self.db_conn.execute(query=f"SET s3_secret_access_key='{self.tenant_ctx_aws_credentials['Credentials']['SecretAccessKey']}';")
            self.db_conn.execute(query=f"SET s3_session_token='{self.tenant_ctx_aws_credentials['Credentials']['SessionToken']}';")
            self.db_conn.execute(query=f"SET memory_limit='{self.memory_limit}';")

这是来自 lambda 函数的单元测试文件,该函数使用 moto 来模拟 AWS 服务:

@mock_s3
@mock_glue
@mock_ssm
@mock_sts
@mock_iam
@mock.patch.dict(os.environ)
class TestLambdaFunction(unittest.TestCase):

    maxDiff = None

    def setUp(self):
        try:
            # S3 setup:
            self.s3_client = boto3.client('s3', region_name=self.aws_region)
            self.s3_resource = boto3.resource('s3', region_name=self.aws_region)
            self.s3_bucket = self.s3_resource.create_bucket(Bucket=self.dp_s3_bucket_name, CreateBucketConfiguration={
                'LocationConstraint': self.aws_region})

所以我尝试让 duckdb 访问这个模拟的 s3 存储桶。

当我运行此测试时,我收到以下错误:

IO Error: HTTP GET error on 'https://{test_bucket_name}.s3.amazonaws.com/{s3_path}/test_file.parquet' (HTTP 400)

我尝试使用 boto3 客户端的 endpoint_url 参数,但没有帮助。

此外,据我所知,当 moto 运行时,它在 localhost:5000 上运行,所以我也尝试将 duckdb 配置指向这个 domain:port,但它对我来说也不起作用。

python amazon-web-services unit-testing amazon-s3 duckdb
1个回答
0
投票
@pytest.fixture()
def mock_aws_env(monkeypatch) -> None:
    """Give the test a clean, fake AWS environment.

    Resets boto3's default session and the ``S3FileSystem`` instance cache,
    removes AWS-related environment variables, then installs throwaway
    credentials and a fixed region through ``monkeypatch`` so they are
    undone when the test finishes.
    """
    # Forget any session / filesystem objects created before this test.
    boto3.DEFAULT_SESSION = None
    S3FileSystem.clear_instance_cache()

    # Remove whatever the host environment may have configured.
    stale_vars = (
        "AWS_CONFIG_FILE",
        "AWS_SHARED_CREDENTIALS_FILE",
        "AWS_PROFILE",
        "AWS_DEFAULT_PROFILE",
        "AWS_ACCOUNT",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_SECURITY_TOKEN",
        "AWS_SESSION_TOKEN",
    )
    for var_name in stale_vars:
        monkeypatch.delenv(var_name, raising=False)

    # Install fake values; the key id and secret are unique per invocation.
    fake_env = {
        "AWS_REGION": "us-east-1",
        "AWS_ACCESS_KEY_ID": f"AWS_ACCESS_KEY_ID-{uuid4()}",
        "AWS_SECRET_ACCESS_KEY": f"AWS_SECRET_ACCESS_KEY-{uuid4()}",
        "AWS_SECURITY_TOKEN": "testing",
        "AWS_SESSION_TOKEN": "testing",
    }
    for var_name, var_value in fake_env.items():
        monkeypatch.setenv(var_name, var_value)


@pytest.fixture
def mock_s3_server(mock_aws_env, monkeypatch) -> MotoService:
    """Start a ``MotoService("s3")`` server and point duckdb at it.

    Yields the running service after resetting it, and resets it again once
    the test is done. While the fixture is active, ``DUCKDB_S3_ENDPOINT`` is
    set to the server's ``host:port`` and ``DUCKDB_S3_USE_SSL`` to
    ``"false"``.
    """
    with MotoService("s3") as svc:
        svc.reset()
        # duckdb requires a custom ENDPOINT env-var to find moto AWS services
        # https://duckdb.org/docs/extensions/httpfs/s3api.html
        # For duckdb < 0.10 see:
        # https://duckdb.org/docs/extensions/httpfs/s3api-legacy-authentication
        endpoint_url = urlparse(svc.endpoint_url)
        # duckdb wants a bare "host:port" endpoint, without the URL scheme.
        duckdb_endpoint = f"{endpoint_url.hostname}:{endpoint_url.port}"
        monkeypatch.setenv("DUCKDB_S3_ENDPOINT", duckdb_endpoint)
        # Environment values must be strings: passing the bool ``False``
        # makes pytest emit a PytestWarning and convert it implicitly.
        monkeypatch.setenv("DUCKDB_S3_USE_SSL", "false")
        yield svc
        svc.reset()


#     f"""
#     CREATE SECRET mock_s3_secrets (
#         TYPE S3,
#         KEY_ID {aws_access_key_id},
#         SECRET {aws_secret_access_key},
#         REGION {aws_region},
#         ENDPOINT {moto_endpoint}
#     );
#     """
#     # duckdb < 0.10
#     """
#     SET s3_region = 'us-east-1';
#     SET s3_endpoint = '⟨domain⟩.⟨tld⟩:⟨port⟩';
#     SET s3_use_ssl = false;
#     SET s3_access_key_id = '⟨AWS access key id⟩';
#     SET s3_secret_access_key = '⟨AWS secret access key⟩';
#     """


@pytest.fixture()
def mock_s3_bucket(mock_s3_server, mocker, monkeypatch):
    """Create a uniquely named test bucket on the mock S3 server and yield it.

    The region comes from ``AWS_REGION`` (default ``us-east-1``). The bucket
    is created through a boto3 resource bound to ``mock_s3_server``'s
    endpoint URL, with a ``public-read-write`` ACL.
    """
    # create the test bucket since this is all in a moto 'virtual' AWS account
    aws_region = os.getenv("AWS_REGION", "us-east-1")
    s3_data_bucket_name = f"test-app-data-{uuid4()}"

    create_kwargs = {
        "Bucket": s3_data_bucket_name,
        "ACL": "public-read-write",
    }
    # "us-east-1" is not a valid LocationConstraint; the default region must
    # be requested by omitting CreateBucketConfiguration altogether.
    if aws_region != "us-east-1":
        create_kwargs["CreateBucketConfiguration"] = {
            "LocationConstraint": aws_region,
        }

    with mock_s3():
        s3 = boto3.resource(
            "s3",
            region_name=aws_region,
            endpoint_url=mock_s3_server.endpoint_url,
        )
        s3_bucket_object = s3.create_bucket(**create_kwargs)
        assert s3_bucket_object.name == s3_data_bucket_name
        yield s3_bucket_object

(注意,需要导入 lib ^^)

MotoService
来自https://github.com/dazza-codes/pytest-aiomoto

对于 duckdb,也可以使用

SET s3_url_style='path';

© www.soinside.com 2019 - 2024. All rights reserved.