我尝试通过 Terraform 创建 Databricks 元存储。我的服务主体 (SP) 是包含 Databricks 工作区的 Azure 订阅的所有者,同时也是 Databricks 帐户管理员。以下是相关脚本:
provider.tf:
# Define the provider configuration
# Exposes the identity Terraform is currently running as (tenant ID,
# client ID, object ID) for use elsewhere in the configuration.
data "azurerm_client_config" "current" {}
# Azure Resource Manager provider.
provider "azurerm" {
# Register required resource providers automatically.
skip_provider_registration = false
features {
key_vault {
# Purge soft-deleted vaults on destroy and recover soft-deleted ones
# instead of failing on name collisions.
purge_soft_delete_on_destroy = true
recover_soft_deleted_key_vaults = true
}
}
# Authenticate with the managed identity of the host running Terraform
# (the Azure DevOps agent in this setup).
use_msi = true
}
# Workspace-level Databricks provider (default, no alias): scoped to the
# single workspace created by module.dbw.
provider "databricks" {
# Configuration options
host = module.dbw.databricks_workspace_url
azure_workspace_resource_id = module.dbw.databricks_id
}
# Account-level Databricks provider: required for account-scoped objects
# such as Unity Catalog metastores.
# NOTE(review): this alias is declared but the databricks_metastore
# resource in modules/databricks_metastore sets no `provider` argument, so
# it runs through the workspace-level provider instead — confirm whether
# the module should receive this alias via a `providers` map.
provider "databricks" {
# Configuration option
alias = "accounts"
host = "https://accounts.azuredatabricks.net"
account_id = "xxx"
}
模块/databricks_metastore/main.tf:
# Source: https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/azure-managed-identities
# Map each workspace ID onto itself so databricks_metastore_assignment can
# iterate with for_each; zipmap over the same list twice yields exactly the
# { id => id } map the original for-expression produced.
locals {
  workspace_id_map = zipmap(var.workspace_ids, var.workspace_ids)
}
# Look up the existing resource group that hosts the metastore storage.
data "azurerm_resource_group" "rg" {
name = var.resource_group_name
}
# Look up the existing storage account used as the metastore storage root.
data "azurerm_storage_account" "st" {
name = var.storage_account_name
resource_group_name = var.resource_group_name
}
# Role assignments for the Unity Catalog managed identity (var.object_id),
# per the access pattern in the Microsoft doc linked at the top of this file.
# Read/write access to blob data in the metastore storage account.
resource "azurerm_role_assignment" "stblob_contributor" {
scope = data.azurerm_storage_account.st.id
role_definition_name = "Storage Blob Data Contributor"
principal_id = var.object_id
}
# Read/write access to storage queues (used by file-event notifications).
resource "azurerm_role_assignment" "stqueue_contributor" {
scope = data.azurerm_storage_account.st.id
role_definition_name = "Storage Queue Data Contributor"
principal_id = var.object_id
}
# Management-plane access on the storage account itself.
resource "azurerm_role_assignment" "staccount_contributor" {
scope = data.azurerm_storage_account.st.id
role_definition_name = "Storage Account Contributor"
principal_id = var.object_id
}
# Event Grid subscription management at resource-group scope — note the
# wider scope (resource group) than the storage-scoped roles above.
resource "azurerm_role_assignment" "eventgd_contributor" {
scope = data.azurerm_resource_group.rg.id
role_definition_name = "EventGrid EventSubscription Contributor"
principal_id = var.object_id
}
# Unity Catalog metastore rooted in an ADLS Gen2 container.
# NOTE(review): no `provider` argument is set, so this resource is managed
# through the default (workspace-level) databricks provider rather than an
# account-level one — metastore CRUD is an account-level operation; confirm
# this is intended.
# NOTE(review): the apply-time error "cannot create metastore:
# UpdateMetastore Nothing to update" is characteristic of the resource being
# marked tainted in state while the metastore already exists in Azure;
# `terraform untaint` on this address clears the taint marker without
# touching the real resource, after which dependent operations (metastore
# assignment, etc.) can proceed.
resource "databricks_metastore" "dbw_ms" {
name = var.metastore_name
# abfss://<container>@<account>.dfs.core.windows.net
storage_root = "abfss://${var.container_name}@${var.storage_account_name}.dfs.core.windows.net"
# Permit destroying the metastore even when it is not empty.
force_destroy = true
}
# Attach the metastore to every workspace ID passed into the module.
resource "databricks_metastore_assignment" "dbw_ms_assign" {
for_each = local.workspace_id_map
metastore_id = databricks_metastore.dbw_ms.id
workspace_id = each.value
}
main.dbw.tf:
# Instantiate the Unity Catalog metastore module once the workspace module
# has been applied.
module "dbw_metastore" {
depends_on = [module.dbw]
source = "./modules/databricks_metastore"
# Principal that receives the storage and Event Grid role assignments
# (user-assigned managed identity dedicated to Unity Catalog).
object_id = azurerm_user_assigned_identity.msi["datalake_uc"].principal_id
storage_account_name = local.datalake.raw.storage_account_name
resource_group_name = "my-resource-group"
metastore_name = "datalake"
# Container that becomes the metastore storage root.
container_name = local.datalake.raw.container["landing"].name
workspace_ids = [module.dbw.databricks_workspace_id]
}
当我运行 Terraform 计划时,我得到以下输出:
Terraform will perform the following actions:
# module.dbw_metastore.databricks_metastore.dbw_ms is tainted, so must be replaced
然后我运行 Terraform Apply:
╷
│ Error: cannot create metastore: UpdateMetastore Nothing to update.
│
│ with module.dbw_metastore.databricks_metastore.dbw_ms,
│ on modules/databricks_metastore/main.tf line 40, in resource "databricks_metastore" "dbw_ms":
│ 40: resource "databricks_metastore" "dbw_ms" {
即使存在此错误,我的元存储也会创建。错误从何而来?我需要解决此错误,因为后者阻止我执行与元存储相关的后续操作(例如工作区上的元存储分配,...)。每个 Terraform 步骤都通过 CI/CD 管道在 Azure DevOps 中运行。我该如何解决这个问题?
通过 Terraform 创建 Databricks 元存储
该问题可能是由于云资源中存在未记录在 Terraform 状态中的手动更改引起的。在这种情况下,元存储在状态中被标记为"受污染"(tainted),Terraform 因此认为必须替换它。但由于该元存储实际上已经存在,当前的元存储配置与 Terraform 试图应用的配置之间就会产生冲突。请确保 Terraform 状态与实际的 Azure 配置一致,使用
terraform refresh
更新状态,然后按照 Rui Jarimba 建议的文档,使用 terraform untaint 移除该资源上的"受污染"标记——该命令只修改状态文件中的标记,不会改动实际资源,之后 Terraform 便不会再尝试替换已存在的元存储。
演示配置:
# Declare the Databricks provider source so Terraform can resolve it.
# NOTE(review): consider adding a `version` constraint for reproducible
# CI/CD runs.
terraform {
required_providers {
databricks = {
source = "databricks/databricks"
}
}
}
# Azure RM provider authenticated with a service principal.
# NOTE(review): hard-coding credentials in .tf files risks leaking secrets
# through version control; prefer the ARM_CLIENT_ID / ARM_CLIENT_SECRET /
# ARM_TENANT_ID / ARM_SUBSCRIPTION_ID environment variables or a secret
# store for real deployments.
provider "azurerm" {
features {}
client_id = "SP-App-ID"
# Fixed: the placeholder previously read ",SP-secret" — the stray leading
# comma would have been sent verbatim as part of the secret value.
client_secret = "SP-secret"
tenant_id = "Tenant-ID"
subscription_id = "Subscription-ID"
}
# Existing resource group and Databricks workspace the demo builds upon.
data "azurerm_resource_group" "this" {
name = "Vinay-rg"
}
data "azurerm_databricks_workspace" "this" {
name = "demodbvk"
resource_group_name = data.azurerm_resource_group.this.name
}
# Account-level Databricks provider (alias "accounts") authenticated with a
# service principal; used by account-scoped resources such as the metastore
# below.
provider "databricks" {
alias = "accounts"
host = "https://accounts.azuredatabricks.net"
account_id = "Workspace-account-ID"
azure_tenant_id = "Tenant-ID"
azure_client_id = "Client-ID"
# NOTE(review): inject this via an environment variable rather than
# committing it to source control.
azure_client_secret = "secret"
}
# Databricks access connector whose system-assigned managed identity is
# granted access to the metastore storage account below.
resource "azurerm_databricks_access_connector" "unity" {
name = "vinaydatabricksmi"
resource_group_name = data.azurerm_resource_group.this.name
location = data.azurerm_resource_group.this.location
identity {
type = "SystemAssigned"
}
}
# ADLS Gen2 storage account for the metastore root; hierarchical namespace
# (is_hns_enabled) is required for the abfss:// scheme used below.
resource "azurerm_storage_account" "unity_catalog" {
name = "vinayadatabricksdemo"
resource_group_name = data.azurerm_resource_group.this.name
location = data.azurerm_resource_group.this.location
account_tier = "Standard"
account_replication_type = "GRS"
is_hns_enabled = true
}
# Private container that becomes the metastore storage root.
# NOTE(review): `storage_account_name` is superseded by `storage_account_id`
# in newer azurerm provider majors — confirm against the provider version in
# use.
resource "azurerm_storage_container" "unity_catalog" {
name = "vinay-container"
storage_account_name = azurerm_storage_account.unity_catalog.name
container_access_type = "private"
}
# Let the access connector's managed identity read/write blob data in the
# metastore storage account.
resource "azurerm_role_assignment" "example" {
scope = azurerm_storage_account.unity_catalog.id
role_definition_name = "Storage Blob Data Contributor"
principal_id = azurerm_databricks_access_connector.unity.identity[0].principal_id
}
# Unity Catalog metastore created through the account-level provider alias
# and rooted in the ADLS Gen2 container defined above. force_destroy lets
# Terraform remove the metastore even when it still holds objects.
resource "databricks_metastore" "this" {
  provider      = databricks.accounts
  name          = "primary"
  region        = "eastus"
  force_destroy = true

  # Same abfss URL the original built with format(), written as a single
  # interpolated string.
  storage_root = "abfss://${azurerm_storage_container.unity_catalog.name}@${azurerm_storage_account.unity_catalog.name}.dfs.core.windows.net/"
}
输出: