在Azure中启用VM诊断非常麻烦。我已经通过ARM模板、Azure PowerShell SDK和Azure CLI让它正常工作。但是我已经尝试了好几天,想用Terraform和azurerm_virtual_machine_extension资源为Windows和Linux VM启用VM诊断,却始终不成功!
这是我到目前为止的内容(为了简化,我对其做了一些手动调整,希望这些手动编辑没有破坏任何内容):
# Linux diagnostics VM extension. Created only when local.is_windows_vm is the
# string "false" (count evaluates to 1), otherwise skipped (count 0).
resource "azurerm_virtual_machine_extension" "vm-linux" {
  count      = "${local.is_windows_vm == "false" ? 1 : 0}"
  depends_on = ["azurerm_virtual_machine_data_disk_attachment.vm"]

  # Where the extension is attached.
  name                 = "LinuxDiagnostic"
  virtual_machine_name = "${local.vm_name}"
  resource_group_name  = "${var.resource_group_name}"
  location             = "${var.location}"

  # Which extension handler to install.
  publisher                  = "Microsoft.Azure.Diagnostics"
  type                       = "LinuxDiagnostic"
  type_handler_version       = "3.0"
  auto_upgrade_minor_version = "true"

  # Public settings. The JSON file read below was produced with
  # "az vm diagnostics get-default-config"; its "__DIAGNOSTIC_STORAGE_ACCOUNT__"
  # and "__VM_RESOURCE_ID__" placeholders are substituted here, and the result
  # is base64-encoded into "ladCfg".
  # NOTE(review): the LAD 3.0 documentation appears to describe "ladCfg" as a
  # plain JSON object and the account key as "StorageAccount" (capital S) -
  # confirm against the docs and the contents of linux_diag_config.json.
  settings = <<PUBLIC_SETTINGS
{
"ladCfg": "${base64encode(replace(replace(file("${path.module}/.diag-settings/linux_diag_config.json"), "__DIAGNOSTIC_STORAGE_ACCOUNT__", "${module.vm_storage_account.name}"), "__VM_RESOURCE_ID__", "${local.metricsresourceid}"))}",
"storageAccount": "${module.vm_storage_account.name}"
}
PUBLIC_SETTINGS

  # Protected settings. The SAS token must not carry its leading question
  # mark (see https://docs.microsoft.com/en-us/azure/virtual-machines/extensions/diagnostics-linux),
  # so a leading "?" is stripped with a regex replace.
  protected_settings = <<PROTECTED_SETTINGS
{
"storageAccountName": "${module.vm_storage_account.name}",
"storageAccountSasToken": "${replace(data.azurerm_storage_account_sas.current.sas, "/^\\?/", "")}",
"storageAccountEndPoint": "https://core.windows.net/"
}
PROTECTED_SETTINGS
}
# Windows diagnostics VM extension. Created only when local.is_windows_vm is
# the string "true" (count evaluates to 1), otherwise skipped (count 0).
resource "azurerm_virtual_machine_extension" "vm-win" {
  count      = "${local.is_windows_vm == "true" ? 1 : 0}"
  depends_on = ["azurerm_virtual_machine_data_disk_attachment.vm"]

  # Where the extension is attached.
  name                 = "Microsoft.Insights.VMDiagnosticsSettings"
  virtual_machine_name = "${local.vm_name}"
  resource_group_name  = "${var.resource_group_name}"
  location             = "${var.location}"

  # Which extension handler to install.
  publisher                  = "Microsoft.Azure.Diagnostics"
  type                       = "IaaSDiagnostics"
  type_handler_version       = "1.9"
  auto_upgrade_minor_version = "true"

  # Public settings. The JSON file read below was produced with
  # "az vm diagnostics get-default-config --is-windows-os"; its
  # "__DIAGNOSTIC_STORAGE_ACCOUNT__" and "__VM_RESOURCE_ID__" placeholders are
  # substituted here, and the result is base64-encoded into "wadCfg".
  # NOTE(review): setting names are reported to be case sensitive, and the
  # extension schema appears to expect either "WadCfg" (JSON object) or
  # "xmlCfg" (base64 XML) - confirm against the Windows diagnostics docs.
  settings = <<PUBLIC_SETTINGS
{
"wadCfg": "${base64encode(replace(replace(file("${path.module}/.diag-settings/windows_diag_config.json"), "__DIAGNOSTIC_STORAGE_ACCOUNT__", "${module.vm_storage_account.name}"), "__VM_RESOURCE_ID__", "${local.metricsresourceid}"))}",
"storageAccount": "${module.vm_storage_account.name}"
}
PUBLIC_SETTINGS

  # Protected settings: storage account name plus the SAS token, passed
  # through unmodified.
  protected_settings = <<PROTECTED_SETTINGS
{
"storageAccountName": "${module.vm_storage_account.name}",
"storageAccountSasToken": "${data.azurerm_storage_account_sas.current.sas}",
"storageAccountEndPoint": "https://core.windows.net/"
}
PROTECTED_SETTINGS
}
注意,对于Linux和Windows,我都会根据注释从代码库中的JSON文件加载诊断详细信息。这些是Azure提供的默认配置,因此它们应该有效。
当我部署这些时,Linux VM扩展成功部署,但是在Azure门户中,该扩展显示“在生成的mdsd配置中检测到问题”。如果我查看VM的“诊断设置”,它会显示“遇到错误:TypeError:对象不支持属性或方法'diagnosticMonitorConfiguration'”。Windows VM扩展则完全无法部署,提示它“无法读取配置”。如果我在门户中查看该扩展,它会显示以下错误:
"code": "ComponentStatus//failed/-3",
"level": "Error",
"displayStatus": "Provisioning failed",
"message": "Error starting the diagnostics extension"
如果我查看“诊断设置”窗格,它只会一直停留在永无休止的“.....”加载动画上。
但是,如果我查看两个VM扩展的“ terraform apply”输出,则解码后的设置看起来完全符合预期,将配置文件与正确替换的占位符匹配。
有关如何使其正常工作的任何建议?
提前感谢!
到目前为止,我已经让Windows诊断在我们的环境中100%正常工作。看来AzureRM API对发送给它的配置非常挑剔。我们之前一直使用PowerShell来启用诊断,而PowerShell中使用的xmlCfg与Terraform不兼容。以下配置目前对我们有效(settings / protected_settings中的键名区分大小写,例如xmlCfg有效,而xmlcfg无效):main.tf
#########################################################
# VM Extensions - Windows In-Guest Monitoring/Diagnostics
#########################################################
# Extension identity (name, publisher, type, versions) is read from
# var.compute["InGuestDiagnostics"]; placement comes from the resource group
# and VM resources referenced below.
resource "azurerm_virtual_machine_extension" "InGuestDiagnostics" {
  # Where the extension is attached.
  name                 = var.compute["InGuestDiagnostics"]["name"]
  virtual_machine_name = azurerm_virtual_machine.Compute.name
  resource_group_name  = azurerm_resource_group.VMResourceGroup.name
  location             = azurerm_resource_group.VMResourceGroup.location

  # Which extension handler to install.
  publisher                  = var.compute["InGuestDiagnostics"]["publisher"]
  type                       = var.compute["InGuestDiagnostics"]["type"]
  type_handler_version       = var.compute["InGuestDiagnostics"]["type_handler_version"]
  auto_upgrade_minor_version = var.compute["InGuestDiagnostics"]["auto_upgrade_minor_version"]

  # Public settings: the XML template is rendered with the VM id bound to
  # "vmid" and then base64-encoded into "xmlCfg".
  settings = <<PUBLIC_SETTINGS
{
"xmlCfg": "${base64encode(templatefile("${path.module}/templates/wadcfgxml.tmpl", { vmid = azurerm_virtual_machine.Compute.id }))}",
"storageAccount": "${data.azurerm_storage_account.InGuestDiagStorageAccount.name}"
}
PUBLIC_SETTINGS

  # Protected settings: storage account name and its primary access key.
  protected_settings = <<PRIVATE_SETTINGS
{
"storageAccountName": "${data.azurerm_storage_account.InGuestDiagStorageAccount.name}",
"storageAccountKey": "${data.azurerm_storage_account.InGuestDiagStorageAccount.primary_access_key}",
"storageAccountEndPoint": "https://core.windows.net"
}
PRIVATE_SETTINGS
}
tfvars
# Values consumed by the InGuestDiagnostics extension resource through
# var.compute["InGuestDiagnostics"][...].
# NOTE(review): presumably this map sits inside a surrounding "compute" map
# that is not shown here - confirm against the full tfvars file.
InGuestDiagnostics = {
  # Extension resource name.
  name = "WindowsDiagnostics"

  # Extension handler identity and version.
  publisher                  = "Microsoft.Azure.Diagnostics"
  type                       = "IaaSDiagnostics"
  type_handler_version       = "1.16"
  auto_upgrade_minor_version = "true"
}
wadcfgxml.tmpl(为简便起见,我剪掉了一些Perf计数器)
<WadCfg>
  <!-- Diagnostics configuration template. The vmid placeholder in the Metrics
       resourceId attribute is substituted when the template is rendered. -->
  <DiagnosticMonitorConfiguration overallQuotaInMB="5120">
    <DiagnosticInfrastructureLogs scheduledTransferLogLevelFilter="Error"/>
    <!-- Metric aggregation at one-hour and one-minute transfer periods. -->
    <Metrics resourceId="${vmid}">
      <MetricAggregation scheduledTransferPeriod="PT1H"/>
      <MetricAggregation scheduledTransferPeriod="PT1M"/>
    </Metrics>
    <!-- Performance counters, each sampled every 60 seconds. -->
    <PerformanceCounters scheduledTransferPeriod="PT1M">
      <PerformanceCounterConfiguration counterSpecifier="\Processor Information(_Total)\% Processor Time" sampleRate="PT60S" unit="Percent" />
      <PerformanceCounterConfiguration counterSpecifier="\Processor Information(_Total)\% Privileged Time" sampleRate="PT60S" unit="Percent" />
      <PerformanceCounterConfiguration counterSpecifier="\Processor Information(_Total)\% User Time" sampleRate="PT60S" unit="Percent" />
      <PerformanceCounterConfiguration counterSpecifier="\Processor Information(_Total)\Processor Frequency" sampleRate="PT60S" unit="Count" />
      <PerformanceCounterConfiguration counterSpecifier="\System\Processes" sampleRate="PT60S" unit="Count" />
      <PerformanceCounterConfiguration counterSpecifier="\SQLServer:SQL Statistics\SQL Re-Compilations/sec" sampleRate="PT60S" unit="Count" />
    </PerformanceCounters>
    <!-- Event log queries: Level 1 or Level 2 events from each channel.
         The Security query previously lacked its final closing bracket,
         leaving the XPath filter unbalanced; it now matches the
         Application and System entries. -->
    <WindowsEventLog scheduledTransferPeriod="PT1M">
      <DataSource name="Application!*[System[(Level = 1 or Level = 2)]]"/>
      <DataSource name="Security!*[System[(Level = 1 or Level = 2)]]"/>
      <DataSource name="System!*[System[(Level = 1 or Level = 2)]]"/>
    </WindowsEventLog>
  </DiagnosticMonitorConfiguration>
</WadCfg>