获取错误:ClientError:调用 CreateHyperParameterTuningJob 操作时发生错误 (ValidationException):超参数调优作业的目标指标 [mse] 对算法 [720646828776.dkr.ecr.ap-south-1.amazonaws.com/sagemaker-xgboost:0.90-2-cpu-py3] 无效。请选择一个有效的目标指标。
import datetime
import time
import tarfile
import boto3
import pandas as pd
import numpy as np
from sagemaker import get_execution_role
import sagemaker
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sagemaker.tuner import (
IntegerParameter,
CategoricalParameter,
ContinuousParameter,
HyperparameterTuner,
)
# AWS clients and the SageMaker session used by the rest of the script.
s3 = boto3.client("s3")
sm_boto3 = boto3.client("sagemaker")
sagemaker_session = sagemaker.Session()
# The session object created above is named `sagemaker_session`; the name
# `sess` is not defined anywhere in the visible script, so reading the region
# through it raised a NameError.
region = sagemaker_session.boto_session.region_name
role = get_execution_role()

# Set the required configurations
model_name = "abc_model"
env = "dev"

# S3 bucket
bucket = "abcpoc"
print("Using bucket " + bucket)
from sagemaker.debugger import Rule, rule_configs
from sagemaker.session import TrainingInput
# Training and validation channels, read as CSV from the configured bucket.
# `bucket` is the variable that holds the bucket name; `default_bucket` is
# never assigned in the visible script, so interpolating it raised a NameError.
s3_input_train = TrainingInput(
    s3_data=f"s3://{bucket}/train/",
    content_type="csv",
)
s3_input_validation = TrainingInput(
    s3_data=f"s3://{bucket}/validation/",
    content_type="csv",
)

# Key prefix used when building the estimator's output path.
prefix = "output"

# Look up the XGBoost container image URI for the session's region.
container = sagemaker.image_uris.retrieve("xgboost", region, "1.2-1")
print(container)
# S3 location passed to the estimator as its output path.
xgb_output_path = f"s3://{bucket}/{prefix}/output"

# Rule that requests the XGBoost report for the training job.
xgb_report_rules = [Rule.sagemaker(rule_configs.create_xgboost_report())]

# Estimator built on the XGBoost container image retrieved earlier.
xgb = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    base_job_name="xgboost-random-search",
    instance_count=1,
    instance_type="ml.m4.xlarge",
    output_path=xgb_output_path,
    sagemaker_session=sagemaker.Session(),
    rules=xgb_report_rules,
)

# Static hyperparameters; the tuner's search ranges cover some of the same
# names (eta, min_child_weight, max_depth).
static_hyperparameters = {
    "max_depth": 5,
    "eta": 0.2,
    "gamma": 4,
    "min_child_weight": 6,
    "subsample": 0.7,
    "objective": "reg:squarederror",
    "num_round": 1000,
}
xgb.set_hyperparameters(**static_hyperparameters)
# Search space explored by the tuning job.
hyperparameter_ranges = {
    "eta": ContinuousParameter(0, 1),
    "min_child_weight": ContinuousParameter(1, 10),
    "alpha": ContinuousParameter(0, 2),
    "max_depth": IntegerParameter(1, 10),
}

# Built-in algorithms accept only their predefined tuning metrics. A bare
# "mse" is rejected by CreateHyperParameterTuningJob with a
# ValidationException; the XGBoost metric is named "validation:mse".
objective_metric_name = "validation:mse"

tuner = HyperparameterTuner(
    # The estimator configured earlier in this script is bound to `xgb`;
    # the name `estimator` is not defined in the visible script.
    estimator=xgb,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    # No custom metric regexes: the objective is a predefined metric of the
    # built-in algorithm.
    metric_definitions=None,
    strategy="Bayesian",
    # Mean squared error is an error measure, so lower values are better.
    objective_type="Minimize",
    max_jobs=1,
    max_parallel_jobs=1,
    tags=None,
    base_tuning_job_name=None,
)
# Tune: launch the tuning job with the training and validation channels.
tuner.fit(
    {
        "train": s3_input_train,
        "validation": s3_input_validation,
    },
    include_cls_metadata=False,
)

# Explore the best model generated
tuning_job_result = boto3.client("sagemaker").describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.job_name
)
job_count = tuning_job_result["TrainingJobStatusCounters"]["Completed"]
print("%d training jobs have completed" % job_count)
# Example output: "10 training jobs have completed"

# Get the best training job
from pprint import pprint

# The branch bodies must be indented; the flattened original raised an
# IndentationError here.
if tuning_job_result.get("BestTrainingJob", None):
    print("Best Model found so far:")
    pprint(tuning_job_result["BestTrainingJob"])
else:
    print("No training jobs have reported results yet.")
对于内置算法,您只需使用该算法已经预定义的目标指标进行调优,而不必像下面这样自行定义指标:
objective_metric_name = "mse"
metric_definitions = [{"Name": "mse", "Regex": "mse: ([0-9\\.]+)"}]
XGBoost 算法支持的指标列表见下方链接。您可以从中选择一个,例如
validation:mse
并将其指定为您的目标指标。
可用指标:https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost-tuning.html
总而言之,您应该将目标指标名称写为 "validation:mse"。