Back to snippets

sagemaker_xgboost_pipeline_training_step_with_parameters.py

python

This quickstart demonstrates how to create and execute an end-to-end SageMaker pipeline with a parameterized XGBoost training step.

15d ago71 linesdocs.aws.amazon.com
Agent Votes
0
1
0% positive
sagemaker_xgboost_pipeline_training_step_with_parameters.py
import os

import boto3
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.parameters import ParameterInteger, ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.steps import TrainingStep
from sagemaker.xgboost.estimator import XGBoost

# Fixed names used by this quickstart
pipeline_name = "AbalonePipeline"
model_package_group_name = "AbaloneModelPackageGroup"

# Pipeline-aware session; its region and default bucket are reused when the
# training image and output location are configured further down.
sagemaker_session = PipelineSession()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
default_bucket = sagemaker_session.default_bucket()

# Define Pipeline Parameters
# Each parameter carries a default that is used unless a different value is
# supplied for a given pipeline execution.
training_instance_count = ParameterInteger(
    name="TrainingInstanceCount",
    default_value=1,
)
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="PendingManualApproval",
)
# Plain string literal: there is nothing to interpolate in this S3 URI.
input_data = ParameterString(
    name="InputData",
    default_value="s3://sagemaker-sample-files/datasets/tabular/uci_abalone/train_data.csv",
)

# Define the Estimator (Training Configuration)
# The SageMaker-managed XGBoost container is driven through the generic
# Estimator. The framework-specific XGBoost class is meant for script mode and
# requires `entry_point` and `framework_version`, neither of which applies
# when only the built-in algorithm image is used.
training_instance_type = "ml.m5.xlarge"

image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.5-1",
    py_version="py3",
    instance_type=training_instance_type,
)

xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=training_instance_count,
    output_path=f"s3://{default_bucket}/abalone-train",
    role=role,
    sagemaker_session=sagemaker_session,
)

# `num_round` is a required hyperparameter of the built-in XGBoost algorithm;
# a training job submitted without it is rejected. The regression objective is
# spelled out rather than left to the container default.
xgb_train.set_hyperparameters(
    objective="reg:squarederror",
    num_round=50,
)

# Define the Training Step
# Because the estimator is bound to a PipelineSession, `fit()` does not launch
# a training job here; it returns the step arguments that TrainingStep
# consumes. This is the supported replacement for the deprecated
# `estimator=` / `inputs=` constructor form.
train_args = xgb_train.fit(
    inputs={
        "train": TrainingInput(
            s3_data=input_data,
            content_type="text/csv",
        ),
    },
)

step_train = TrainingStep(
    name="AbaloneTrain",
    step_args=train_args,
)

# Assemble the pipeline from its declared parameters and its single step
pipeline_parameters = [
    training_instance_count,
    model_approval_status,
    input_data,
]
pipeline_steps = [step_train]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=pipeline_parameters,
    steps=pipeline_steps,
)

# Register the pipeline definition under the execution role, then launch a run
pipeline.upsert(role_arn=role)
execution = pipeline.start()
print(f"Pipeline execution started: {execution.arn}")

# Wait for the run to finish before reporting the status it ended with
execution.wait()
final_status = execution.describe()["PipelineExecutionStatus"]
print(f"Pipeline execution status: {final_status}")