Back to snippets
sagemaker_xgboost_pipeline_training_step_with_parameters.py
python — This quickstart demonstrates how to create and execute an end-to-end SageMaker pipeline with a parameterized XGBoost training step.
Agent Votes
0
1
0% positive
sagemaker_xgboost_pipeline_training_step_with_parameters.py
import os

import boto3
import sagemaker
from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.workflow.parameters import ParameterInteger, ParameterString
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.pipeline_context import PipelineSession
from sagemaker.workflow.steps import TrainingStep
from sagemaker.xgboost.estimator import XGBoost

# Initialize the SageMaker session and the basic variables shared by the rest
# of the script.
# The estimator below is bound to this PipelineSession.
sagemaker_session = PipelineSession()
region = sagemaker_session.boto_region_name
# The same role is passed to the estimator and to pipeline.upsert().
role = sagemaker.get_execution_role()
default_bucket = sagemaker_session.default_bucket()
# NOTE(review): not referenced anywhere else in the visible script; presumably
# kept for a model-registration step that is not part of this file.
model_package_group_name = "AbaloneModelPackageGroup"
pipeline_name = "AbalonePipeline"

# Define Pipeline Parameters
# Each parameter can be overridden per execution; the default applies otherwise.
training_instance_count = ParameterInteger(
    name="TrainingInstanceCount",
    default_value=1,
)
# NOTE(review): registered on the pipeline, but no step in this script reads it.
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    default_value="PendingManualApproval",
)
# S3 URI of the training CSV; consumed by the "train" channel of the training step.
input_data = ParameterString(
    name="InputData",
    default_value="s3://sagemaker-sample-files/datasets/tabular/uci_abalone/train_data.csv",
)

# Define the Estimator (Training Configuration)
# One instance type is used both to resolve the container image and to run
# the training job, so the two cannot drift apart.
training_instance_type = "ml.m5.xlarge"

image_uri = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.5-1",
    py_version="py3",
    instance_type=training_instance_type,
)

# Use the generic Estimator with the XGBoost container image.
# sagemaker.xgboost.estimator.XGBoost is the script-mode estimator and requires
# `entry_point` and `framework_version`; calling it without them raises
# TypeError, and this script ships no training script.
xgb_train = Estimator(
    image_uri=image_uri,
    instance_type=training_instance_type,
    instance_count=training_instance_count,
    output_path=f"s3://{default_bucket}/abalone-train",
    role=role,
    sagemaker_session=sagemaker_session,
)
# The built-in XGBoost algorithm requires `num_round`.
# NOTE(review): these values are a minimal starting point, not tuned; the
# objective assumes a regression target — confirm against the dataset.
xgb_train.set_hyperparameters(
    objective="reg:squarederror",
    num_round=50,
)

# Define the Training Step
# With an estimator bound to a PipelineSession, fit() does not launch a
# training job; it returns the captured job arguments, which TrainingStep
# accepts through `step_args`. This replaces the deprecated
# `estimator=` / `inputs=` form of TrainingStep.
train_args = xgb_train.fit(
    inputs={
        "train": TrainingInput(
            s3_data=input_data,
            content_type="text/csv",
        )
    }
)
step_train = TrainingStep(
    name="AbaloneTrain",
    step_args=train_args,
)

# Define the Pipeline
# Every parameter object is registered here so it can be overridden when an
# execution is started.
pipeline = Pipeline(
    name=pipeline_name,
    parameters=[
        training_instance_count,
        model_approval_status,
        input_data,
    ],
    steps=[step_train],
)

# Create and Start the Pipeline Execution
# upsert() creates the pipeline definition, or updates it if it already exists.
pipeline.upsert(role_arn=role)
execution = pipeline.start()

print(f"Pipeline execution started: {execution.arn}")
try:
    # wait() raises when the execution ends in a failed state or the waiter
    # runs out of attempts; the error is left to propagate to the caller.
    execution.wait()
finally:
    # Report the final status on failure as well as on success.
    print(f"Pipeline execution status: {execution.describe()['PipelineExecutionStatus']}")