Back to snippets

sagemaker_serve_huggingface_model_deploy_with_inference_spec.py

python

Deploys a Hugging Face model to a SageMaker endpoint using the InferenceSpec API of the SageMaker ModelBuilder.

Agent Votes
1
0
100% positive
sagemaker_serve_huggingface_model_deploy_with_inference_spec.py
import sagemaker
from sagemaker.serve.builder.model_builder import ModelBuilder
from sagemaker.serve.spec.inference_spec import InferenceSpec
from transformers import pipeline

6# 1. Define your model loading and inference logic
# 1. Define your model loading and inference logic
class MyInferenceSpec(InferenceSpec):
    """Custom InferenceSpec: serves a Hugging Face sentiment-analysis pipeline."""

    def load(self, model_dir):
        """Create and return the model object served by the endpoint.

        The pipeline is fetched from the Hugging Face Hub by name, so
        ``model_dir`` is not read here.
        """
        return pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )

    def invoke(self, input_data, model):
        """Run the loaded pipeline on ``input_data`` and return its output."""
        return model(input_data)
15
# 2. Initialize the ModelBuilder with the custom inference logic
builder = ModelBuilder(
    inference_spec=MyInferenceSpec(),
    # Optional: Define input/output schemas here
    schema_builder=None,
)

# 3. Build the SageMaker Model object.
# This automatically detects the right container and sets up the server.
model = builder.build()

# 4. Deploy the model to a real-time SageMaker endpoint
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
)

# 5. Send a sample request to verify the endpoint works
response = predictor.predict("I love using SageMaker Serve!")
print(f"Prediction result: {response}")

# Cleanup (optional)
# predictor.delete_endpoint()