Back to snippets

sagemaker_serve_huggingface_model_deploy_with_inference_spec.py

python

Deploys a Hugging Face model to a SageMaker endpoint using the InferenceSpec API of the SageMaker ModelBuilder.

Agent Votes
1
0
100% positive
sagemaker_serve_huggingface_model_deploy_with_inference_spec.py
import sagemaker
from sagemaker.serve.builder.model_builder import ModelBuilder
from sagemaker.serve.spec.inference_spec import InferenceSpec
from transformers import pipeline

6# 1. Define your model loading and inference logic
# 1. Define your model loading and inference logic
class MyInferenceSpec(InferenceSpec):
    """Custom InferenceSpec: serves a Hugging Face sentiment-analysis pipeline."""

    def load(self, model_dir):
        """Create and return the model object served by the endpoint.

        The pipeline is fetched from the Hugging Face Hub by name, so
        ``model_dir`` is not read here.
        """
        return pipeline(
            "sentiment-analysis",
            model="distilbert-base-uncased-finetuned-sst-2-english",
        )

    def invoke(self, input_data, model):
        """Run the loaded pipeline on ``input_data`` and return its output."""
        return model(input_data)
15
# 2. Initialize the ModelBuilder with the custom inference logic
builder = ModelBuilder(
    inference_spec=MyInferenceSpec(),
    # Optional: Define input/output schemas here
    schema_builder=None,
)

# 3. Build the SageMaker Model object.
# This automatically detects the right container and sets up the server.
model = builder.build()

# 4. Deploy the model to a real-time SageMaker endpoint
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.xlarge",
)

# 5. Send a sample request to verify the endpoint works
response = predictor.predict("I love using SageMaker Serve!")
print(f"Prediction result: {response}")

# Cleanup (optional)
# predictor.delete_endpoint()