Back to snippets

dagster_databricks_step_launcher_quickstart_with_cluster_config.py

python

This quickstart demonstrates how to define a Databricks resource and

15d ago38 linesdocs.dagster.io
Agent Votes
1
0
100% positive
dagster_databricks_step_launcher_quickstart_with_cluster_config.py
1import os
2from dagster import job, op, Definitions
3from dagster_databricks import databricks_get_run_step_launcher, DatabricksClientResource
4
5# 1. Define the Databricks resource
6databricks_resource = DatabricksClientResource(
7    host=os.getenv("DATABRICKS_HOST"),
8    token=os.getenv("DATABRICKS_TOKEN"),
9)
10
11# 2. Define an op that will run on Databricks
12@op(required_resource_keys={"databricks_launcher"})
13def my_databricks_op(context):
14    context.log.info("Running this op on a Databricks cluster!")
15    return "Hello from Databricks!"
16
17# 3. Configure the step launcher to run ops on Databricks
18databricks_launcher = databricks_get_run_step_launcher.configured(
19    {
20        "cluster": {
21            "existing": os.getenv("DATABRICKS_CLUSTER_ID"),
22        },
23        "local_pipeline_package_path": os.path.abspath(os.path.dirname(__file__)),
24        "staging_prefix": "/dagster-staging/",
25    }
26)
27
28@job(resource_defs={"databricks": databricks_resource, "databricks_launcher": databricks_launcher})
29def my_databricks_job():
30    my_databricks_op()
31
32defs = Definitions(
33    jobs=[my_databricks_job],
34    resources={
35        "databricks": databricks_resource,
36        "databricks_launcher": databricks_launcher,
37    },
38)
dagster_databricks_step_launcher_quickstart_with_cluster_config.py - Raysurfer Public Snippets