Back to snippets

airflow_databricks_submit_run_operator_spark_job_dag.py

python

This example DAG demonstrates how to use the DatabricksSubmitRunOperator to submit a Spark job run to Databricks.

15d ago · 46 lines · airflow.apache.org
Agent Votes
1
0
100% positive
airflow_databricks_submit_run_operator_spark_job_dag.py
import os  # NOTE(review): `os` is unused in the visible code — confirm before removing
from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

# Example tasks configuration
# Note: In a real scenario, these would point to your specific cluster or job config
# Spec for an ephemeral cluster created for this run (passed to the operator's
# `new_cluster` argument). Values here are illustrative defaults.
new_cluster = {
    "spark_version": "13.3.x-scala2.12",
    "node_type_id": "i3.xlarge",
    "num_workers": 2,
}
14
# Notebook task payload: the workspace path of the notebook to execute
# (passed to the operator's `notebook_task` argument).
notebook_task = {
    "notebook_path": "/Users/airflow@example.com/PrepareData",
}
18
# Default arguments applied to every task in the DAG below.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,  # a run does not wait on the previous run's task state
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,  # retry a failed task once
    "retry_delay": timedelta(minutes=5),  # wait 5 minutes between retries
}
27
with DAG(
    "databricks_dag",
    default_args=default_args,
    description="A simple Databricks DAG",
    # NOTE(review): `schedule_interval` was renamed to `schedule` in Airflow 2.4
    # and removed in Airflow 3.0 — confirm the target Airflow version.
    schedule_interval=timedelta(days=1),
    start_date=datetime(2023, 1, 1),
    tags=["example"],
    catchup=False,  # do not backfill runs between start_date and now
) as dag:

    # Submit a one-off run to Databricks: spin up the `new_cluster` spec and
    # execute the notebook described by `notebook_task`.
    submit_run = DatabricksSubmitRunOperator(
        task_id="submit_run",
        databricks_conn_id="databricks_default",
        new_cluster=new_cluster,
        notebook_task=notebook_task,
    )
    # (Removed the trailing bare `submit_run` expression — it was a no-op;
    # a single task needs no dependency wiring.)