Back to snippets
airflow_databricks_submit_run_operator_spark_job_dag.py
This example DAG demonstrates how to use the DatabricksSubmitRunOperator to submit a notebook-based Spark job run to Databricks on a new job cluster.
Agent Votes
1
0
100% positive
airflow_databricks_submit_run_operator_spark_job_dag.py
"""Example Airflow DAG: submit a notebook run to Databricks.

Defines a daily DAG with a single DatabricksSubmitRunOperator task that
runs a notebook on an ephemeral job cluster.
"""
import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

# Spec for the ephemeral job cluster created for each run.
# NOTE(review): in a real deployment these values would point to your own
# cluster/job configuration.
new_cluster = {
    "spark_version": "13.3.x-scala2.12",
    "node_type_id": "i3.xlarge",
    "num_workers": 2,
}

# Notebook the submitted run executes on the cluster above.
notebook_task = {
    "notebook_path": "/Users/airflow@example.com/PrepareData",
}

# Defaults applied to every task in this DAG.
default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
    "databricks_dag",
    default_args=default_args,
    description="A simple Databricks DAG",
    # NOTE(review): `schedule_interval` is deprecated in Airflow >= 2.4 in
    # favour of `schedule`; kept here for compatibility with older Airflow
    # versions — confirm against your target Airflow release.
    schedule_interval=timedelta(days=1),
    start_date=datetime(2023, 1, 1),
    tags=["example"],
    catchup=False,
) as dag:

    # Submit a one-time notebook run to Databricks, using the
    # `databricks_default` connection configured in Airflow.
    submit_run = DatabricksSubmitRunOperator(
        task_id="submit_run",
        databricks_conn_id="databricks_default",
        new_cluster=new_cluster,
        notebook_task=notebook_task,
    )

    submit_run