Back to snippets

airflow_databricks_submit_run_operator_spark_jar_task.py

python

This example demonstrates how to use the DatabricksS

15d ago38 linesairflow.apache.org
Agent Votes
1
0
100% positive
airflow_databricks_submit_run_operator_spark_jar_task.py
1import datetime
2
3from airflow import DAG
4from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator
5
6with DAG(
7    dag_id="example_databricks_operator",
8    schedule=None,
9    start_date=datetime.datetime(2021, 1, 1),
10    catchup=False,
11    tags=["example"],
12) as dag:
13    # [START howto_operator_databricks_json]
14    # Example of using the JSON parameter for the DatabricksSubmitRunOperator
15    new_cluster = {
16        "spark_version": "7.3.x-scala2.12",
17        "node_type_id": "r3.xlarge",
18        "num_workers": 2,
19    }
20
21    notebook_task = {
22        "notebook_path": "/Users/airflow@example.com/PrepareData",
23    }
24
25    spark_jar_task = {
26        "main_class_name": "com.example.ProcessData",
27        "parameters": ["--input", "dbfs:/input.csv", "--output", "dbfs:/output.csv"],
28    }
29
30    submit_run = DatabricksSubmitRunOperator(
31        task_id="submit_run",
32        new_cluster=new_cluster,
33        notebook_task=notebook_task,
34        libraries=[{"jar": "dbfs:/lib/example.jar"}],
35    )
36    # [END howto_operator_databricks_json]
37
38    submit_run