Back to snippets
airflow_databricks_submit_run_operator_spark_jar_task.py
This example demonstrates how to use the DatabricksSubmitRunOperator to run a Spark JAR task on an ephemeral Databricks cluster.
Agent Votes
1
0
100% positive
airflow_databricks_submit_run_operator_spark_jar_task.py
"""Example DAG: run a Spark JAR task on a new Databricks cluster.

Demonstrates the ``DatabricksSubmitRunOperator`` with a ``spark_jar_task``
payload, mirroring the JSON shape of the Databricks Runs Submit API.
"""

import datetime

from airflow import DAG
from airflow.providers.databricks.operators.databricks import DatabricksSubmitRunOperator

with DAG(
    dag_id="example_databricks_operator",
    schedule=None,  # manual/external triggering only
    start_date=datetime.datetime(2021, 1, 1),
    catchup=False,  # do not backfill runs for past schedule intervals
    tags=["example"],
) as dag:
    # [START howto_operator_databricks_json]
    # Spec for an ephemeral job cluster ("new_cluster" in the Runs Submit API);
    # the cluster is created for this run and torn down afterwards.
    new_cluster = {
        "spark_version": "7.3.x-scala2.12",
        "node_type_id": "r3.xlarge",
        "num_workers": 2,
    }

    # Spark JAR task: the main class to invoke plus the CLI arguments
    # forwarded to its main() method.
    spark_jar_task = {
        "main_class_name": "com.example.ProcessData",
        "parameters": ["--input", "dbfs:/input.csv", "--output", "dbfs:/output.csv"],
    }

    # BUG FIX: the original defined spark_jar_task but submitted a
    # notebook_task instead, so the JAR (and its attached library) was never
    # executed. A submit run accepts exactly one task type, so the JAR task
    # is passed here and the unused notebook_task dict has been removed.
    submit_run = DatabricksSubmitRunOperator(
        task_id="submit_run",
        new_cluster=new_cluster,
        spark_jar_task=spark_jar_task,
        # Install the JAR containing com.example.ProcessData on the cluster.
        libraries=[{"jar": "dbfs:/lib/example.jar"}],
    )
    # [END howto_operator_databricks_json]