Skip to content

Commit c8cacad

Browse files
authored
Migrate Google example trino_to_gcs to new design AIP-47 (#25420)
related: #22447, #22430
1 parent 4eb0a41 commit c8cacad

File tree

3 files changed

+41
-179
lines changed

3 files changed

+41
-179
lines changed

docs/apache-airflow-providers-google/operators/transfer/trino_to_gcs.rst

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ All parameters are described in the reference documentation - :class:`~airflow.p
4949

5050
An example operator call might look like this:
5151

52-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py
52+
.. exampleinclude:: /../../tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py
5353
:language: python
5454
:dedent: 4
5555
:start-after: [START howto_operator_trino_to_gcs_basic]
@@ -67,7 +67,7 @@ You can specify these options by the ``export_format`` parameter.
6767

6868
If you want a CSV file to be created, your operator call might look like this:
6969

70-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py
70+
.. exampleinclude:: /../../tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py
7171
:language: python
7272
:dedent: 4
7373
:start-after: [START howto_operator_trino_to_gcs_csv]
@@ -81,7 +81,7 @@ will be dumped from the database and upload to the bucket.
8181

8282
If you want to create a schema file, then an example operator call might look like this:
8383

84-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py
84+
.. exampleinclude:: /../../tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py
8585
:language: python
8686
:dedent: 4
8787
:start-after: [START howto_operator_trino_to_gcs_multiple_types]
@@ -102,7 +102,7 @@ maximum allowed file size for a single object.
102102

103103
If you want to create 10 MB files, your code might look like this:
104104

105-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py
105+
.. exampleinclude:: /../../tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py
106106
:language: python
107107
:dedent: 4
108108
:start-after: [START howto_operator_read_data_from_gcs_many_chunks]
@@ -123,7 +123,7 @@ For example, if you want to create an external table that allows you to create q
123123
read data directly from GCS, then you can use :class:`~airflow.providers.google.cloud.operators.bigquery.BigQueryCreateExternalTableOperator`.
124124
Using this operator looks like this:
125125

126-
.. exampleinclude:: /../../airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py
126+
.. exampleinclude:: /../../tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py
127127
:language: python
128128
:dedent: 4
129129
:start-after: [START howto_operator_create_external_table_multiple_types]

tests/providers/google/cloud/transfers/test_trino_to_gcs_system.py

Lines changed: 0 additions & 158 deletions
This file was deleted.

airflow/providers/google/cloud/example_dags/example_trino_to_gcs.py renamed to tests/system/providers/google/cloud/gcs/example_trino_to_gcs.py

Lines changed: 36 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,14 @@
3030
BigQueryInsertJobOperator,
3131
)
3232
from airflow.providers.google.cloud.transfers.trino_to_gcs import TrinoToGCSOperator
33+
from airflow.utils.trigger_rule import TriggerRule
34+
35+
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
36+
DAG_ID = "example_trino_to_gcs"
3337

3438
GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", 'example-project')
35-
GCS_BUCKET = os.environ.get("GCP_TRINO_TO_GCS_BUCKET_NAME", "INVALID BUCKET NAME")
36-
DATASET_NAME = os.environ.get("GCP_TRINO_TO_GCS_DATASET_NAME", "test_trino_to_gcs_dataset")
39+
GCS_BUCKET = f"bucket_{DAG_ID}_{ENV_ID}"
40+
DATASET_NAME = f"dataset_{DAG_ID}_{ENV_ID}"
3741

3842
SOURCE_MULTIPLE_TYPES = "memory.default.test_multiple_types"
3943
SOURCE_CUSTOMER_TABLE = "tpch.sf1.customer"
@@ -47,17 +51,19 @@ def safe_name(s: str) -> str:
4751

4852

4953
with models.DAG(
50-
dag_id="example_trino_to_gcs",
54+
dag_id=DAG_ID,
5155
schedule_interval='@once', # Override to match your needs
5256
start_date=datetime(2021, 1, 1),
5357
catchup=False,
54-
tags=["example"],
58+
tags=["example", "gcs"],
5559
) as dag:
56-
5760
create_dataset = BigQueryCreateEmptyDatasetOperator(task_id="create-dataset", dataset_id=DATASET_NAME)
5861

5962
delete_dataset = BigQueryDeleteDatasetOperator(
60-
task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
63+
task_id="delete_dataset",
64+
dataset_id=DATASET_NAME,
65+
delete_contents=True,
66+
trigger_rule=TriggerRule.ALL_DONE,
6167
)
6268

6369
# [START howto_operator_trino_to_gcs_basic]
@@ -179,15 +185,29 @@ def safe_name(s: str) -> str:
179185
)
180186
# [END howto_operator_trino_to_gcs_csv]
181187

182-
create_dataset >> trino_to_gcs_basic
183-
create_dataset >> trino_to_gcs_multiple_types
184-
create_dataset >> trino_to_gcs_many_chunks
185-
create_dataset >> trino_to_gcs_csv
188+
(
189+
# TEST SETUP
190+
create_dataset
191+
# TEST BODY
192+
>> trino_to_gcs_basic
193+
>> trino_to_gcs_multiple_types
194+
>> trino_to_gcs_many_chunks
195+
>> trino_to_gcs_csv
196+
>> create_external_table_multiple_types
197+
>> create_external_table_many_chunks
198+
>> read_data_from_gcs_multiple_types
199+
>> read_data_from_gcs_many_chunks
200+
# TEST TEARDOWN
201+
>> delete_dataset
202+
)
203+
204+
from tests.system.utils.watcher import watcher
205+
206+
# This test needs watcher in order to properly mark success/failure
207+
# when "tearDown" task with trigger rule is part of the DAG
208+
list(dag.tasks) >> watcher()
186209

187-
trino_to_gcs_multiple_types >> create_external_table_multiple_types >> read_data_from_gcs_multiple_types
188-
trino_to_gcs_many_chunks >> create_external_table_many_chunks >> read_data_from_gcs_many_chunks
210+
from tests.system.utils import get_test_run # noqa: E402
189211

190-
trino_to_gcs_basic >> delete_dataset
191-
trino_to_gcs_csv >> delete_dataset
192-
read_data_from_gcs_multiple_types >> delete_dataset
193-
read_data_from_gcs_many_chunks >> delete_dataset
212+
# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
213+
test_run = get_test_run(dag)

0 commit comments

Comments (0)