 import os
 import time
 from datetime import datetime
+from pathlib import Path
 from typing import cast

 from airflow import models
+from airflow.models.baseoperator import chain
 from airflow.models.xcom_arg import XComArg
+from airflow.providers.google.cloud.operators.bigquery import (
+    BigQueryCreateEmptyDatasetOperator,
+    BigQueryCreateEmptyTableOperator,
+    BigQueryDeleteDatasetOperator,
+)
 from airflow.providers.google.cloud.operators.bigquery_dts import (
     BigQueryCreateDataTransferOperator,
     BigQueryDataTransferServiceStartTransferRunsOperator,
     BigQueryDeleteDataTransferConfigOperator,
 )
+from airflow.providers.google.cloud.operators.gcs import GCSCreateBucketOperator, GCSDeleteBucketOperator
 from airflow.providers.google.cloud.sensors.bigquery_dts import BigQueryDataTransferServiceTransferRunSensor
+from airflow.providers.google.cloud.transfers.local_to_gcs import LocalFilesystemToGCSOperator
+from airflow.utils.trigger_rule import TriggerRule
+
+ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
+PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
+
+DAG_ID = "example_gcp_bigquery_dts"

-GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")
-BUCKET_URI = os.environ.get("GCP_DTS_BUCKET_URI", "gs://INVALID BUCKET NAME/bank-marketing.csv")
-GCP_DTS_BQ_DATASET = os.environ.get("GCP_DTS_BQ_DATASET", "test_dts")
-GCP_DTS_BQ_TABLE = os.environ.get("GCP_DTS_BQ_TABLE", "GCS_Test")
+BUCKET_NAME = f"bucket-{DAG_ID}-{ENV_ID}"
+
+FILE_NAME = "us-states.csv"
+CURRENT_FOLDER = Path(__file__).parent
+FILE_LOCAL_PATH = str(Path(CURRENT_FOLDER) / "resources" / FILE_NAME)
+BUCKET_URI = f"gs://{BUCKET_NAME}/{FILE_NAME}"
+
+DATASET_NAME = f"dataset_{DAG_ID}_{ENV_ID}"
+DTS_BQ_TABLE = "DTS_BQ_TABLE"

 # [START howto_bigquery_dts_create_args]

 # In the case of Airflow, the customer needs to create a transfer
 # config with the automatic scheduling disabled and then trigger
 # a transfer run using a specialized Airflow operator
-schedule_options = {"disable_auto_scheduling": True}
-
-PARAMS = {
-    "field_delimiter": ",",
-    "max_bad_records": "0",
-    "skip_leading_rows": "1",
-    "data_path_template": BUCKET_URI,
-    "destination_table_name_template": GCP_DTS_BQ_TABLE,
-    "file_format": "CSV",
-}
-
 TRANSFER_CONFIG = {
-    "destination_dataset_id": GCP_DTS_BQ_DATASET,
-    "display_name": "GCS Test Config",
+    "destination_dataset_id": DATASET_NAME,
+    "display_name": "test data transfer",
     "data_source_id": "google_cloud_storage",
-    "schedule_options": schedule_options,
-    "params": PARAMS,
+    "schedule_options": {"disable_auto_scheduling": True},
+    "params": {
+        "field_delimiter": ",",
+        "max_bad_records": "0",
+        "skip_leading_rows": "1",
+        "data_path_template": BUCKET_URI,
+        "destination_table_name_template": DTS_BQ_TABLE,
+        "file_format": "CSV",
+    },
 }

 # [END howto_bigquery_dts_create_args]

 with models.DAG(
-    "example_gcp_bigquery_dts",
+    DAG_ID,
+    schedule="@once",
     start_date=datetime(2021, 1, 1),
     catchup=False,
-    tags=["example"],
+    tags=["example", "bigquery"],
 ) as dag:
+
+    create_bucket = GCSCreateBucketOperator(
+        task_id="create_bucket", bucket_name=BUCKET_NAME, project_id=PROJECT_ID
+    )
+    upload_file = LocalFilesystemToGCSOperator(
+        task_id="upload_file",
+        src=FILE_LOCAL_PATH,
+        dst=FILE_NAME,
+        bucket=BUCKET_NAME,
+    )
+    create_dataset = BigQueryCreateEmptyDatasetOperator(task_id="create_dataset", dataset_id=DATASET_NAME)
+
+    create_table = BigQueryCreateEmptyTableOperator(
+        task_id="create_table",
+        dataset_id=DATASET_NAME,
+        table_id=DTS_BQ_TABLE,
+        schema_fields=[
+            {"name": "name", "type": "STRING", "mode": "REQUIRED"},
+            {"name": "post_abbr", "type": "STRING", "mode": "NULLABLE"},
+        ],
+    )
+
     # [START howto_bigquery_create_data_transfer]
     gcp_bigquery_create_transfer = BigQueryCreateDataTransferOperator(
         transfer_config=TRANSFER_CONFIG,
-        project_id=GCP_PROJECT_ID,
+        project_id=PROJECT_ID,
         task_id="gcp_bigquery_create_transfer",
     )

[... unchanged lines collapsed in this view: the definitions of `gcp_bigquery_start_transfer` and `gcp_run_sensor` referenced below ...]
         transfer_config_id=transfer_config_id, task_id="gcp_bigquery_delete_transfer"
     )
     # [END howto_bigquery_delete_data_transfer]
+    gcp_bigquery_delete_transfer.trigger_rule = TriggerRule.ALL_DONE
+
+    delete_dataset = BigQueryDeleteDatasetOperator(
+        task_id="delete_dataset",
+        dataset_id=DATASET_NAME,
+        delete_contents=True,
+        trigger_rule=TriggerRule.ALL_DONE,
+    )

-    gcp_run_sensor >> gcp_bigquery_delete_transfer
+    delete_bucket = GCSDeleteBucketOperator(
+        task_id="delete_bucket", bucket_name=BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
+    )

     # Task dependencies created via `XComArgs`:
     # gcp_bigquery_create_transfer >> gcp_bigquery_start_transfer
     # gcp_bigquery_create_transfer >> gcp_run_sensor
     # gcp_bigquery_start_transfer >> gcp_run_sensor
     # gcp_bigquery_create_transfer >> gcp_bigquery_delete_transfer
+
+    chain(
+        # TEST SETUP
+        create_bucket,
+        upload_file,
+        create_dataset,
+        create_table,
+        # TEST BODY
+        gcp_bigquery_create_transfer,
+        gcp_bigquery_start_transfer,
+        gcp_run_sensor,
+        gcp_bigquery_delete_transfer,
+        # TEST TEARDOWN
+        delete_dataset,
+        delete_bucket,
+    )
+
+    from tests.system.utils.watcher import watcher
+
+    # This test needs watcher in order to properly mark success/failure
+    # when "tearDown" task with trigger rule is part of the DAG
+    list(dag.tasks) >> watcher()
+
+
+from tests.system.utils import get_test_run  # noqa: E402
+
+# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
+test_run = get_test_run(dag)
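
The hunk collapsed between the create-transfer and delete-transfer operators is not shown above; the surrounding code only tells us it defines `transfer_config_id`, `gcp_bigquery_start_transfer`, and `gcp_run_sensor`. Given the imports of `time`, `cast`, and `XComArg`, a minimal sketch of what that region presumably looks like follows; the XCom keys, `requested_run_time`, and `expected_statuses` values are illustrative assumptions, not taken from this diff.

# Sketch only (would sit inside the same `with models.DAG(...)` block); parameter values are assumptions.
transfer_config_id = cast(str, XComArg(gcp_bigquery_create_transfer, key="transfer_config_id"))

gcp_bigquery_start_transfer = BigQueryDataTransferServiceStartTransferRunsOperator(
    task_id="gcp_bigquery_start_transfer",
    project_id=PROJECT_ID,
    transfer_config_id=transfer_config_id,
    # Ask the Data Transfer Service to schedule a run shortly after the task starts.
    requested_run_time={"seconds": int(time.time() + 60)},
)

gcp_run_sensor = BigQueryDataTransferServiceTransferRunSensor(
    task_id="gcp_run_sensor",
    transfer_config_id=transfer_config_id,
    # Pulling the run id from the start operator's XCom also creates the implicit
    # gcp_bigquery_start_transfer >> gcp_run_sensor dependency.
    run_id=cast(str, XComArg(gcp_bigquery_start_transfer, key="run_id")),
    expected_statuses={"SUCCEEDED"},
)

Because these values are pulled through `XComArg`, Airflow adds the upstream edges automatically, which is what the "Task dependencies created via `XComArgs`" comment in the diff refers to.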