|
28 | 28 | .. warning::
|
29 | 29 | You need to provide a large enough set of data so that the transfer operations do not complete too quickly.
|
30 | 30 | Otherwise, the DAG will fail.
|
31 |
| -* GCP_TRANSFER_FIRST_TARGET_BUCKET - Google Cloud Storage bucket to which files are copied from AWS. |
32 |
| - It is also a source bucket in next step |
33 | 31 | * GCP_TRANSFER_FIRST_TARGET_BUCKET - Google Cloud Storage bucket to which files are copied from AWS
|
34 | 32 | * WAIT_FOR_OPERATION_POKE_INTERVAL - interval, in seconds, at which to check the status of the operation
|
35 | 33 | A value smaller than the default accelerates the system test and ensures its correct execution with
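(The lines elided from this hunk presumably read WAIT_FOR_OPERATION_POKE_INTERVAL from the environment, analogous to the bucket variables kept below. A minimal sketch, assuming a fallback of 5 seconds; the actual default is not visible in this diff:

    import os

    # Seconds between status checks of the transfer operation;
    # the fallback value of 5 is an assumption for illustration.
    WAIT_FOR_OPERATION_POKE_INTERVAL = int(os.environ.get('WAIT_FOR_OPERATION_POKE_INTERVAL', 5))
)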
|
|
40 | 38 |
|
41 | 39 | import os
|
42 | 40 | from datetime import datetime, timedelta
|
43 |
| -from typing import Any, Dict |
44 | 41 |
|
45 | 42 | from airflow import models
|
46 | 43 | from airflow.providers.google.cloud.hooks.cloud_storage_transfer_service import (
|
47 | 44 | ALREADY_EXISTING_IN_SINK, AWS_S3_DATA_SOURCE, BUCKET_NAME, DESCRIPTION, FILTER_JOB_NAMES,
|
48 |
| - FILTER_PROJECT_ID, GCS_DATA_SINK, GCS_DATA_SOURCE, PROJECT_ID, SCHEDULE, SCHEDULE_END_DATE, |
49 |
| - SCHEDULE_START_DATE, START_TIME_OF_DAY, STATUS, TRANSFER_JOB, TRANSFER_JOB_FIELD_MASK, TRANSFER_OPTIONS, |
50 |
| - TRANSFER_SPEC, GcpTransferJobsStatus, GcpTransferOperationStatus, |
| 45 | + FILTER_PROJECT_ID, GCS_DATA_SINK, PROJECT_ID, SCHEDULE, SCHEDULE_END_DATE, SCHEDULE_START_DATE, |
| 46 | + START_TIME_OF_DAY, STATUS, TRANSFER_OPTIONS, TRANSFER_SPEC, GcpTransferJobsStatus, |
| 47 | + GcpTransferOperationStatus, |
51 | 48 | )
|
52 | 49 | from airflow.providers.google.cloud.operators.cloud_storage_transfer_service import (
|
53 | 50 | CloudDataTransferServiceCancelOperationOperator, CloudDataTransferServiceCreateJobOperator,
|
54 | 51 | CloudDataTransferServiceDeleteJobOperator, CloudDataTransferServiceGetOperationOperator,
|
55 | 52 | CloudDataTransferServiceListOperationsOperator, CloudDataTransferServicePauseOperationOperator,
|
56 |
| - CloudDataTransferServiceResumeOperationOperator, CloudDataTransferServiceUpdateJobOperator, |
| 53 | + CloudDataTransferServiceResumeOperationOperator, |
57 | 54 | )
|
58 | 55 | from airflow.providers.google.cloud.sensors.cloud_storage_transfer_service import (
|
59 | 56 | CloudDataTransferServiceJobStatusSensor,
|
|
69 | 66 | GCP_TRANSFER_FIRST_TARGET_BUCKET = os.environ.get(
|
70 | 67 | 'GCP_TRANSFER_FIRST_TARGET_BUCKET', 'gcp-transfer-first-target'
|
71 | 68 | )
|
72 |
| -GCP_TRANSFER_SECOND_TARGET_BUCKET = os.environ.get( |
73 |
| - 'GCP_TRANSFER_SECOND_TARGET_BUCKET', 'gcp-transfer-second-target' |
74 |
| -) |
75 | 69 |
|
76 | 70 | # [START howto_operator_gcp_transfer_create_job_body_aws]
|
77 | 71 | aws_to_gcs_transfer_body = {
|
|
91 | 85 | }
|
92 | 86 | # [END howto_operator_gcp_transfer_create_job_body_aws]
|
93 | 87 |
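(The body of aws_to_gcs_transfer_body is elided from this hunk. Judging by the constants imported above and the GCS-to-GCS body removed below, it presumably follows this shape; the GCP_TRANSFER_SOURCE_AWS_BUCKET name and the schedule dates are assumptions, not taken from the diff:

    # Sketch of the elided AWS-to-GCS job body, mirroring the removed GCS-to-GCS body.
    aws_to_gcs_transfer_body = {
        DESCRIPTION: GCP_DESCRIPTION,
        STATUS: GcpTransferJobsStatus.ENABLED,
        PROJECT_ID: GCP_PROJECT_ID,
        SCHEDULE: {
            SCHEDULE_START_DATE: datetime(2015, 1, 1).date(),
            SCHEDULE_END_DATE: datetime(2030, 1, 1).date(),
            START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=2)).time(),
        },
        TRANSFER_SPEC: {
            AWS_S3_DATA_SOURCE: {BUCKET_NAME: GCP_TRANSFER_SOURCE_AWS_BUCKET},  # assumed name
            GCS_DATA_SINK: {BUCKET_NAME: GCP_TRANSFER_FIRST_TARGET_BUCKET},
            TRANSFER_OPTIONS: {ALREADY_EXISTING_IN_SINK: True},
        },
    }
)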
|
94 |
| -# [START howto_operator_gcp_transfer_create_job_body_gcp] |
95 |
| -gcs_to_gcs_transfer_body = { |
96 |
| - DESCRIPTION: GCP_DESCRIPTION, |
97 |
| - STATUS: GcpTransferJobsStatus.ENABLED, |
98 |
| - PROJECT_ID: GCP_PROJECT_ID, |
99 |
| - SCHEDULE: { |
100 |
| - SCHEDULE_START_DATE: datetime(2015, 1, 1).date(), |
101 |
| - SCHEDULE_END_DATE: datetime(2030, 1, 1).date(), |
102 |
| - START_TIME_OF_DAY: (datetime.utcnow() + timedelta(minutes=2)).time(), |
103 |
| - }, |
104 |
| - TRANSFER_SPEC: { |
105 |
| - GCS_DATA_SOURCE: {BUCKET_NAME: GCP_TRANSFER_FIRST_TARGET_BUCKET}, |
106 |
| - GCS_DATA_SINK: {BUCKET_NAME: GCP_TRANSFER_SECOND_TARGET_BUCKET}, |
107 |
| - TRANSFER_OPTIONS: {ALREADY_EXISTING_IN_SINK: True}, |
108 |
| - }, |
109 |
| -} # type: Dict[str, Any] |
110 |
| -# [END howto_operator_gcp_transfer_create_job_body_gcp] |
111 |
| - |
112 |
| -# [START howto_operator_gcp_transfer_update_job_body] |
113 |
| -update_body = { |
114 |
| - PROJECT_ID: GCP_PROJECT_ID, |
115 |
| - TRANSFER_JOB: {DESCRIPTION: "{}_updated".format(GCP_DESCRIPTION)}, |
116 |
| - TRANSFER_JOB_FIELD_MASK: "description", |
117 |
| -} |
118 |
| -# [END howto_operator_gcp_transfer_update_job_body] |
119 |
| - |
120 |
| -list_filter_dict = {FILTER_PROJECT_ID: GCP_PROJECT_ID, FILTER_JOB_NAMES: []} |
121 | 88 |
|
122 | 89 | # [START howto_operator_gcp_transfer_default_args]
|
123 | 90 | default_args = {'start_date': days_ago(1)}
|
124 | 91 | # [END howto_operator_gcp_transfer_default_args]
|
125 | 92 |
|
126 | 93 | with models.DAG(
|
127 |
| - 'example_gcp_transfer', |
| 94 | + 'example_gcp_transfer_aws', |
128 | 95 | default_args=default_args,
|
129 | 96 | schedule_interval=None, # Override to match your needs
|
130 | 97 | tags=['example'],
|
|
152 | 119 | )
|
153 | 120 | # [END howto_operator_gcp_transfer_pause_operation]
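(The tasks elided between the DAG header and the pause step can be inferred from the dependency chain at the bottom of the file and from the removed wait_for_second_operation_to_start sensor further down; a sketch under those assumptions:

    # Sketch of the elided create-and-wait tasks; argument values mirror the
    # removed GCS-to-GCS sensor below.
    create_transfer_job_from_aws = CloudDataTransferServiceCreateJobOperator(
        task_id="create_transfer_job_from_aws", body=aws_to_gcs_transfer_body
    )

    wait_for_operation_to_start = CloudDataTransferServiceJobStatusSensor(
        task_id="wait_for_operation_to_start",
        job_name="{{ task_instance.xcom_pull('create_transfer_job_from_aws')['name'] }}",
        project_id=GCP_PROJECT_ID,
        expected_statuses={GcpTransferOperationStatus.IN_PROGRESS},
        poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL,
    )
)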
|
154 | 121 |
|
155 |
| - # [START howto_operator_gcp_transfer_update_job] |
156 |
| - update_job = CloudDataTransferServiceUpdateJobOperator( |
157 |
| - task_id="update_job", |
158 |
| - job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}", |
159 |
| - body=update_body, |
160 |
| - ) |
161 |
| - # [END howto_operator_gcp_transfer_update_job] |
162 |
| - |
163 | 122 | # [START howto_operator_gcp_transfer_list_operations]
|
164 | 123 | list_operations = CloudDataTransferServiceListOperationsOperator(
|
165 | 124 | task_id="list_operations",
|
|
192 | 151 | )
|
193 | 152 | # [END howto_operator_gcp_transfer_wait_operation]
|
194 | 153 |
|
195 |
| - job_time = datetime.utcnow() + timedelta(minutes=2) |
196 |
| - |
197 |
| - gcs_to_gcs_transfer_body['schedule']['startTimeOfDay'] = (datetime.utcnow() + timedelta(minutes=2)).time() |
198 |
| - |
199 |
| - create_transfer_job_from_gcp = CloudDataTransferServiceCreateJobOperator( |
200 |
| - task_id="create_transfer_job_from_gcp", body=gcs_to_gcs_transfer_body |
201 |
| - ) |
202 |
| - |
203 |
| - wait_for_second_operation_to_start = CloudDataTransferServiceJobStatusSensor( |
204 |
| - task_id="wait_for_second_operation_to_start", |
205 |
| - job_name="{{ task_instance.xcom_pull('create_transfer_job_from_gcp')['name'] }}", |
206 |
| - project_id=GCP_PROJECT_ID, |
207 |
| - expected_statuses={GcpTransferOperationStatus.IN_PROGRESS}, |
208 |
| - poke_interval=WAIT_FOR_OPERATION_POKE_INTERVAL, |
209 |
| - ) |
210 |
| - |
211 | 154 | # [START howto_operator_gcp_transfer_cancel_operation]
|
212 | 155 | cancel_operation = CloudDataTransferServiceCancelOperationOperator(
|
213 | 156 | task_id="cancel_operation",
|
|
224 | 167 | )
|
225 | 168 | # [END howto_operator_gcp_transfer_delete_job]
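(The delete task between the START/END delete_job markers is elided; it presumably mirrors the removed GCS variant below, pointed at the AWS-created job:

    # Sketch of the elided task, modeled on the removed
    # delete_transfer_from_gcp_job operator.
    delete_transfer_from_aws_job = CloudDataTransferServiceDeleteJobOperator(
        task_id="delete_transfer_from_aws_job",
        job_name="{{task_instance.xcom_pull('create_transfer_job_from_aws')['name']}}",
        project_id=GCP_PROJECT_ID,
    )
)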
|
226 | 169 |
|
227 |
| - delete_transfer_from_gcp_job = CloudDataTransferServiceDeleteJobOperator( |
228 |
| - task_id="delete_transfer_from_gcp_job", |
229 |
| - job_name="{{task_instance.xcom_pull('create_transfer_job_from_gcp')['name']}}", |
230 |
| - project_id=GCP_PROJECT_ID, |
231 |
| - ) |
232 |
| - |
233 | 170 | create_transfer_job_from_aws >> wait_for_operation_to_start >> pause_operation >> \
|
234 | 171 | list_operations >> get_operation >> resume_operation >> wait_for_operation_to_end >> \
|
235 |
| - create_transfer_job_from_gcp >> wait_for_second_operation_to_start >> cancel_operation >> \ |
236 |
| - delete_transfer_from_aws_job >> delete_transfer_from_gcp_job |
| 172 | + cancel_operation >> delete_transfer_from_aws_job |