|
52 | 52 | )
|
53 | 53 | INSPECT_CONFIG = InspectConfig(info_types=[{"name": "PHONE_NUMBER"}, {"name": "US_TOLLFREE_PHONE_NUMBER"}])
|
54 | 54 | INSPECT_TEMPLATE = InspectTemplate(inspect_config=INSPECT_CONFIG)
|
# Bucket that the stored-info-type examples read from and write to.
# Override the default with the DLP_OUTPUT_BUCKET environment variable.
OUTPUT_BUCKET = os.environ.get("DLP_OUTPUT_BUCKET", "gs://test-dlp-airflow")
OUTPUT_FILENAME = "test.txt"

# GCS URIs always use "/" as their separator, so they are joined explicitly
# instead of with os.path.join, which uses the host OS separator (a backslash
# on Windows). A trailing "/" on the bucket is tolerated, as it was before.
OBJECT_GCS_URI = "/".join([OUTPUT_BUCKET.rstrip("/"), "tmp"])
OBJECT_GCS_OUTPUT_URI = "/".join([OBJECT_GCS_URI, OUTPUT_FILENAME])
56 | 60 |
|
57 | 61 | with models.DAG(
|
58 | 62 | "example_gcp_dlp",
|
59 | 63 | schedule_interval=None, # Override to match your needs
|
60 | 64 | start_date=days_ago(1),
|
61 | 65 | tags=['example'],
|
62 |
| -) as dag: |
| 66 | +) as dag1: |
63 | 67 | # [START howto_operator_dlp_create_inspect_template]
|
64 | 68 | create_template = CloudDLPCreateInspectTemplateOperator(
|
65 | 69 | project_id=GCP_PROJECT,
|
66 | 70 | inspect_template=INSPECT_TEMPLATE,
|
67 | 71 | template_id=TEMPLATE_ID,
|
68 | 72 | task_id="create_template",
|
69 | 73 | do_xcom_push=True,
|
70 |
| - dag=dag, |
71 | 74 | )
|
72 | 75 | # [END howto_operator_dlp_create_inspect_template]
|
73 | 76 |
|
|
77 | 80 | project_id=GCP_PROJECT,
|
78 | 81 | item=ITEM,
|
79 | 82 | inspect_template_name="{{ task_instance.xcom_pull('create_template', key='return_value')['name'] }}",
|
80 |
| - dag=dag, |
81 | 83 | )
|
82 | 84 | # [END howto_operator_dlp_use_inspect_template]
|
83 | 85 |
|
84 | 86 | # [START howto_operator_dlp_delete_inspect_template]
|
85 | 87 | delete_template = CloudDLPDeleteInspectTemplateOperator(
|
86 |
| - task_id="delete_template", template_id=TEMPLATE_ID, project_id=GCP_PROJECT, dag=dag, |
| 88 | + task_id="delete_template", template_id=TEMPLATE_ID, project_id=GCP_PROJECT, |
87 | 89 | )
|
88 | 90 | # [END howto_operator_dlp_delete_inspect_template]
|
89 | 91 |
|
90 | 92 | create_template >> inspect_content >> delete_template
|
91 | 93 |
|
92 |
| - |
93 |
| -CUSTOM_INFO_TYPES = [{"info_type": {"name": "C_MRN"}, "regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},}] |
CUSTOM_INFO_TYPE_ID = "custom_info_type"


def _large_custom_dictionary_config():
    """Return a new stored-info-type config for a large custom dictionary.

    The dictionary source is the file set under ``OBJECT_GCS_URI`` and the
    output is written to ``OBJECT_GCS_OUTPUT_URI``.
    """
    source_file_set = {"url": OBJECT_GCS_URI + "/"}
    destination = {"path": OBJECT_GCS_OUTPUT_URI}
    return {
        "large_custom_dictionary": {
            "output_path": destination,
            "cloud_storage_file_set": source_file_set,
        }
    }


# The create and update examples submit the same configuration; each constant
# is its own dict object so neither example can affect the other.
CUSTOM_INFO_TYPES = _large_custom_dictionary_config()
UPDATE_CUSTOM_INFO_TYPE = _large_custom_dictionary_config()
98 | 107 |
|
99 | 108 | with models.DAG(
|
100 | 109 | "example_gcp_dlp_info_types",
|
101 | 110 | schedule_interval=None,
|
102 | 111 | start_date=days_ago(1),
|
103 | 112 | tags=["example", "dlp", "info-types"],
|
104 |
| -) as dag: |
| 113 | +) as dag2: |
105 | 114 | # [START howto_operator_dlp_create_info_type]
|
106 | 115 | create_info_type = CloudDLPCreateStoredInfoTypeOperator(
|
107 | 116 | project_id=GCP_PROJECT,
|
108 | 117 | config=CUSTOM_INFO_TYPES,
|
109 | 118 | stored_info_type_id=CUSTOM_INFO_TYPE_ID,
|
110 |
| - dag=dag, |
111 | 119 | task_id="create_info_type",
|
112 | 120 | )
|
113 | 121 | # [END howto_operator_dlp_create_info_type]
|
|
116 | 124 | project_id=GCP_PROJECT,
|
117 | 125 | stored_info_type_id=CUSTOM_INFO_TYPE_ID,
|
118 | 126 | config=UPDATE_CUSTOM_INFO_TYPE,
|
119 |
| - dag=dag, |
120 | 127 | task_id="update_info_type",
|
121 | 128 | )
|
122 | 129 | # [END howto_operator_dlp_update_info_type]
|
123 | 130 | # [START howto_operator_dlp_delete_info_type]
|
124 | 131 | delete_info_type = CloudDLPDeleteStoredInfoTypeOperator(
|
125 |
| - project_id=GCP_PROJECT, stored_info_type_id=CUSTOM_INFO_TYPE_ID, dag=dag, task_id="delete_info_type", |
| 132 | + project_id=GCP_PROJECT, stored_info_type_id=CUSTOM_INFO_TYPE_ID, task_id="delete_info_type", |
126 | 133 | )
|
127 | 134 | # [END howto_operator_dlp_delete_info_type]
|
128 | 135 | create_info_type >> update_info_type >> delete_info_type
|
129 | 136 |
|
130 |
| -SCHEDULE = {"recurrence_period_duration": {"seconds": 60 * 60 * 24}} |
131 |
| -JOB = { |
132 |
| - "inspect_config": INSPECT_CONFIG, |
133 |
| -} |
134 |
| - |
# Job trigger submitted by the create example: inspect the Datastore kind
# "test" in GCP_PROJECT once every 24 hours.
JOB_TRIGGER = {
    "inspect_job": {
        "storage_config": {
            "datastore_options": {"partition_id": {"project_id": GCP_PROJECT}, "kind": {"name": "test"}}
        }
    },
    "triggers": [{"schedule": {"recurrence_period_duration": {"seconds": 60 * 60 * 24}}}],
    "status": "HEALTHY",
}

TRIGGER_ID = "example_trigger"

# Same trigger with the recurrence period doubled to 48 hours. It is built as
# a separate dict: assigning into JOB_TRIGGER here would also change the
# object already passed to the create operator, because the whole module is
# evaluated before any task runs.
UPDATED_JOB_TRIGGER = dict(
    JOB_TRIGGER,
    triggers=[{"schedule": {"recurrence_period_duration": {"seconds": 2 * 60 * 60 * 24}}}],
)

with models.DAG(
    "example_gcp_dlp_job", schedule_interval=None, start_date=days_ago(1), tags=["example", "dlp_job"]
) as dag3:
    # [START howto_operator_dlp_create_job_trigger]
    create_trigger = CloudDLPCreateJobTriggerOperator(
        project_id=GCP_PROJECT,
        job_trigger=JOB_TRIGGER,
        trigger_id=TRIGGER_ID,
        task_id="create_trigger",
    )
    # [END howto_operator_dlp_create_job_trigger]

    # [START howto_operator_dlp_update_job_trigger]
    update_trigger = CloudDLPUpdateJobTriggerOperator(
        project_id=GCP_PROJECT,
        job_trigger_id=TRIGGER_ID,
        job_trigger=UPDATED_JOB_TRIGGER,
        # Task ids name the trigger tasks rather than the info-type tasks
        # they were copied from.
        task_id="update_trigger",
    )
    # [END howto_operator_dlp_update_job_trigger]
    # [START howto_operator_dlp_delete_job_trigger]
    delete_trigger = CloudDLPDeleteJobTriggerOperator(
        project_id=GCP_PROJECT, job_trigger_id=TRIGGER_ID, task_id="delete_trigger"
    )
    # [END howto_operator_dlp_delete_job_trigger]
    create_trigger >> update_trigger >> delete_trigger
|
| 176 | + |
0 commit comments