diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index e7cafc47e..8ad1586f4 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -1389,6 +1389,7 @@ def update_table(
         self,
         table: Table,
         fields: Sequence[str],
+        autodetect_schema: bool = False,
         retry: retries.Retry = DEFAULT_RETRY,
         timeout: TimeoutType = DEFAULT_TIMEOUT,
     ) -> Table:
@@ -1419,6 +1420,10 @@ def update_table(
             fields (Sequence[str]):
                 The fields of ``table`` to change, spelled as the
                 :class:`~google.cloud.bigquery.table.Table` properties.
+            autodetect_schema (bool):
+                Specifies if the schema of the table should be autodetected when
+                updating the table from the underlying source. Only applicable
+                for external tables.
             retry (Optional[google.api_core.retry.Retry]):
                 A description of how to retry the API call.
             timeout (Optional[float]):
@@ -1438,12 +1443,18 @@ def update_table(
         path = table.path
         span_attributes = {"path": path, "fields": fields}
 
+        if autodetect_schema:
+            query_params = {"autodetect_schema": True}
+        else:
+            query_params = {}
+
         api_response = self._call_api(
             retry,
             span_name="BigQuery.updateTable",
             span_attributes=span_attributes,
             method="PATCH",
             path=path,
+            query_params=query_params,
             data=partial,
             headers=headers,
             timeout=timeout,
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index 9df572b14..6584ca03c 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -978,6 +978,53 @@ def test_update_table_constraints(self):
         )
         self.assertIsNone(reference_table3.table_constraints, None)
 
+    def test_update_table_autodetect_schema(self):
+        dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test"))
+
+        # Create an external table, restrict schema to one field
+        TABLE_NAME = "test_table"
+        set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")]
+        table_arg = Table(dataset.table(TABLE_NAME))
+
+        # Create an external_config and include it in the table arguments
+        external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
+        external_config.source_uris = SOURCE_URIS_AVRO
+        external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+        external_config.schema = set_schema
+        table_arg.external_data_configuration = external_config
+
+        self.assertFalse(_table_exists(table_arg))
+
+        table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+        self.to_delete.insert(0, table)
+        self.assertTrue(_table_exists(table))
+
+        self.assertEqual(table.schema, set_schema)
+
+        # Update table with schema autodetection
+        updated_table_arg = Table(dataset.table(TABLE_NAME))
+
+        # Update the external_config and include it in the table arguments
+        updated_external_config = copy.deepcopy(external_config)
+        updated_external_config.autodetect = True
+        updated_external_config.schema = None
+        updated_table_arg.external_data_configuration = updated_external_config
+
+        # PATCH call with autodetect_schema=True to trigger schema inference
+        updated_table = Config.CLIENT.update_table(
+            updated_table_arg, ["external_data_configuration"], autodetect_schema=True
+        )
+
+        # The updated table should have a schema inferred from the reference
+        # file, which has all four fields.
+        expected_schema = [
+            bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+            bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+        ]
+        self.assertEqual(updated_table.schema, expected_schema)
+
     @staticmethod
     def _fetch_single_page(table, selected_fields=None):
         iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields)
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 34ef680dd..b8140df66 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -2385,7 +2385,7 @@ def test_update_table(self):
             "resourceTags": {"123456789012/key": "value"},
         }
         conn.api_request.assert_called_once_with(
-            method="PATCH", data=sent, path="/" + path, timeout=7.5
+            method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={}
         )
         self.assertEqual(updated_table.description, table.description)
         self.assertEqual(updated_table.friendly_name, table.friendly_name)
@@ -2439,6 +2439,7 @@ def test_update_table_w_custom_property(self):
             path="/%s" % path,
             data={"newAlphaProperty": "unreleased property"},
             timeout=DEFAULT_TIMEOUT,
+            query_params={},
         )
         self.assertEqual(
             updated_table._properties["newAlphaProperty"], "unreleased property"
         )
@@ -2475,6 +2476,7 @@ def test_update_table_only_use_legacy_sql(self):
             path="/%s" % path,
             data={"view": {"useLegacySql": True}},
             timeout=DEFAULT_TIMEOUT,
+            query_params={},
         )
         self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
 
@@ -2567,9 +2569,10 @@ def test_update_table_w_query(self):
                 "schema": schema_resource,
             },
             timeout=DEFAULT_TIMEOUT,
+            query_params={},
         )
 
-    def test_update_table_w_schema_None(self):
+    def test_update_table_w_schema_None_autodetect_schema(self):
         # Simulate deleting schema: not sure if back-end will actually
         # allow this operation, but the spec says it is optional.
         path = "projects/%s/datasets/%s/tables/%s" % (
@@ -2611,7 +2614,9 @@ def test_update_table_w_schema_None(self):
         with mock.patch(
             "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
         ) as final_attributes:
-            updated_table = client.update_table(table, ["schema"])
+            updated_table = client.update_table(
+                table, ["schema"], autodetect_schema=True
+            )
 
         final_attributes.assert_called_once_with(
             {"path": "/%s" % path, "fields": ["schema"]}, client, None
@@ -2623,6 +2628,7 @@ def test_update_table_w_schema_None(self):
         sent = {"schema": {"fields": None}}
         self.assertEqual(req[1]["data"], sent)
         self.assertEqual(req[1]["path"], "/%s" % path)
+        self.assertEqual(req[1]["query_params"], {"autodetect_schema": True})
         self.assertEqual(len(updated_table.schema), 0)
 
     def test_update_table_delete_property(self):
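Usage sketch (not part of the diff): a minimal example of how a caller could use the new `autodetect_schema` keyword after this change lands, mirroring the flow in the system test above. The project, dataset, and table names are hypothetical.

    from google.cloud import bigquery

    client = bigquery.Client()

    # Hypothetical existing table backed by external source files.
    table = client.get_table("my-project.my_dataset.my_external_table")

    # Drop the stored schema and switch the external config to autodetection.
    external_config = table.external_data_configuration
    external_config.autodetect = True
    external_config.schema = None
    table.external_data_configuration = external_config

    # autodetect_schema=True adds the autodetect_schema query parameter to the
    # PATCH request, so BigQuery re-infers the schema from the source files and
    # the returned table carries the newly detected fields.
    table = client.update_table(
        table, ["external_data_configuration"], autodetect_schema=True
    )
    print([field.name for field in table.schema])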