diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a465926..c71f85d0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,35 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.5.0](https://www.github.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) (2020-12-02) + + +### Features + +* add `TableReference.__str__` to get table ID in standard SQL ([#405](https://www.github.com/googleapis/python-bigquery/issues/405)) ([53dff2a](https://www.github.com/googleapis/python-bigquery/commit/53dff2ad3889af04369a22437e6ab9b92c5755b6)), closes [#354](https://www.github.com/googleapis/python-bigquery/issues/354) +* add progress bar for magics ([#396](https://www.github.com/googleapis/python-bigquery/issues/396)) ([04d0273](https://www.github.com/googleapis/python-bigquery/commit/04d027317a99e3f353e0b7a18076da9b6ba4d8d3)) +* add support for unrecognized model types ([#401](https://www.github.com/googleapis/python-bigquery/issues/401)) ([168f035](https://www.github.com/googleapis/python-bigquery/commit/168f0354c4815bd1aeadbd4e388dcc9b32f97d6b)) + + +### Bug Fixes + +* avoid floating point for timestamp in `insert_rows` ([#393](https://www.github.com/googleapis/python-bigquery/issues/393)) ([a1949ae](https://www.github.com/googleapis/python-bigquery/commit/a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf)) + + +### Performance Improvements + +* don't fetch rows when waiting for query to finish ([#400](https://www.github.com/googleapis/python-bigquery/issues/400)) ([730df17](https://www.github.com/googleapis/python-bigquery/commit/730df17ae1ab0b0bb2454f3c134c8f62665bc51b)), closes [#374](https://www.github.com/googleapis/python-bigquery/issues/374) [#394](https://www.github.com/googleapis/python-bigquery/issues/394) + + +### Documentation + +* **samples:** add more clustering code snippets ([#330](https://www.github.com/googleapis/python-bigquery/issues/330)) ([809e4a2](https://www.github.com/googleapis/python-bigquery/commit/809e4a27b94ba30c10e0c9a7e89576a9de9fda2b)), closes [#329](https://www.github.com/googleapis/python-bigquery/issues/329) + + +### Dependencies + +* update required version of opentelemetry for opentelemetry-exporter-google-cloud ([#398](https://www.github.com/googleapis/python-bigquery/issues/398)) ([673a9cb](https://www.github.com/googleapis/python-bigquery/commit/673a9cb51c577c1dd016e76f3634b1e9e21482c5)) + ## [2.4.0](https://www.github.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) (2020-11-16) diff --git a/docs/usage/tables.rst
b/docs/usage/tables.rst index 7afca05e2..d924fe214 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -85,6 +85,23 @@ Load table data from a file with the :start-after: [START bigquery_load_from_file] :end-before: [END bigquery_load_from_file] +Creating a clustered table from a query result: + +.. literalinclude:: ../samples/client_query_destination_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_clustered_table] + :end-before: [END bigquery_query_clustered_table] + +Creating a clustered table when you load data with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: + +.. literalinclude:: ../samples/load_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_clustered] + :end-before: [END bigquery_load_table_clustered] + Load a CSV file from Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index b8d1cc4d7..41f987228 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions @@ -137,8 +138,9 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", - "StandardSqlDataTypes", "SourceFormat", + "SqlTypeNames", + "StandardSqlDataTypes", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b59b3d794..35129d844 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -23,7 +23,7 @@ from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _microseconds_from_datetime +from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes @@ -313,12 +313,9 @@ def _timestamp_to_json_parameter(value): def _timestamp_to_json_row(value): - """Coerce 'value' to an JSON-compatible representation. - - This version returns floating-point seconds value used in row data. 
- """ + """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) * 1e-6 + value = value.strftime(_RFC3339_MICROS) return value diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index bdecefe4a..2fcf2a981 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -55,15 +55,14 @@ def get_progress_bar(progress_bar_type, description, total, unit): def wait_for_query(query_job, progress_bar_type=None): """Return query result and display a progress bar while the query running, if tqdm is installed.""" - if progress_bar_type is None: - return query_job.result() - default_total = 1 current_stage = None start_time = time.time() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) + if progress_bar is None: + return query_job.result() i = 0 while True: if query_job.query_plan: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c67ef54e0..cd1474336 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1534,7 +1534,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {} + extra_params = {"maxResults": 0} if project is None: project = self.project @@ -3187,7 +3187,6 @@ def _list_rows_from_query_results( page_size=None, retry=DEFAULT_RETRY, timeout=None, - first_page_response=None, ): """List the rows of a completed query. See @@ -3248,7 +3247,6 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, - first_page_response=first_page_response, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7a1a74954..9e8908613 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1177,10 +1177,6 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - first_page_response = None - if max_results is None and page_size is None and start_index is None: - first_page_response = self._query_results._properties - rows = self._client._list_rows_from_query_results( self.job_id, self.location, @@ -1193,7 +1189,6 @@ def result( start_index=start_index, retry=retry, timeout=timeout, - first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 5645a84a5..f04a6364a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -182,6 +182,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() + self._progress_bar_type = "tqdm" @property def credentials(self): @@ -313,6 +314,26 @@ def default_query_job_config(self): def default_query_job_config(self, value): self._default_query_job_config = value + @property + def progress_bar_type(self): + """str: Default progress bar type to use to display progress bar while + executing queries through IPython magics. + + Note:: + Install the ``tqdm`` package to use this feature. 
+ + Example: + Manually setting the progress_bar_type: + + >>> from google.cloud.bigquery import magics + >>> magics.context.progress_bar_type = "tqdm" + """ + return self._progress_bar_type + + @progress_bar_type.setter + def progress_bar_type(self, value): + self._progress_bar_type = value + context = Context() @@ -524,6 +545,15 @@ def _create_dataset_if_necessary(client, dataset_id): "name (ex. $my_dict_var)." ), ) +@magic_arguments.argument( + "--progress_bar_type", + type=str, + default=None, + help=( + "Sets the progress bar type to display a progress bar while executing the query. " + "Defaults to tqdm. Install the ``tqdm`` package to use this feature." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -687,12 +717,16 @@ def _cell_magic(line, query): ) return query_job + progress_bar = args.progress_bar_type or context.progress_bar_type + if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar ) else: - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + result = query_job.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 1143b71f9..0f5d8f83b 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -305,9 +305,15 @@ def from_api_repr(cls, resource): start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + try: + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) + except json_format.ParseError: + resource["modelType"] = "MODEL_TYPE_UNSPECIFIED" + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) return this def _build_resource(self, filter_fields): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4bfedd758..f30c05773 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -262,6 +262,9 @@ def __ne__(self, other): def __hash__(self): return hash(self._key()) + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.table_id}" + def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference @@ -475,7 +478,7 @@ def full_table_id(self): """Union[str, None]: ID for the table (:data:`None` until set from the server). - In the format ``project_id:dataset_id.table_id``. + In the format ``project-id:dataset_id.table_id``. """ return self._properties.get("id") @@ -484,7 +487,8 @@ def table_type(self): """Union[str, None]: The type of the table (:data:`None` until set from the server). - Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. + Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'``, or + ``'EXTERNAL'``. """ return self._properties.get("type") diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fe11624d9..5836d8051 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License.
-__version__ = "2.4.0" +__version__ = "2.5.0" diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py new file mode 100644 index 000000000..5a109ed10 --- /dev/null +++ b/samples/client_query_destination_table_clustered.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table_clustered(table_id): + + # [START bigquery_query_clustered_table] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`" + cluster_fields = ["corpus"] + + job_config = bigquery.QueryJobConfig( + clustering_fields=cluster_fields, destination=table_id + ) + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) # Make an API request. + if table.clustering_fields == cluster_fields: + print( + "The destination table is written using the cluster_fields configuration." + ) + # [END bigquery_query_clustered_table] diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py new file mode 100644 index 000000000..20d412cb3 --- /dev/null +++ b/samples/load_table_clustered.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_clustered(table_id): + + # [START bigquery_load_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + skip_leading_rows=1, + source_format=bigquery.SourceFormat.CSV, + schema=[ + bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC), + ], + time_partitioning=bigquery.TimePartitioning(field="timestamp"), + clustering_fields=["origin", "destination"], + ) + + job = client.load_table_from_uri( + ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"], + table_id, + job_config=job_config, + ) + + job.result() # Waits for the job to complete. + + table = client.get_table(table_id) # Make an API request. + print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_clustered] + return table diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bf895a1ae..eeb94db5a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,10 +1,10 @@ -google-cloud-bigquery==2.2.0 +google-cloud-bigquery==2.4.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.2 +matplotlib==3.3.3 pandas==1.1.4 -pyarrow==1.0.1 -pytz==2020.1 +pyarrow==2.0.0 +pytz==2020.4 diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py new file mode 100644 index 000000000..b4bdd588c --- /dev/null +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_destination_table_clustered + + +def test_client_query_destination_table_clustered(capsys, random_table_id): + + client_query_destination_table_clustered.client_query_destination_table_clustered( + random_table_id + ) + out, err = capsys.readouterr() + assert ( + "The destination table is written using the cluster_fields configuration." + in out + ) diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py new file mode 100644 index 000000000..bafdc2051 --- /dev/null +++ b/samples/tests/test_load_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_clustered + + +def test_load_table_clustered(capsys, random_table_id, client): + + table = load_table_clustered.load_table_clustered(random_table_id) + + out, _ = capsys.readouterr() + assert "rows and 4 columns" in out + + rows = list(client.list_rows(table)) # Make an API request. + assert len(rows) > 0 + assert table.clustering_fields == ["origin", "destination"] diff --git a/setup.py b/setup.py index 48c4a7518..5f4e506eb 100644 --- a/setup.py +++ b/setup.py @@ -55,9 +55,9 @@ ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.9b0", - "opentelemetry-sdk==0.9b0", - "opentelemetry-instrumentation==0.9b0 ", + "opentelemetry-api==0.11b0", + "opentelemetry-sdk==0.11b0", + "opentelemetry-instrumentation==0.11b0", ], } diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 41e31f469..daaf2e557 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -787,9 +787,7 @@ def test_result(self): "location": "EU", }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "3", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "2", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -801,9 +799,9 @@ def test_result(self): query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. - "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource @@ -814,20 +812,19 @@ def test_result(self): result = job.result() self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 3) + self.assertEqual(result.total_rows, 2) rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. 
- self.assertEqual(result.total_rows, 2) + self.assertEqual(result.total_rows, 1) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( @@ -842,7 +839,6 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -855,9 +851,7 @@ def test_result_with_done_job_calls_get_query_results(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "1", } job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { @@ -866,9 +860,9 @@ def test_result_with_done_job_calls_get_query_results(self): "tableId": "dest_table", } results_page_resource = { - "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) @@ -877,15 +871,14 @@ def test_result_with_done_job_calls_get_query_results(self): result = job.result() rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) query_results_page_call = mock.call( @@ -894,7 +887,6 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -908,12 +900,6 @@ def test_result_with_max_results(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because max_results is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } query_page_resource = { "totalRows": "5", @@ -939,7 +925,6 @@ def test_result_with_max_results(self): rows = list(result) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) query_page_request = connection.api_request.call_args_list[1] self.assertEqual( @@ -994,7 +979,7 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"location": "asia-northeast1"}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( @@ -1094,12 +1079,6 @@ def test_result_w_page_size(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", - # These rows are discarded because page_size is set. 
- "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] @@ -1130,7 +1109,6 @@ def test_result_w_page_size(self): # Assert actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - self.assertEqual(actual_rows[0].col1, "row1") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_page_1_call = mock.call( @@ -1164,12 +1142,6 @@ def test_result_with_start_index(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because start_index is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } tabledata_resource = { "totalRows": "5", @@ -1196,7 +1168,6 @@ def test_result_with_start_index(self): rows = list(result) self.assertEqual(len(rows), 4) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) tabledata_list_request = connection.api_request.call_args_list[1] self.assertEqual( diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index f9d823eb0..cdd6f2b3c 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -100,7 +100,6 @@ def test_to_dataframe_bqstorage_preserve_order(query): ] }, "totalRows": "4", - "pageToken": "next-page", } connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) @@ -135,16 +134,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. - {"create_bqstorage_client": True}, - ], -) -def test_to_arrow(method_kwargs): +def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -172,6 +162,8 @@ def test_to_arrow(method_kwargs): }, ] }, + } + tabledata_resource = { "rows": [ { "f": [ @@ -185,15 +177,17 @@ def test_to_arrow(method_kwargs): {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, ] }, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - tbl = job.to_arrow(**method_kwargs) + tbl = job.to_arrow(create_bqstorage_client=False) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 @@ -375,16 +369,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. 
- {"create_bqstorage_client": True}, - ], -) -def test_to_dataframe(method_kwargs): +def test_to_dataframe(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -398,20 +383,24 @@ def test_to_dataframe(method_kwargs): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + } + tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(**method_kwargs) + df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows @@ -456,7 +445,6 @@ def test_to_dataframe_bqstorage(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - "pageToken": "next-page", } connection = _make_connection(query_resource) client = _make_client(connection=connection) diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 9fa29a496..8f0bf58d5 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -186,6 +186,23 @@ def test_from_api_repr_w_unknown_fields(target_class): assert got._properties is resource +def test_from_api_repr_w_unknown_type(target_class): + from google.cloud.bigquery import ModelReference + + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "modelType": "BE_A_GOOD_ROLE_MODEL", + } + got = target_class.from_api_repr(resource) + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got.model_type == 0 + assert got._properties is resource + + @pytest.mark.parametrize( "resource,filter_fields,expected", [ diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 16c4fb8a5..a52581501 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -728,10 +728,14 @@ def test_w_string(self): self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime(self): - from google.cloud._helpers import _microseconds_from_datetime - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) - self.assertEqual(self._call_fut(when), _microseconds_from_datetime(when) / 1e6) + self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z") + + def test_w_datetime_w_utc_zone(self): + from google.cloud._helpers import UTC + + when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC) + self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z") class Test_datetime_to_json(unittest.TestCase): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dd57ee798..c4bdea2f8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): conn.api_request.assert_called_once_with( method="GET", path=path, - query_params={"timeoutMs": 500, "location": self.LOCATION}, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ 
-336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self): conn.api_request.assert_called_once_with( method="GET", path="/projects/PROJECT/queries/nothere", - query_params={"location": self.LOCATION}, + query_params={"maxResults": 0, "location": self.LOCATION}, timeout=None, ) @@ -5804,7 +5804,7 @@ def test_insert_rows_w_schema(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 @@ -5834,7 +5834,7 @@ def _row_data(row): result = {"full_name": row[0], "age": str(row[1])} joined = row[2] if isinstance(joined, datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 + joined = joined.strftime(_RFC3339_MICROS) if joined is not None: result["joined"] = joined return result @@ -5864,7 +5864,7 @@ def test_insert_rows_w_list_of_dictionaries(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5910,7 +5910,7 @@ def _row_data(row): row = copy.deepcopy(row) del row["joined"] elif isinstance(joined, datetime.datetime): - row["joined"] = _microseconds_from_datetime(joined) * 1e-6 + row["joined"] = joined.strftime(_RFC3339_MICROS) row["age"] = str(row["age"]) return row @@ -6109,16 +6109,16 @@ def test_insert_rows_w_repeated_fields(self): { "score": "12", "times": [ - 1543665600.0, # 2018-12-01 12:00 UTC - 1543669200.0, # 2018-12-01 13:00 UTC + "2018-12-01T12:00:00.000000Z", + "2018-12-01T13:00:00.000000Z", ], "distances": [1.25, 2.5], }, { "score": "13", "times": [ - 1543752000.0, # 2018-12-02 12:00 UTC - 1543755600.0, # 2018-12-02 13:00 UTC + "2018-12-02T12:00:00.000000Z", + "2018-12-02T13:00:00.000000Z", ], "distances": [-1.25, -2.5], }, diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a7cf92919..ff41fe720 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -623,7 +623,7 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" ) assert isinstance(return_value, pandas.DataFrame) @@ -665,7 +665,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) bqstorage_mock.assert_not_called() - query_job_mock.to_dataframe.assert_called_once_with(bqstorage_client=None) + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type="tqdm" + ) assert isinstance(return_value, pandas.DataFrame) @@ -1167,6 +1169,71 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): assert sent_config["maximumBytesBilled"] == "10203" +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + magics.context.progress_bar_type = "tqdm_gui" + 
+ mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock, bqstorage_client_patch: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) + + bqstorage_mock.assert_not_called() + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + ) + + assert isinstance(return_value, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_progress_bar_type(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.progress_bar_type = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" + ) + + progress_bar_used = run_query_mock.mock_calls[1][2]["progress_bar_type"] + assert progress_bar_used == "tqdm_gui" + # context progress bar type should not change + assert magics.context.progress_bar_type is None + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index be67eafcd..67874ff91 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -272,6 +272,11 @@ def test___repr__(self): ) self.assertEqual(repr(table1), expected) + def test___str__(self): + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(dataset, "table1") + self.assertEqual(str(table1), "project1.dataset1.table1") + class TestTable(unittest.TestCase, _SchemaBase): @@ -813,6 +818,9 @@ def test_from_string(self): self.assertEqual(got.project, "string-project") self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") + self.assertEqual( + str(got.reference), "string-project.string_dataset.string_table" + ) def test_from_string_legacy_string(self): cls = self._get_target_class() @@ -1630,6 +1638,40 @@ def test_iterate(self): api_request.assert_called_once_with(method="GET", path=path, query_params={}) + def test_iterate_with_cached_first_page(self): + from google.cloud.bigquery.schema import SchemaField + + first_page = { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + ], + "pageToken": "next-page", + } + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = 
mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, first_page_response=first_page + ) + rows = list(row_iterator) + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0].age, 27) + self.assertEqual(rows[1].age, 28) + self.assertEqual(rows[2].age, 32) + self.assertEqual(rows[3].age, 33) + + api_request.assert_called_once_with( + method="GET", path=path, query_params={"pageToken": "next-page"} + ) + def test_page_size(self): from google.cloud.bigquery.schema import SchemaField @@ -1655,6 +1697,29 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) + def test__is_completely_cached_returns_false_without_first_page(self): + iterator = self._make_one(first_page_response=None) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_false_with_page_token(self): + first_page = {"pageToken": "next-page"} + iterator = self._make_one(first_page_response=first_page) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_true(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertTrue(iterator._is_completely_cached()) + + def test__validate_bqstorage_returns_false_when_completely_cached(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertFalse( + iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField
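
For reference, a minimal sketch of the two user-visible behavior changes in this release: `TableReference.__str__` returns the table ID in standard SQL form (#405), and `insert_rows` serializes `datetime` values as RFC 3339 strings instead of floating-point seconds (#393). The format string below is an assumption about the value of `google.cloud._helpers._RFC3339_MICROS`; the sample values mirror the unit tests in this diff.

```python
import datetime

from google.cloud import bigquery

# __str__ now yields the standard SQL table ID, usable directly in queries.
ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")
assert str(ref) == "my-project.my_dataset.my_table"

# insert_rows now sends TIMESTAMP values as RFC 3339 strings with
# microsecond precision; the old float (microseconds * 1e-6) could lose
# precision. Assumes _RFC3339_MICROS == "%Y-%m-%dT%H:%M:%S.%fZ".
when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=datetime.timezone.utc)
assert when.strftime("%Y-%m-%dT%H:%M:%S.%fZ") == "2020-11-17T01:06:52.353795Z"
```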
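The fallback for unrecognized model types (#401) can be exercised with the resource from the new unit test: a `modelType` value the installed client does not know yet now parses as `MODEL_TYPE_UNSPECIFIED` (enum value 0) instead of raising `json_format.ParseError`.

```python
from google.cloud.bigquery.model import Model

resource = {
    "modelReference": {
        "projectId": "my-project",
        "datasetId": "my_dataset",
        "modelId": "my_model",
    },
    # A model type the client's enum does not recognize.
    "modelType": "BE_A_GOOD_ROLE_MODEL",
}

# Parsing no longer raises; the unknown type falls back to the
# MODEL_TYPE_UNSPECIFIED enum value.
model = Model.from_api_repr(resource)
assert model.model_type == 0
```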
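Finally, a hypothetical notebook session showing the progress bar controls added to the IPython magics (#396); `tqdm_gui` is just one tqdm flavor, borrowed from the tests above, and assumes the `tqdm` package is installed.

```python
from google.cloud.bigquery import magics

# Session-wide default; context.progress_bar_type is initialized to "tqdm".
magics.context.progress_bar_type = "tqdm_gui"
```

Within a notebook cell, the new `--progress_bar_type` flag sets the type for a single query, taking precedence over the context default:

```
%%bigquery --progress_bar_type=tqdm
SELECT COUNT(*) AS num_rows
FROM `bigquery-public-data.samples.shakespeare`
```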