diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index fc281c05b..6fe78aa79 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -1,6 +1,6 @@ {"allowedCopyrightHolders": ["Google LLC"], "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], "sourceFileExtensions": [ "ts", "js", diff --git a/.gitignore b/.gitignore index b4243ced7..99c3a1444 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ pip-log.txt .nox .cache .pytest_cache +.pytype # Mac diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 0e58f0640..3abba6e06 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password") +TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") cd github/python-bigquery python3 setup.py sdist bdist_wheel -twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/* +twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 18b417709..922d7fe50 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,18 +23,8 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} \ No newline at end of file + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9024b15d..8912e9b5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: @@ -12,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.0 hooks: - id: flake8 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dc2c8838..9aee40510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) + + +### Features + +* accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) +* accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) +* add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) +* add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) +* add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) +* DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) +* retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) +* use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) + + +### Bug Fixes + +* consistent percents handling in DB API query 
([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) +* missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) +* unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) + + +### Documentation + +* add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) +* update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) + ### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index bcd37bbd3..b0a295464 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,9 +1,20 @@ div#python2-eol { border-color: red; border-width: medium; -} +} /* Ensure minimum width for 'Parameters' / 'Returns' column */ dl.field-list > dt { min-width: 100px } + +/* Insert space between methods for readability */ +dl.method { + padding-top: 10px; + padding-bottom: 10px +} + +/* Insert empty space between classes */ +dl.class { + padding-bottom: 50px +} diff --git a/docs/conf.py b/docs/conf.py index 37e0c46af..fdea01aad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,17 @@ # -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# # google-cloud-bigquery documentation build configuration file # diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index ede26cc70..81e7922e6 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -17,8 +17,7 @@ import os import pkg_resources -from google.cloud import _http - +from google.cloud import _http # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7ad416e08..e93a99eba 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -33,6 +33,14 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. + _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import schema @@ -45,6 +53,8 @@ _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. +_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads + _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -363,6 +373,7 @@ def augment_schema(dataframe, current_bq_schema): Returns: Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] """ + # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] @@ -396,6 +407,7 @@ def augment_schema(dataframe, current_bq_schema): return None return augmented_schema + # pytype: enable=attribute-error def dataframe_to_arrow(dataframe, bq_schema): @@ -608,6 +620,7 @@ def _download_table_bqstorage( preserve_order=False, selected_fields=None, page_to_item=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" @@ -631,6 +644,11 @@ def _download_table_bqstorage( for field in selected_fields: requested_session.read_options.selected_fields.append(field.name) + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + session = bqstorage_client.create_read_session( parent="projects/{}".format(project_id), read_session=requested_session, @@ -654,7 +672,17 @@ def _download_table_bqstorage( download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. - worker_queue = queue.Queue() + # + # The queue needs to be bounded by default, because if the user code processes the + # fetched result pages too slowly, while at the same time new pages are rapidly being + # fetched from the server, the queue can grow to the point where the process runs + # out of memory. + if max_queue_size is _MAX_QUEUE_SIZE_DEFAULT: + max_queue_size = total_streams + elif max_queue_size is None: + max_queue_size = 0 # unbounded + + worker_queue = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -695,15 +723,12 @@ def _download_table_bqstorage( continue # Return any remaining values after the workers finished. - while not worker_queue.empty(): # pragma: NO COVER + while True: # pragma: NO COVER try: - # Include a timeout because even though the queue is - # non-empty, it doesn't guarantee that a subsequent call to - # get() will not block. 
- frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + frame = worker_queue.get_nowait() yield frame except queue.Empty: # pragma: NO COVER - continue + break finally: # No need for a lock because reading/replacing a variable is # defined to be an atomic operation in the Python language @@ -716,7 +741,7 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None + project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, ): return _download_table_bqstorage( project_id, @@ -736,6 +761,7 @@ def download_dataframe_bqstorage( dtypes, preserve_order=False, selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -745,6 +771,7 @@ def download_dataframe_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=page_to_item, + max_queue_size=max_queue_size, ) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 305d60d3b..8d0acb867 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -19,6 +19,7 @@ from collections import abc as collections_abc import copy +import datetime import functools import gzip import io @@ -27,6 +28,7 @@ import math import os import tempfile +from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings @@ -35,17 +37,18 @@ except ImportError: # pragma: NO COVER pyarrow = None -from google import resumable_media +from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload import google.api_core.client_options -import google.api_core.exceptions +import google.api_core.exceptions as core_exceptions from google.api_core.iam import Policy from google.api_core import page_iterator +from google.api_core import retry as retries import google.cloud._helpers -from google.cloud import exceptions -from google.cloud.client import ClientWithProject +from google.cloud import exceptions # pytype: disable=import-error +from google.cloud.client import ClientWithProject # pytype: disable=import-error from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -59,6 +62,13 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job +from google.cloud.bigquery.job import ( + LoadJobConfig, + QueryJob, + QueryJobConfig, + CopyJobConfig, + ExtractJobConfig, +) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref @@ -216,8 +226,11 @@ def close(self): self._http.close() def get_service_account_email( - self, project=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> str: """Get the email address of the project's BigQuery service account Note: @@ -259,8 +272,12 @@ def get_service_account_email( return api_response["email"] def list_projects( - self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = 
None, + ) -> page_iterator.Iterator: """List projects for the project associated with this client. See @@ -313,14 +330,14 @@ def api_request(*args, **kwargs): def list_datasets( self, - project=None, - include_all=False, - filter=None, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + project: str = None, + include_all: bool = False, + filter: str = None, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List datasets for the project associated with this client. See @@ -390,7 +407,7 @@ def api_request(*args, **kwargs): extra_params=extra_params, ) - def dataset(self, dataset_id, project=None): + def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -449,9 +466,29 @@ def _create_bqstorage_client(self): return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + def _dataset_from_arg(self, dataset): + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + if isinstance(dataset, DatasetListItem): + dataset = dataset.reference + else: + raise TypeError( + "dataset must be a Dataset, DatasetReference, DatasetListItem," + " or string" + ) + return dataset + def create_dataset( - self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + dataset: Union[str, Dataset, DatasetReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """API call: create the dataset via a POST request. See @@ -461,6 +498,7 @@ def create_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. @@ -491,10 +529,7 @@ def create_dataset( >>> dataset = client.create_dataset(dataset) """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) + dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): dataset = Dataset(dataset) @@ -517,14 +552,18 @@ def create_dataset( timeout=timeout, ) return Dataset.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_dataset(dataset.reference, retry=retry) def create_routine( - self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + routine: Routine, + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Create a routine via a POST request. 
See @@ -568,12 +607,18 @@ def create_routine( timeout=timeout, ) return Routine.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_routine(routine.reference, retry=retry) - def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None): + def create_table( + self, + table: Union[str, Table, TableReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """API call: create a table via a PUT request See @@ -622,7 +667,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None timeout=timeout, ) return Table.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_table(table.reference, retry=retry) @@ -640,7 +685,12 @@ def _call_api( return call() return call() - def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): + def get_dataset( + self, + dataset_ref: Union[DatasetReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` Args: @@ -679,8 +729,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): return Dataset.from_api_repr(api_response) def get_iam_policy( - self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + requested_policy_version: int = 1, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -704,8 +758,13 @@ def get_iam_policy( return Policy.from_api_repr(response) def set_iam_policy( - self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + policy: Policy, + updateMask: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -733,8 +792,12 @@ def set_iam_policy( return Policy.from_api_repr(response) def test_iam_permissions( - self, table, permissions, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + permissions: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -754,7 +817,12 @@ def test_iam_permissions( return response - def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): + def get_model( + self, + model_ref: Union[ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. Args: @@ -792,7 +860,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): + def get_routine( + self, + routine_ref: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. 
Args: @@ -831,7 +904,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): + def get_table( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Fetch the table referenced by ``table``. Args: @@ -867,7 +945,13 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): ) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): + def update_dataset( + self, + dataset: Dataset, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -931,7 +1015,13 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) - def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): + def update_model( + self, + model: Model, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Change some fields of a model. Use ``fields`` to specify which fields to update. At least one field @@ -989,7 +1079,13 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): + def update_routine( + self, + routine: Routine, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Change some fields of a routine. Use ``fields`` to specify which fields to update. At least one field @@ -1057,7 +1153,13 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): + def update_table( + self, + table: Table, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Change some fields of a table. Use ``fields`` to specify which fields to update. At least one field @@ -1118,12 +1220,12 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): def list_models( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List models in the dataset. See @@ -1133,6 +1235,7 @@ def list_models( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose models to list from the @@ -1160,13 +1263,7 @@ def list_models( :class:`~google.cloud.bigquery.model.Model` contained within the requested dataset. 
""" - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") + dataset = self._dataset_from_arg(dataset) path = "%s/models" % dataset.path span_attributes = {"path": path} @@ -1195,12 +1292,12 @@ def api_request(*args, **kwargs): def list_routines( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. See @@ -1210,6 +1307,7 @@ def list_routines( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose routines to list from the @@ -1237,14 +1335,7 @@ def list_routines( :class:`~google.cloud.bigquery.routine.Routine`s contained within the requested dataset, limited by ``max_results``. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "{}/routines".format(dataset.path) span_attributes = {"path": path} @@ -1273,12 +1364,12 @@ def api_request(*args, **kwargs): def list_tables( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List tables in the dataset. See @@ -1288,6 +1379,7 @@ def list_tables( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose tables to list from the @@ -1315,14 +1407,7 @@ def list_tables( :class:`~google.cloud.bigquery.table.TableListItem` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "%s/tables" % dataset.path span_attributes = {"path": path} @@ -1350,12 +1435,12 @@ def api_request(*args, **kwargs): def delete_dataset( self, - dataset, - delete_contents=False, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, - ): + dataset: Union[Dataset, DatasetReference, str], + delete_contents: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a dataset. See @@ -1365,6 +1450,7 @@ def delete_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset to delete. If a string is passed @@ -1384,14 +1470,7 @@ def delete_dataset( Defaults to ``False``. 
If ``True``, ignore "not found" errors when deleting the dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset or a DatasetReference") - + dataset = self._dataset_from_arg(dataset) params = {} path = dataset.path if delete_contents: @@ -1410,13 +1489,17 @@ def delete_dataset( query_params=params, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_model( - self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + model: Union[Model, ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a model See @@ -1458,13 +1541,88 @@ def delete_model( path=path, timeout=timeout, ) + except core_exceptions.NotFound: + if not not_found_ok: + raise + + def delete_job_metadata( + self, + job_id, + project=None, + location=None, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, + ): + """[Beta] Delete job metadata from job history. + + Note: This does not stop a running job. Use + :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + + Keyword Arguments: + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + not_found_ok (Optional[bool]): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the job. + """ + extra_params = {} + + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + + if project is None: + project = self.project + + if location is None: + location = self.location + + # Location is always required for jobs.delete() + extra_params["location"] = location + + path = f"/projects/{project}/jobs/{job_id}/delete" + + span_attributes = {"path": path, "job_id": job_id, "location": location} + + try: + self._call_api( + retry, + span_name="BigQuery.deleteJob", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=extra_params, + timeout=timeout, + ) except google.api_core.exceptions.NotFound: if not not_found_ok: raise def delete_routine( - self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + routine: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a routine. 
See @@ -1508,13 +1666,17 @@ def delete_routine( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_table( - self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a table See @@ -1554,13 +1716,19 @@ def delete_table( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, - ): + self, + job_id: str, + retry: retries.Retry, + project: str = None, + timeout_ms: int = None, + location: str = None, + timeout: float = None, + ) -> _QueryResults: """Get the query results object for a query job. Args: @@ -1618,7 +1786,7 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource): + def job_from_resource(self, resource: dict) -> job.UnknownJob: """Detect correct job type from resource and instantiate. Args: @@ -1644,7 +1812,12 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): + def create_job( + self, + job_config: dict, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1735,20 +1908,33 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): raise TypeError("Invalid job configuration received.") def get_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
timeout (Optional[float]): @@ -1766,6 +1952,10 @@ def get_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -1792,20 +1982,33 @@ def get_job( return self.job_from_resource(resource) def cancel_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1823,6 +2026,10 @@ def cancel_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -1850,17 +2057,17 @@ def cancel_job( def list_jobs( self, - project=None, - parent_job=None, - max_results=None, - page_token=None, - all_users=None, - state_filter=None, - retry=DEFAULT_RETRY, - timeout=None, - min_creation_time=None, - max_creation_time=None, - ): + project: str = None, + parent_job: Optional[Union[QueryJob, str]] = None, + max_results: int = None, + page_token: str = None, + all_users: bool = None, + state_filter: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + min_creation_time: datetime.datetime = None, + max_creation_time: datetime.datetime = None, + ) -> page_iterator.Iterator: """List jobs for the project associated with this client. See @@ -1911,7 +2118,7 @@ def list_jobs( Iterable of job instances. """ if isinstance(parent_job, job._AsyncJob): - parent_job = parent_job.job_id + parent_job = parent_job.job_id # pytype: disable=attribute-error extra_params = { "allUsers": all_users, @@ -1960,16 +2167,16 @@ def api_request(*args, **kwargs): def load_table_from_uri( self, - source_uris, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + source_uris: Union[str, Sequence[str]], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. 
See @@ -2042,18 +2249,18 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj, - destination, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + file_obj: BinaryIO, + destination: Union[Table, TableReference, str], + rewind: bool = False, + size: int = None, + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of this table from a file-like object. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2147,16 +2354,16 @@ def load_table_from_file( def load_table_from_dataframe( self, dataframe, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - parquet_compression="snappy", - timeout=None, - ): + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + parquet_compression: str = "snappy", + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2284,7 +2491,7 @@ def load_table_from_dataframe( ): try: table = self.get_table(destination) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: table = None else: columns_and_indexes = frozenset( @@ -2373,16 +2580,16 @@ def load_table_from_dataframe( def load_table_from_json( self, - json_rows, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + json_rows: Iterable[Dict[str, Any]], + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. Args: @@ -2654,16 +2861,18 @@ def _do_multipart_upload( def copy_table( self, - sources, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + sources: Union[ + Table, TableReference, str, Sequence[Union[Table, TableReference, str]] + ], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: CopyJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.CopyJob: """Copy one or more tables to another table. 
See @@ -2757,17 +2966,17 @@ def copy_table( def extract_table( self, - source, - destination_uris, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - source_type="Table", - ): + source: Union[Table, TableReference, Model, ModelReference, str], + destination_uris: Union[str, Sequence[str]], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: ExtractJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + source_type: str = "Table", + ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. See @@ -2856,15 +3065,15 @@ def extract_table( def query( self, - query, - job_config=None, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + query: str, + job_config: QueryJobConfig = None, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.QueryJob: """Run a SQL query. See @@ -2941,7 +3150,13 @@ def query( return query_job - def insert_rows(self, table, rows, selected_fields=None, **kwargs): + def insert_rows( + self, + table: Union[Table, TableReference, str], + rows: Union[Iterable[Tuple], Iterable[Dict]], + selected_fields: Sequence[SchemaField] = None, + **kwargs: dict, + ) -> Sequence[dict]: """Insert rows into a table via the streaming API. See @@ -2964,7 +3179,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. - kwargs (Dict): + kwargs (dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. @@ -3004,8 +3219,13 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) def insert_rows_from_dataframe( - self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs - ): + self, + table: Union[Table, TableReference, str], + dataframe, + selected_fields: Sequence[SchemaField] = None, + chunk_size: int = 500, + **kwargs: Dict, + ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. Args: @@ -3053,15 +3273,15 @@ def insert_rows_from_dataframe( def insert_rows_json( self, - table, - json_rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableReference, str], + json_rows: Sequence[Dict], + row_ids: Sequence[str] = None, + skip_invalid_rows: bool = None, + ignore_unknown_values: bool = None, + template_suffix: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. See @@ -3157,7 +3377,12 @@ def insert_rows_json( return errors - def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): + def list_partitions( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[str]: """List the partitions in a table. 
Args: @@ -3199,15 +3424,15 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): def list_rows( self, - table, - selected_fields=None, - max_results=None, - page_token=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableListItem, TableReference, str], + selected_fields: Sequence[SchemaField] = None, + max_results: int = None, + page_token: str = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of the table. See @@ -3308,18 +3533,18 @@ def list_rows( def _list_rows_from_query_results( self, - job_id, - location, - project, - schema, - total_rows=None, - destination=None, - max_results=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + job_id: str, + location: str, + project: str, + schema: SchemaField, + total_rows: int = None, + destination: Union[Table, TableReference, TableListItem, str] = None, + max_results: int = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of a completed query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults @@ -3404,7 +3629,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path): + def schema_from_json(self, file_or_path: Union[str, BinaryIO]): """Takes a file object or file path that contains json that describes a table schema. @@ -3417,7 +3642,9 @@ def schema_from_json(self, file_or_path): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json(self, schema_list, destination): + def schema_to_json( + self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] + ): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. @@ -3527,6 +3754,37 @@ def _item_to_table(iterator, resource): return TableListItem(resource) +def _extract_job_reference(job, project=None, location=None): + """Extract fully-qualified job reference from a job-like object. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + project (Optional[str]): + Project where the job was run. Ignored if ``job_id`` is a job + object. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + + Returns: + Tuple[str, str, str]: ``(project, location, job_id)`` + """ + if hasattr(job, "job_id"): + project = job.project + job_id = job.job_id + location = job.location + else: + job_id = job + + return (project, location, job_id) + + def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. 
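
Note: the hunk above adds the _extract_job_reference helper and the new Client.delete_job_metadata method, and get_job / cancel_job now accept job objects as well as job ID strings. A minimal usage sketch of that surface (assumes application default credentials and a default project; illustrative only, not part of the patch):

from google.cloud import bigquery

client = bigquery.Client()

job = client.query("SELECT 1")  # returns a QueryJob
job.result()                    # wait for the job to finish

# Job objects are accepted in place of job ID strings; _extract_job_reference
# pulls project, location and job_id from the object itself.
same_job = client.get_job(job)

# New in this change: drop the finished job's metadata from job history.
# This does not stop a running job; cancel_job is for that.
client.delete_job_metadata(job, not_found_ok=True)
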
@@ -3560,7 +3818,7 @@ def _check_mode(stream): mode = getattr(stream, "mode", None) if isinstance(stream, gzip.GzipFile): - if mode != gzip.READ: + if mode != gzip.READ: # pytype: disable=module-attr raise ValueError( "Cannot upload gzip files opened in write mode: use " "gzip.GzipFile(filename, mode='rb')" diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 2d3a4755f..21e56f305 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -220,7 +220,7 @@ def to_api_repr(self): return resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "AccessEntry": """Factory: construct an access entry given its API representation Args: @@ -288,7 +288,7 @@ def path(self): routine = _get_routine_reference @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "DatasetReference": """Factory: construct a dataset reference given its API representation Args: @@ -304,7 +304,9 @@ def from_api_repr(cls, resource): return cls(project, dataset_id) @classmethod - def from_string(cls, dataset_id, default_project=None): + def from_string( + cls, dataset_id: str, default_project: str = None + ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. Args: @@ -350,7 +352,7 @@ def from_string(cls, dataset_id, default_project=None): return cls(output_project_id, output_dataset_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset reference Returns: @@ -640,7 +642,7 @@ def default_encryption_configuration(self, value): self._properties["defaultEncryptionConfiguration"] = api_repr @classmethod - def from_string(cls, full_dataset_id): + def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. Args: @@ -664,7 +666,7 @@ def from_string(cls, full_dataset_id): return cls(DatasetReference.from_string(full_dataset_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Dataset": """Factory: construct a dataset given its API representation Args: @@ -689,7 +691,7 @@ def from_api_repr(cls, resource): dataset._properties = copy.deepcopy(resource) return dataset - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset Returns: diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 69694c98c..beb3c5e71 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -276,7 +276,7 @@ def decorate_public_methods(klass): """Apply ``_raise_on_closed()`` decorator to public instance methods. """ for name in dir(klass): - if name.startswith("_"): + if name.startswith("_") and name != "__iter__": continue member = getattr(klass, name) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index e90bcc2c0..ca78d3907 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -19,6 +19,14 @@ import copy import logging +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. 
+ _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -255,6 +263,12 @@ def _bqstorage_fetch(self, bqstorage_client): table=table_reference.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW, ) + + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + read_session = bqstorage_client.create_read_session( parent="projects/{}".format(table_reference.project), read_session=requested_session, @@ -351,6 +365,10 @@ def setinputsizes(self, sizes): def setoutputsize(self, size, column=None): """No-op, but for consistency raise an error if cursor is closed.""" + def __iter__(self): + self._try_fetch() + return iter(self._query_data) + def _format_operation_list(operation, parameters): """Formats parameters in operation in the way BigQuery expects. @@ -375,7 +393,7 @@ def _format_operation_list(operation, parameters): try: return operation % tuple(formatted_params) - except TypeError as exc: + except (TypeError, ValueError) as exc: raise exceptions.ProgrammingError(exc) @@ -405,7 +423,7 @@ def _format_operation_dict(operation, parameters): try: return operation % formatted_params - except KeyError as exc: + except (KeyError, ValueError, TypeError) as exc: raise exceptions.ProgrammingError(exc) @@ -427,7 +445,7 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation + return operation.replace("%%", "%") # Still do percent de-escaping. if isinstance(parameters, collections_abc.Mapping): return _format_operation_dict(operation, parameters) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 59e4960f9..ef4d569fa 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -149,7 +149,7 @@ def type_(self): def type_(self, value): self._properties["type"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -159,7 +159,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumn": """Factory: construct a :class:`~.external_config.BigtableColumn` instance given its API representation. @@ -251,7 +251,7 @@ def columns(self): def columns(self, value): self._properties["columns"] = [col.to_api_repr() for col in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -261,7 +261,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": """Factory: construct a :class:`~.external_config.BigtableColumnFamily` instance given its API representation. @@ -333,7 +333,7 @@ def column_families(self): def column_families(self, value): self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. 
Returns: @@ -343,7 +343,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableOptions": """Factory: construct a :class:`~.external_config.BigtableOptions` instance given its API representation. @@ -450,7 +450,7 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -459,7 +459,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "CSVOptions": """Factory: construct a :class:`~.external_config.CSVOptions` instance given its API representation. @@ -513,7 +513,7 @@ def range(self): def range(self, value): self._properties["range"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -522,7 +522,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": """Factory: construct a :class:`~.external_config.GoogleSheetsOptions` instance given its API representation. @@ -601,7 +601,7 @@ def require_partition_filter(self): def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -610,7 +610,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions": """Factory: construct a :class:`~.external_config.HivePartitioningOptions` instance given its API representation. @@ -784,7 +784,7 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -799,7 +799,7 @@ def to_api_repr(self): return config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ExternalConfig": """Factory: construct an :class:`~.external_config.ExternalConfig` instance given its API representation. 
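
Note: the dbapi/cursor.py hunks above make DB-API cursors directly iterable (new __iter__) and change _format_operation so that a doubled percent sign is de-escaped even when no query parameters are supplied. A small sketch of the resulting behavior (assumes a default Client can be built from ambient credentials; illustrative only, not part of the patch):

from google.cloud.bigquery import dbapi

connection = dbapi.connect()   # builds a default bigquery.Client under the hood
cursor = connection.cursor()

# Literal percent signs are written as %% and are now de-escaped consistently,
# with or without a parameters argument.
cursor.execute("SELECT 'first%%last' AS name")

# Cursors can be iterated directly; no explicit fetchall() needed.
for row in cursor:
    print(row)
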
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f24e972c8..20ad81c0b 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -18,6 +18,7 @@ import copy import http import threading +import typing from google.api_core import exceptions import google.api_core.future.polling @@ -25,6 +26,9 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.api_core import retry as retries + _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -466,7 +470,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def exists( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: test for the existence of the job via a GET request See @@ -509,7 +515,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): else: return True - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def reload( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """API call: refresh job properties via a GET request. See @@ -544,7 +552,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def cancel( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: cancel job via a POST request See @@ -610,7 +620,12 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + def done( + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + reload: bool = True, + ) -> bool: """Checks if the job is complete. Args: @@ -633,7 +648,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result(self, retry=DEFAULT_RETRY, timeout=None): + def result( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. Args: @@ -788,7 +805,7 @@ def _del_sub_prop(self, key): """ _helpers._del_sub_prop(self._properties, [self._job_type, key]) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the job config. Returns: @@ -818,7 +835,10 @@ def _fill_from_default(self, default_job_config): + repr(default_job_config._job_type) ) - new_job_config = self.__class__() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter default_job_properties = copy.deepcopy(default_job_config._properties) for key in self._properties: @@ -831,7 +851,7 @@ def _fill_from_default(self, default_job_config): return new_job_config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "_JobConfig": """Factory: construct a job configuration given its API representation Args: @@ -842,7 +862,10 @@ def from_api_repr(cls, resource): Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. 
""" - job_config = cls() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + job_config = cls() # pytype: disable=missing-parameter job_config._properties = resource return job_config @@ -929,7 +952,7 @@ class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "UnknownJob": """Construct an UnknownJob from the JSON representation. Args: diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index a6e262a32..3373bcdef 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -241,7 +241,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation .. note: diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e784af0a6..b8174af3e 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -733,7 +733,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation .. note: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 491983f8e..f52f9c621 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -17,6 +17,8 @@ import concurrent.futures import copy import re +import typing +from typing import Any, Dict, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -46,6 +48,15 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +if typing.TYPE_CHECKING: # pragma: NO COVER + # Assumption: type checks are only used by library developers and CI environments + # that have all optional dependencies installed, thus no conditional imports. + import pandas + import pyarrow + from google.api_core import retry as retries + from google.cloud import bigquery_storage + from google.cloud.bigquery.table import RowIterator + _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _TIMEOUT_BUFFER_SECS = 0.1 @@ -491,7 +502,7 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the query job config. Returns: @@ -718,7 +729,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "QueryJob": """Factory: construct a job given its API representation Args: @@ -1036,7 +1047,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise - def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + def _reload_query_results( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """Refresh the cached query results. 
Args: @@ -1111,12 +1124,12 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): + page_size: int = None, + max_results: int = None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + start_index: int = None, + ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. Args: @@ -1196,10 +1209,10 @@ def result( # changes to table.RowIterator.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1265,12 +1278,12 @@ def to_arrow( # changes to table.RowIterator.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob Args: @@ -1350,7 +1363,7 @@ def __init__(self, kind, substeps): self.substeps = list(substeps) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep": """Factory: construct instance from the JSON repr. Args: @@ -1380,7 +1393,7 @@ def __init__(self): self._properties = {} @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntry": """Factory: construct instance from the JSON repr. Args: diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 0cb63292c..cd809c389 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -49,90 +49,45 @@ # the value of an option other than "--params", we do not really care about its # structure, and thus do not want to use any of the "Python tokens" for pattern matching. # -# Since token definition order is important, an OrderedDict is needed with tightly -# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). +# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468 +# guarantees us that the order of kwargs is preserved in Python 3.6+. 
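# Editor's aside (not part of the patch): the PEP 468 guarantee referenced in the
# comment above is what makes the kwargs-based rewrite of token_types below safe.
# A tiny self-contained sketch, using hypothetical token names:
from collections import OrderedDict

_pep468_demo = OrderedDict(FIRST=r"\d+", SECOND=r"[a-z]+", THIRD=r"\s+")
# On Python 3.6+, keyword argument order is preserved, so definition order survives.
assert list(_pep468_demo) == ["FIRST", "SECOND", "THIRD"]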
token_types = OrderedDict( - [ - ( - "state_parse_pos_args", - OrderedDict( - [ - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--))", # double dash - starting the options list - ), - ( - "DEST_VAR", - r"(?P[^\d\W]\w*)", # essentially a Python ID - ), - ] - ), - ), - ( - "state_parse_non_params_options", - OrderedDict( - [ - ( - "GOTO_PARSE_PARAMS_OPTION", - r"(?P(?=--params(?:\s|=|--|$)))", # the --params option - ), - ("OPTION_SPEC", r"(?P--\w+)"), - ("OPTION_EQ", r"(?P=)"), - ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"), - ] - ), - ), - ( - "state_parse_params_option", - OrderedDict( - [ - ( - "PY_STRING", - r"(?P(?:{})|(?:{}))".format( - r"'(?:[^'\\]|\.)*'", - r'"(?:[^"\\]|\.)*"', # single and double quoted strings - ), - ), - ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"), - ("PARAMS_OPT_EQ", r"(?P=)"), - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--\w+))", # found another option spec - ), - ("PY_BOOL", r"(?PTrue|False)"), - ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"), - ( - "PY_NUMBER", - r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", - ), - ("SQUOTE", r"(?P')"), - ("DQUOTE", r'(?P")'), - ("COLON", r"(?P:)"), - ("COMMA", r"(?P,)"), - ("LCURL", r"(?P\{)"), - ("RCURL", r"(?P})"), - ("LSQUARE", r"(?P\[)"), - ("RSQUARE", r"(?P])"), - ("LPAREN", r"(?P\()"), - ("RPAREN", r"(?P\))"), - ] - ), - ), - ( - "common", - OrderedDict( - [ - ("WS", r"(?P\s+)"), - ("EOL", r"(?P$)"), - ( - # anything not a whitespace or matched by something else - "UNKNOWN", - r"(?P\S+)", - ), - ] - ), + state_parse_pos_args=OrderedDict( + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--))", # double dash - starting the options list + DEST_VAR=r"(?P[^\d\W]\w*)", # essentially a Python ID + ), + state_parse_non_params_options=OrderedDict( + GOTO_PARSE_PARAMS_OPTION=r"(?P(?=--params(?:\s|=|--|$)))", # the --params option + OPTION_SPEC=r"(?P--\w+)", + OPTION_EQ=r"(?P=)", + OPT_VAL=r"(?P\S+?(?=\s|--|$))", + ), + state_parse_params_option=OrderedDict( + PY_STRING=r"(?P(?:{})|(?:{}))".format( # single and double quoted strings + r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"' ), - ] + PARAMS_OPT_SPEC=r"(?P--params(?=\s|=|--|$))", + PARAMS_OPT_EQ=r"(?P=)", + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--\w+))", # found another option spec + PY_BOOL=r"(?PTrue|False)", + DOLLAR_PY_ID=r"(?P\$[^\d\W]\w*)", + PY_NUMBER=r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", + SQUOTE=r"(?P')", + DQUOTE=r'(?P")', + COLON=r"(?P:)", + COMMA=r"(?P,)", + LCURL=r"(?P\{)", + RCURL=r"(?P})", + LSQUARE=r"(?P\[)", + RSQUARE=r"(?P])", + LPAREN=r"(?P\()", + RPAREN=r"(?P\))", + ), + common=OrderedDict( + WS=r"(?P\s+)", + EOL=r"(?P$)", + UNKNOWN=r"(?P\S+)", # anything not a whitespace or matched by something else + ), ) @@ -143,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( +TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 55846bd1a..2d3f6660f 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -279,7 +279,7 @@ def encryption_configuration(self, value): self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Model": """Factory: construct a model resource given its API representation Args: @@ -322,7 +322,7 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) - def to_api_repr(self): + def 
to_api_repr(self) -> dict: """Construct the API resource representation of this model. Returns: @@ -389,7 +389,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, model_id, default_project=None): + def from_string( + cls, model_id: str, default_project: str = None + ) -> "ModelReference": """Construct a model reference from model ID string. Args: @@ -417,7 +419,7 @@ def from_string(cls, model_id, default_project=None): {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model reference. Returns: diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 42547cd73..3751eb124 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -286,7 +286,7 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -297,7 +297,7 @@ def from_api_repr(cls, resource): """ raise NotImplementedError - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -329,7 +329,7 @@ def __init__(self, name, type_, value): self.value = value @classmethod - def positional(cls, type_, value): + def positional(cls, type_: str, value) -> "ScalarQueryParameter": """Factory for positional paramater. Args: @@ -347,7 +347,7 @@ def positional(cls, type_, value): return cls(None, type_, value) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -369,7 +369,7 @@ def from_api_repr(cls, resource): return cls(name, type_, converted) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -441,7 +441,7 @@ def __init__(self, name, array_type, values): self.array_type = array_type @classmethod - def positional(cls, array_type, values): + def positional(cls, array_type: str, values: list) -> "ArrayQueryParameter": """Factory for positional parameters. Args: @@ -490,7 +490,7 @@ def _from_api_repr_scalar(cls, resource): return cls(name, array_type, converted) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ArrayQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -504,7 +504,7 @@ def from_api_repr(cls, resource): return cls._from_api_repr_struct(resource) return cls._from_api_repr_scalar(resource) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -623,7 +623,7 @@ def positional(cls, *sub_params): return cls(None, *sub_params) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "StructQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -663,7 +663,7 @@ def from_api_repr(cls, resource): instance.struct_values[key] = converted return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. 
Returns: @@ -815,7 +815,7 @@ def total_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: - Optional[int}: Count generated on the server (None until set by the server). + Optional[int]: Count generated on the server (None until set by the server). """ total_rows = self._properties.get("totalRows") if total_rows is not None: @@ -858,7 +858,7 @@ def rows(self): Returns: Optional[List[google.cloud.bigquery.table.Row]]: - Fields describing the schema (None until set by the server). + Rows containing the results of the query. """ return _rows_from_json(self._properties.get("rows", ()), self.schema) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 20a8e7b13..5e9075fe1 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +from google.auth import exceptions as auth_exceptions import requests.exceptions @@ -27,6 +28,7 @@ exceptions.InternalServerError, exceptions.BadGateway, requests.exceptions.ConnectionError, + auth_exceptions.TransportError, ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 103799e8f..bbc0a7693 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -266,7 +266,7 @@ def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. Args: @@ -281,7 +281,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine. Returns: @@ -387,7 +387,7 @@ def data_type(self, value): self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineArgument": """Factory: construct a routine argument given its API representation. Args: @@ -401,7 +401,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine argument. Returns: @@ -438,17 +438,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] + return self._properties["projectId"] # pytype: disable=key-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] + return self._properties["datasetId"] # pytype: disable=key-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] + return self._properties["routineId"] # pytype: disable=key-error @property def path(self): @@ -460,7 +460,7 @@ def path(self): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineReference": """Factory: construct a routine reference given its API representation. 
Args: @@ -476,7 +476,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, routine_id, default_project=None): + def from_string( + cls, routine_id: str, default_project: str = None + ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. Args: @@ -504,7 +506,7 @@ def from_string(cls, routine_id, default_project=None): {"projectId": proj, "datasetId": dset, "routineId": routine} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine reference. Returns: diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 680dcc138..cb221d6de 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -90,7 +90,7 @@ def __init__( self._policy_tags = policy_tags @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: @@ -163,7 +163,7 @@ def policy_tags(self): """ return self._policy_tags - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. Returns: @@ -194,13 +194,14 @@ def _key(self): return ( self.name, self.field_type.upper(), - self.mode.upper(), + # Mode is always str, if not given it defaults to a str value + self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, self._policy_tags, ) - def to_standard_sql(self): + def to_standard_sql(self) -> types.StandardSqlField: """Return the field as the standard SQL field representation object. Returns: @@ -375,7 +376,7 @@ def __repr__(self): return "PolicyTagList{}".format(self._key()) @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "PolicyTagList": """Return a :class:`PolicyTagList` object deserialized from a dict. This method creates a new ``PolicyTagList`` instance that points to @@ -398,7 +399,7 @@ def from_api_repr(cls, api_repr): names = api_repr.get("names", ()) return cls(names=names) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``PolicyTagList`` diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a2366b806..b91c91a39 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -19,9 +19,10 @@ import copy import datetime import functools -import logging import operator import pytz +import typing +from typing import Any, Dict, Iterable, Tuple import warnings try: @@ -47,13 +48,14 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +if typing.TYPE_CHECKING: # pragma: NO COVER + # Unconditionally import optional dependencies again to tell pytype that + # they are not None, avoiding false "no attribute" errors. + import pandas + import pyarrow + from google.cloud import bigquery_storage -_LOGGER = logging.getLogger(__name__) -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." 
@@ -143,7 +145,9 @@ def path(self): ) @classmethod - def from_string(cls, table_id, default_project=None): + def from_string( + cls, table_id: str, default_project: str = None + ) -> "TableReference": """Construct a table reference from table ID string. Args: @@ -182,7 +186,7 @@ def from_string(cls, table_id, default_project=None): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "TableReference": """Factory: construct a table reference given its API representation Args: @@ -200,7 +204,7 @@ def from_api_repr(cls, resource): table_id = resource["tableId"] return cls(DatasetReference(project, dataset_id), table_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this table reference. Returns: @@ -212,7 +216,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Install the ``google-cloud-bigquery-storage`` package to use this @@ -291,16 +295,36 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + "clustering_fields": "clustering", + "created": "creationTime", + "dataset_id": ["tableReference", "datasetId"], + "description": "description", "encryption_configuration": "encryptionConfiguration", + "etag": "etag", "expires": "expirationTime", "external_data_configuration": "externalDataConfiguration", "friendly_name": "friendlyName", + "full_table_id": "id", + "labels": "labels", + "location": "location", + "modified": "lastModifiedTime", "mview_enable_refresh": "materializedView", + "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "num_bytes": "numBytes", + "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", + "project": ["tableReference", "projectId"], + "range_partitioning": "rangePartitioning", + "time_partitioning": "timePartitioning", + "schema": "schema", + "streaming_buffer": "streamingBuffer", + "self_link": "selfLink", + "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", + "type": "type", "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", @@ -316,17 +340,23 @@ def __init__(self, table_ref, schema=None): @property def project(self): """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property def table_id(self): """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["table_id"] + ) reference = property(_reference_getter) @@ -345,11 +375,15 @@ def require_partition_filter(self): partition filter that can be used for partition elimination to be specified. 
""" - return self._properties.get("requirePartitionFilter") + return self._properties.get( + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ) @require_partition_filter.setter def require_partition_filter(self, value): - self._properties["requirePartitionFilter"] = value + self._properties[ + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ] = value @property def schema(self): @@ -365,7 +399,7 @@ def schema(self): is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. """ - prop = self._properties.get("schema") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) if not prop: return [] else: @@ -373,11 +407,13 @@ def schema(self): @schema.setter def schema(self, value): + api_field = self._PROPERTY_TO_API_FIELD["schema"] + if value is None: - self._properties["schema"] = None + self._properties[api_field] = None else: value = _to_schema_fields(value) - self._properties["schema"] = {"fields": _build_schema_resource(value)} + self._properties[api_field] = {"fields": _build_schema_resource(value)} @property def labels(self): @@ -390,13 +426,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault("labels", {}) + return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties["labels"] = value + self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value @property def encryption_configuration(self): @@ -410,7 +446,9 @@ def encryption_configuration(self): `_ in the BigQuery documentation. """ - prop = self._properties.get("encryptionConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ) if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -420,14 +458,16 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["encryptionConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ] = api_repr @property def created(self): """Union[datetime.datetime, None]: Datetime at which the table was created (:data:`None` until set from the server). """ - creation_time = self._properties.get("creationTime") + creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -439,14 +479,14 @@ def etag(self): """Union[str, None]: ETag for the table resource (:data:`None` until set from the server). """ - return self._properties.get("etag") + return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the table was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get("lastModifiedTime") + modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -458,21 +498,25 @@ def num_bytes(self): """Union[int, None]: The size of the table in bytes (:data:`None` until set from the server). 
""" - return _helpers._int_or_none(self._properties.get("numBytes")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"]) + ) @property def num_rows(self): """Union[int, None]: The number of rows in the table (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get("numRows")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"]) + ) @property def self_link(self): """Union[str, None]: URL for the table resource (:data:`None` until set from the server). """ - return self._properties.get("selfLink") + return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"]) @property def full_table_id(self): @@ -481,7 +525,7 @@ def full_table_id(self): In the format ``project-id:dataset_id.table_id``. """ - return self._properties.get("id") + return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"]) @property def table_type(self): @@ -491,7 +535,7 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or ``'EXTERNAL'``. """ - return self._properties.get("type") + return self._properties.get(self._PROPERTY_TO_API_FIELD["type"]) @property def range_partitioning(self): @@ -512,7 +556,9 @@ def range_partitioning(self): :class:`~google.cloud.bigquery.table.RangePartitioning` or :data:`None`. """ - resource = self._properties.get("rangePartitioning") + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["range_partitioning"] + ) if resource is not None: return RangePartitioning(_properties=resource) @@ -525,7 +571,7 @@ def range_partitioning(self, value): raise ValueError( "Expected value to be RangePartitioning or None, got {}.".format(value) ) - self._properties["rangePartitioning"] = resource + self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource @property def time_partitioning(self): @@ -542,7 +588,7 @@ def time_partitioning(self): :class:`~google.cloud.bigquery.table.TimePartitioning` or :data:`None`. 
""" - prop = self._properties.get("timePartitioning") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"]) if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -555,7 +601,7 @@ def time_partitioning(self, value): raise ValueError( "value must be google.cloud.bigquery.table.TimePartitioning " "or None" ) - self._properties["timePartitioning"] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr @property def partitioning_type(self): @@ -580,9 +626,10 @@ def partitioning_type(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"] if self.time_partitioning is None: - self._properties["timePartitioning"] = {} - self._properties["timePartitioning"]["type"] = value + self._properties[api_field] = {} + self._properties[api_field]["type"] = value @property def partition_expiration(self): @@ -609,9 +656,11 @@ def partition_expiration(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"] + if self.time_partitioning is None: - self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY} - self._properties["timePartitioning"]["expirationMs"] = str(value) + self._properties[api_field] = {"type": TimePartitioningType.DAY} + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -626,7 +675,7 @@ def clustering_fields(self): BigQuery supports clustering for both partitioned and non-partitioned tables. """ - prop = self._properties.get("clustering") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"]) if prop is not None: return list(prop.get("fields", ())) @@ -636,12 +685,15 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ + api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"] + if value is not None: - prop = self._properties.setdefault("clustering", {}) + prop = self._properties.setdefault(api_field, {}) prop["fields"] = value else: - if "clustering" in self._properties: - del self._properties["clustering"] + # In order to allow unsetting clustering fields completely, we explicitly + # set this property to None (as oposed to merely removing the key). + self._properties[api_field] = None @property def description(self): @@ -651,13 +703,13 @@ def description(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("description") + return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) @description.setter def description(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["description"] = value + self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value @property def expires(self): @@ -667,7 +719,7 @@ def expires(self): Raises: ValueError: For invalid value types. """ - expiration_time = self._properties.get("expirationTime") + expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"]) if expiration_time is not None: # expiration_time will be in milliseconds. 
return google.cloud._helpers._datetime_from_microseconds( @@ -679,7 +731,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties["expirationTime"] = _helpers._str_or_none(value_ms) + self._properties[ + self._PROPERTY_TO_API_FIELD["expires"] + ] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -688,13 +742,13 @@ def friendly_name(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("friendlyName") + return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"]) @friendly_name.setter def friendly_name(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["friendlyName"] = value + self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value @property def location(self): @@ -702,7 +756,7 @@ def location(self): Defaults to :data:`None`. """ - return self._properties.get("location") + return self._properties.get(self._PROPERTY_TO_API_FIELD["location"]) @property def view_query(self): @@ -715,14 +769,17 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - return _helpers._get_sub_prop(self._properties, ["view", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, str): raise ValueError("Pass a string") - _helpers._set_sub_prop(self._properties, ["view", "query"], value) - view = self._properties["view"] + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], value) + view = self._properties[api_field] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. if view.get("useLegacySql") is None: @@ -731,7 +788,7 @@ def view_query(self, value): @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" - self._properties.pop("view", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None) view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -739,27 +796,29 @@ def view_query(self): def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") - if self._properties.get("view") is None: - self._properties["view"] = {} - self._properties["view"]["useLegacySql"] = value + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + if self._properties.get(api_field) is None: + self._properties[api_field] = {} + self._properties[api_field]["useLegacySql"] = value @property def mview_query(self): """Optional[str]: SQL query defining the table as a materialized view (defaults to :data:`None`). 
""" - return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @mview_query.setter def mview_query(self, value): - _helpers._set_sub_prop( - self._properties, ["materializedView", "query"], str(value) - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value)) @mview_query.deleter def mview_query(self): """Delete SQL query defining the table as a materialized view.""" - self._properties.pop("materializedView", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None) @property def mview_last_refresh_time(self): @@ -767,7 +826,7 @@ def mview_last_refresh_time(self): refreshed (:data:`None` until set from the server). """ refresh_time = _helpers._get_sub_prop( - self._properties, ["materializedView", "lastRefreshTime"] + self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"] ) if refresh_time is not None: # refresh_time will be in milliseconds. @@ -780,14 +839,14 @@ def mview_enable_refresh(self): """Optional[bool]: Enable automatic refresh of the materialized view when the base table is updated. The default value is :data:`True`. """ - return _helpers._get_sub_prop( - self._properties, ["materializedView", "enableRefresh"] - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] + return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"]) @mview_enable_refresh.setter def mview_enable_refresh(self, value): + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] return _helpers._set_sub_prop( - self._properties, ["materializedView", "enableRefresh"], value + self._properties, [api_field, "enableRefresh"], value ) @property @@ -796,8 +855,9 @@ def mview_refresh_interval(self): materialized view will be refreshed. The default value is 1800000 milliseconds (30 minutes). """ + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] refresh_interval = _helpers._get_sub_prop( - self._properties, ["materializedView", "refreshIntervalMs"] + self._properties, [api_field, "refreshIntervalMs"] ) if refresh_interval is not None: return datetime.timedelta(milliseconds=int(refresh_interval)) @@ -809,10 +869,9 @@ def mview_refresh_interval(self, value): else: refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, - ["materializedView", "refreshIntervalMs"], - refresh_interval_ms, + self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, ) @property @@ -820,7 +879,7 @@ def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's streaming buffer. """ - sb = self._properties.get("streamingBuffer") + sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"]) if sb is not None: return StreamingBuffer(sb) @@ -832,7 +891,9 @@ def external_data_configuration(self): Raises: ValueError: For invalid value types. 
""" - prop = self._properties.get("externalDataConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ) if prop is not None: prop = ExternalConfig.from_api_repr(prop) return prop @@ -844,10 +905,12 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["externalDataConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ] = api_repr @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. Args: @@ -871,7 +934,7 @@ def from_string(cls, full_table_id): return cls(TableReference.from_string(full_table_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Table": """Factory: construct a table given its API representation Args: @@ -897,9 +960,15 @@ def from_api_repr(cls, resource): "Resource lacks required identity information:" '["tableReference"]["tableId"]' ) - project_id = resource["tableReference"]["projectId"] - table_id = resource["tableReference"]["tableId"] - dataset_id = resource["tableReference"]["datasetId"] + project_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["project"] + ) + table_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["table_id"] + ) + dataset_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["dataset_id"] + ) dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) @@ -907,7 +976,7 @@ def from_api_repr(cls, resource): return table - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -915,7 +984,7 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1104,7 +1173,7 @@ def clustering_fields(self): return list(prop.get("fields", ())) @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "TableListItem": """Construct a table from fully-qualified table ID. Args: @@ -1129,7 +1198,7 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1137,7 +1206,7 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -1231,7 +1300,7 @@ def values(self): """ return copy.deepcopy(self._xxx_values) - def keys(self): + def keys(self) -> Iterable[str]: """Return the keys for using a row as a dict. Returns: @@ -1244,7 +1313,7 @@ def keys(self): """ return self._xxx_field_to_index.keys() - def items(self): + def items(self) -> Iterable[Tuple[str, Any]]: """Return items as ``(key, value)`` pairs. Returns: @@ -1259,7 +1328,7 @@ def items(self): for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) - def get(self, key, default=None): + def get(self, key: str, default: Any = None) -> Any: """Return a value for key, with a default value if it does not exist. 
Args: @@ -1490,13 +1559,12 @@ def _to_page_iterable( if not self._validate_bqstorage(bqstorage_client, False): bqstorage_client = None - if bqstorage_client is not None: - for item in bqstorage_download(): - yield item - return - - for item in tabledata_list_download(): - yield item + result_pages = ( + bqstorage_download() + if bqstorage_client is not None + else tabledata_list_download() + ) + yield from result_pages def _to_arrow_iterable(self, bqstorage_client=None): """Create an iterable of arrow RecordBatches, to process the table as a stream.""" @@ -1521,10 +1589,10 @@ def _to_arrow_iterable(self, bqstorage_client=None): # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1622,7 +1690,12 @@ def to_arrow( arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) - def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): + def to_dataframe_iterable( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1642,6 +1715,17 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + ..versionadded:: 2.14.0 + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -1665,6 +1749,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): dtypes, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -1682,12 +1767,12 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. Args: @@ -1815,7 +1900,7 @@ def to_arrow( progress_bar_type=None, bqstorage_client=None, create_bqstorage_client=True, - ): + ) -> "pyarrow.Table": """[Beta] Create an empty class:`pyarrow.Table`. Args: @@ -1837,7 +1922,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, - ): + ) -> "pandas.DataFrame": """Create an empty dataframe. 
Args: @@ -2148,7 +2233,7 @@ def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": """Return a :class:`TimePartitioning` object deserialized from a dict. This method creates a new ``TimePartitioning`` instance that points to @@ -2176,7 +2261,7 @@ def from_api_repr(cls, api_repr): instance._properties = api_repr return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``TimePartitioning`` diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2330d0c2c..ba8b4e8af 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.1" +__version__ = "2.14.0" diff --git a/noxfile.py b/noxfile.py index df36d237e..7ba081660 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,6 +21,7 @@ import nox +PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -31,6 +32,7 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ + "unit_noextras", "unit", "system", "snippets", @@ -38,11 +40,12 @@ "lint", "lint_setup_py", "blacken", + "pytype", "docs", ] -def default(session): +def default(session, install_extras=True): """Default unit test session. This is intended to be run **without** an interpreter set, so @@ -65,7 +68,8 @@ def default(session): constraints_path, ) - session.install("-e", ".[all]", "-c", constraints_path) + install_target = ".[all]" if install_extras else "." 
+ session.install("-e", install_target, "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -90,6 +94,21 @@ def unit(session): default(session) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_noextras(session): + """Run the unit test suite.""" + default(session, install_extras=False) + + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def pytype(session): + """Run type checks.""" + session.install("-e", ".[all]") + session.install("ipython") + session.install(PYTYPE_VERSION) + session.run("pytype") + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" @@ -275,7 +294,7 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( diff --git a/renovate.json b/renovate.json index f08bc22c9..c04895563 100644 --- a/renovate.json +++ b/renovate.json @@ -2,5 +2,8 @@ "extends": [ "config:base", ":preserveSemverRanges" ], - "ignorePaths": [".pre-commit-config.yaml"] + "ignorePaths": [".pre-commit-config.yaml"], + "pip_requirements": { + "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] + } } diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 676ff949e..299d90b65 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 -mock==4.0.2 +pytest==6.2.3 +mock==4.0.3 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c5f60911e..96819343c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 +google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index d22a33318..31c6ba104 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,10 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import datetime +import random + from google.cloud import bigquery import pytest +RESOURCE_PREFIX = "python_bigquery_samples_snippets" + + +def resource_prefix() -> str: + timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + random_string = hex(random.randrange(1000000))[2:] + return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + for dataset in bigquery_client.list_datasets(): + if ( + dataset.dataset_id.startswith(RESOURCE_PREFIX) + and dataset.created < yesterday + ): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): bigquery_client = bigquery.Client() @@ -25,3 +50,18 @@ def bigquery_client(): @pytest.fixture(scope="session") def project_id(bigquery_client): return bigquery_client.project + + +@pytest.fixture(scope="session") +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = resource_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 676ff949e..299d90b65 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 -mock==4.0.2 +pytest==6.2.3 +mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index abbe6fde4..74a18981e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,10 +1,11 @@ -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.3 -grpcio==1.36.1 +google-auth-oauthlib==0.4.4 +grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.4 +matplotlib==3.3.4; python_version < '3.7' +matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py new file mode 100644 index 000000000..3cca7a649 --- /dev/null +++ 
b/samples/snippets/test_update_with_dml.py @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +from conftest import resource_prefix +import update_with_dml + + +@pytest.fixture +def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + table_id = f"{resource_prefix()}_update_with_dml" + yield table_id + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +def test_update_with_dml(bigquery_client_patch, dataset_id, table_id): + override_values = { + "dataset_id": dataset_id, + "table_id": table_id, + } + num_rows = update_with_dml.run_sample(override_values=override_values) + assert num_rows > 0 diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py new file mode 100644 index 000000000..7fd09dd80 --- /dev/null +++ b/samples/snippets/update_with_dml.py @@ -0,0 +1,82 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_update_with_dml] +import pathlib + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def load_from_newline_delimited_json( + client: bigquery.Client, + filepath: pathlib.Path, + project_id: str, + dataset_id: str, + table_id: str, +): + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + job_config = bigquery.LoadJobConfig() + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = [ + bigquery.SchemaField("id", enums.SqlTypeNames.STRING), + bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER), + bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING), + ] + + with open(filepath, "rb") as json_file: + load_job = client.load_table_from_file( + json_file, full_table_id, job_config=job_config + ) + + # Wait for load job to finish. + load_job.result() + + +def update_with_dml( + client: bigquery.Client, project_id: str, dataset_id: str, table_id: str +): + query_text = f""" + UPDATE `{project_id}.{dataset_id}.{table_id}` + SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0") + WHERE TRUE + """ + query_job = client.query(query_text) + + # Wait for query job to finish. 
+ query_job.result() + + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") + return query_job.num_dml_affected_rows + + +def run_sample(override_values={}): + client = bigquery.Client() + filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" + project_id = client.project + dataset_id = "sample_db" + table_id = "UserSessions" + # [END bigquery_update_with_dml] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + table_id = override_values.get("table_id", table_id) + # [START bigquery_update_with_dml] + load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id) + return update_with_dml(client, project_id, dataset_id, table_id) + + +# [END bigquery_update_with_dml] diff --git a/samples/snippets/user_sessions_data.json b/samples/snippets/user_sessions_data.json new file mode 100644 index 000000000..7ea3715ad --- /dev/null +++ b/samples/snippets/user_sessions_data.json @@ -0,0 +1,10 @@ +{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} +{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} +{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} +{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} +{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} +{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} +{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} +{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} +{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} +{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} diff --git a/setup.cfg b/setup.cfg index c3a2b39f6..8eefc4435 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,3 +17,17 @@ # Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 + +[pytype] +python_version = 3.8 +inputs = + google/cloud/ +exclude = + tests/ + google/cloud/bigquery_v2/ +output = .pytype/ +disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. 
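+    # Only the pyi-error check is disabled; all other pytype checks still run.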
+ pyi-error diff --git a/setup.py b/setup.py index 12a9bde31..607ffb63f 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,9 @@ "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.11b0", - "opentelemetry-sdk==0.11b0", - "opentelemetry-instrumentation==0.11b0", + "opentelemetry-api >= 0.11b0", + "opentelemetry-sdk >= 0.11b0", + "opentelemetry-instrumentation >= 0.11b0", ], } diff --git a/synth.metadata b/synth.metadata index 2425b03fb..b031618b0 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" + "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } } ], diff --git a/synth.py b/synth.py index 3c6440600..d99f368cc 100644 --- a/synth.py +++ b/synth.py @@ -13,6 +13,7 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +import textwrap import synthtool as s from synthtool import gcp @@ -120,4 +121,32 @@ '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) +# ---------------------------------------------------------------------------- +# pytype-related changes +# ---------------------------------------------------------------------------- + +# Add .pytype to .gitignore +s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") + +# Add pytype config to setup.cfg +s.replace( + "setup.cfg", + r"universal = 1", + textwrap.dedent(""" \g<0> + + [pytype] + python_version = 3.8 + inputs = + google/cloud/ + exclude = + tests/ + google/cloud/bigquery_v2/ + output = .pytype/ + disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. + pyi-error""") +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 133f609a6..7c8ef50fa 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,10 +25,10 @@ import time import unittest import uuid +from typing import Optional import psutil import pytest -import pkg_resources from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . 
import helpers @@ -63,6 +63,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -89,6 +90,12 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +CLUSTERING_SCHEMA = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("body_height_cm", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("date_of_birth", "DATE", mode="REQUIRED"), +] TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"), bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"), @@ -116,13 +123,6 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") - -if pyarrow: - PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version -else: - PYARROW_INSTALLED_VERSION = None - MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" @@ -131,7 +131,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "%s%s" % (prefix, unique_resource_id()) + return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): @@ -150,7 +150,7 @@ class Config(object): global state. """ - CLIENT = None + CLIENT: Optional[bigquery.Client] = None CURSOR = None DATASET = None @@ -197,7 +197,9 @@ def test_get_service_account_email(self): def _create_bucket(self, bucket_name, location=None): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - retry_storage_errors(bucket.create)(location=location) + retry_storage_errors(storage_client.create_bucket)( + bucket_name, location=location + ) self.to_delete.append(bucket) return bucket @@ -436,6 +438,22 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) + def test_delete_job_metadata(self): + dataset_id = _make_dataset_id("us_east1") + self.temp_dataset(dataset_id, location="us-east1") + full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" + table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) + Config.CLIENT.create_table(table) + query_job: bigquery.QueryJob = Config.CLIENT.query( + f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", + ) + query_job.result() + self.assertIsNotNone(Config.CLIENT.get_job(query_job)) + + Config.CLIENT.delete_job_metadata(query_job) + with self.assertRaises(NotFound): + Config.CLIENT.get_job(query_job) + def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" @@ -567,6 +585,25 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_update_table_clustering_configuration(self): + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=CLUSTERING_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + 
self.assertTrue(_table_exists(table)) + + table.clustering_fields = ["full_name", "date_of_birth"] + table2 = Config.CLIENT.update_table(table, ["clustering_fields"]) + self.assertEqual(table2.clustering_fields, ["full_name", "date_of_birth"]) + + table2.clustering_fields = None + table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) + self.assertIsNone(table3.clustering_fields, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) @@ -880,7 +917,7 @@ def test_load_table_from_file_w_explicit_location(self): job_id = load_job.job_id # Can get the job from the EU. - load_job = client.get_job(job_id, location="EU") + load_job = client.get_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) self.assertTrue(load_job.exists()) @@ -897,7 +934,7 @@ def test_load_table_from_file_w_explicit_location(self): # Can cancel the job from the EU. self.assertTrue(load_job.cancel()) - load_job = client.cancel_job(job_id, location="EU") + load_job = client.cancel_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) @@ -1212,8 +1249,7 @@ def test_query_w_timeout(self): # Even though the query takes >1 second, the call to getQueryResults # should succeed. self.assertFalse(query_job.done(timeout=1)) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) def test_query_w_page_size(self): page_size = 45 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 000000000..7a67ea6b5 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,37 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .helpers import make_client + + +@pytest.fixture +def client(): + yield make_client() + + +@pytest.fixture +def PROJECT(): + yield "PROJECT" + + +@pytest.fixture +def DS_ID(): + yield "DATASET_ID" + + +@pytest.fixture +def LOCATION(): + yield "us-central" diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index b51b0bbb7..67aeaca35 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
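+# Shared unit-test factories: make_client() builds a Client with mock
+# credentials, and dataset_polymorphic parametrizes tests over every accepted
+# dataset argument type (reference, Dataset, DatasetListItem, string).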
+import google.cloud.bigquery.client +import google.cloud.bigquery.dataset +import mock +import pytest + def make_connection(*responses): import google.cloud.bigquery._http @@ -31,3 +36,47 @@ def _to_pyarrow(value): import pyarrow return pyarrow.array([value])[0] + + +def make_client(project="PROJECT", **kw): + credentials = mock.Mock(spec=google.auth.credentials.Credentials) + return google.cloud.bigquery.client.Client(project, credentials, **kw) + + +def make_dataset_reference_string(project, ds_id): + return f"{project}.{ds_id}" + + +def make_dataset(project, ds_id): + return google.cloud.bigquery.dataset.Dataset( + google.cloud.bigquery.dataset.DatasetReference(project, ds_id) + ) + + +def make_dataset_list_item(project, ds_id): + return google.cloud.bigquery.dataset.DatasetListItem( + dict(datasetReference=dict(projectId=project, datasetId=ds_id)) + ) + + +def identity(x): + return x + + +def get_reference(x): + return x.reference + + +dataset_like = [ + (google.cloud.bigquery.dataset.DatasetReference, identity), + (make_dataset, identity), + (make_dataset_list_item, get_reference), + ( + make_dataset_reference_string, + google.cloud.bigquery.dataset.DatasetReference.from_string, + ), +] + +dataset_polymorphic = pytest.mark.parametrize( + "make_dataset,get_reference", dataset_like +) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d1600ad43..0f9623203 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -41,6 +41,22 @@ from .helpers import _make_job_resource +@pytest.fixture +def table_read_options_kwarg(): + # Create a BigQuery Storage table read options object with pyarrow compression + # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is + # installed to support the compression. 
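+    # Fall back to an empty kwargs dict when the installed
+    # google-cloud-bigquery-storage predates ArrowSerializationOptions.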
+ if not hasattr(bigquery_storage, "ArrowSerializationOptions"): + return {} + + read_options = bigquery_storage.ReadSession.TableReadOptions( + arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( + buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + ) + return {"read_options": read_options} + + @pytest.mark.parametrize( "query,expected", ( @@ -82,7 +98,7 @@ def test__contains_order_by(query, expected): "SelecT name, age froM table OrdeR \n\t BY other_column;", ), ) -def test_to_dataframe_bqstorage_preserve_order(query): +def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class job_resource = _make_job_resource( @@ -123,8 +139,10 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", @@ -431,7 +449,7 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_to_dataframe_bqstorage(): +def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class resource = _make_job_resource(job_type="query", ended=True) @@ -468,8 +486,10 @@ def test_to_dataframe_bqstorage(): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -478,6 +498,52 @@ def test_to_dataframe_bqstorage(): ) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage_no_pyarrow_compression(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [{"name": "name", "type": ["null", "string"]}], + } + ) + bqstorage_client.create_read_session.return_value = session + + with mock.patch( + "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", 
new=False + ): + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.ReadSession( + table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index abd725820..39a3d845b 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -17,6 +17,7 @@ import decimal import functools import operator +import queue import warnings import mock @@ -41,6 +42,11 @@ from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -1265,6 +1271,66 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.parametrize( + "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", + [ + (3, {"max_queue_size": 2}, 3, 2), # custom queue size + (4, {}, 4, 4), # default queue size + (7, {"max_queue_size": None}, 7, 0), # infinite queue size + ], +) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage( + module_under_test, + stream_count, + maxsize_kwarg, + expected_call_count, + expected_maxsize, +): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + + queue_used = None # A reference to the queue used by code under test. + + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=["stream/s{i}" for i in range(stream_count)]) + bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), "table-z", + ) + + def fake_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + nonlocal queue_used + queue_used = worker_queue + try: + worker_queue.put_nowait("result_page") + except queue.Full: # pragma: NO COVER + pass + + download_stream = mock.Mock(side_effect=fake_download_stream) + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=download_stream + ): + result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, bqstorage_client, **maxsize_kwarg + ) + list(result_gen) + + # Timing-safe, as the method under test should block until the pool shutdown is + # complete, at which point all download stream workers have already been submitted + # to the thread pool. 
+ assert download_stream.call_count == stream_count # once for each stream + assert queue_used.maxsize == expected_maxsize + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( @@ -1398,6 +1464,7 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) result = next(results_gen) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 26ef340de..8f535145b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -65,7 +65,12 @@ from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. + PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") def _make_credentials(): @@ -856,698 +861,265 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." - def test_create_dataset_minimal(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_routine_w_minimal_resource(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - } creds = _make_credentials() + path = "/projects/test-routine-project/datasets/test_routines/routines" + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) + conn = client._connection = make_connection(resource) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + actual_routine = client.create_routine(routine, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % PATH, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - }, - timeout=7.5, + method="POST", path=path, data=resource, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, RoutineReference.from_string(full_routine_id) ) - def 
test_create_dataset_w_attrs(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry + def test_create_routine_w_conflict(self): + from google.cloud.bigquery.routine import Routine - PATH = "projects/%s/datasets" % self.PROJECT - DESCRIPTION = "DESC" - FRIENDLY_NAME = "FN" - LOCATION = "US" - USER_EMAIL = "phred@example.com" - LABELS = {"color": "red"} - VIEW = { - "projectId": "my-proj", - "datasetId": "starry-skies", - "tableId": "northern-hemisphere", + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_routine(routine) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "labels": LABELS, - "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") + def test_span_status_is_set(self): + from google.cloud.bigquery.routine import Routine + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_routine(routine) + + span_list = memory_exporter.get_finished_spans() + self.assertTrue(span_list[0].status is not None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + def test_create_routine_w_conflict_exists_ok(self): + from google.cloud.bigquery.routine import Routine + creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - entries = [ - AccessEntry("OWNER", "userByEmail", USER_EMAIL), - AccessEntry(None, "view", VIEW), - ] + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": 
"minimal_routine", + } + } + path = "/projects/test-routine-project/datasets/test_routines/routines" - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.access_entries = entries - before.description = DESCRIPTION - before.friendly_name = FRIENDLY_NAME - before.default_table_expiration_ms = 3600 - before.location = LOCATION - before.labels = LABELS + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists"), resource + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) + actual_routine = client.create_routine(routine, exists_ok=True) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + final_attributes.assert_called_with( + {"path": "%s/minimal_routine" % path}, client, None + ) + + self.assertEqual(actual_routine.project, "test-routine-project") + self.assertEqual(actual_routine.dataset_id, "test_routines") + self.assertEqual(actual_routine.routine_id, "minimal_routine") + conn.api_request.assert_has_calls( + [ + mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=None, + ), + ] + ) + + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.description, DESCRIPTION) - self.assertEqual(after.friendly_name, FRIENDLY_NAME) - self.assertEqual(after.location, LOCATION) - self.assertEqual(after.default_table_expiration_ms, 3600) - self.assertEqual(after.labels, LABELS) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "access": [ - {"role": "OWNER", "userByEmail": USER_EMAIL}, - {"view": VIEW}, - ], - "labels": LABELS, + "timePartitioning": {"type": "DAY"}, + "labels": {}, }, - timeout=None, + timeout=7.5, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_custom_property(self): + def test_create_table_w_custom_property(self): # The library should handle sending properties to the API that are not 
# yet part of the library - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" conn = client._connection = make_connection(resource) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before._properties["newAlphaProperty"] = "unreleased property" + table = Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after._properties["newAlphaProperty"], "unreleased property") + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "newAlphaProperty": "unreleased property", "labels": {}, }, timeout=None, ) + self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_wo_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_encryption_configuration(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + from google.cloud.bigquery.table import Table - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, 
self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_w_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset - - PATH = "projects/%s/datasets" % self.PROJECT - OTHER_LOCATION = "EU" - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": OTHER_LOCATION, - } - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.location = OTHER_LOCATION - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, OTHER_LOCATION) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % PATH, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": OTHER_LOCATION, - }, - timeout=None, - ) - - def test_create_dataset_w_reference(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ) - - def test_create_dataset_w_fully_qualified_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } - creds = _make_credentials() - client 
= self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ) - - def test_create_dataset_w_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.create_dataset(self.DS_ID) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ) - - def test_create_dataset_alreadyexists_w_exists_ok_false(self): - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_dataset(self.DS_ID) - - def test_create_dataset_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets".format(self.PROJECT) - get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "{}:{}".format(self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists"), resource - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.create_dataset(self.DS_ID, exists_ok=True) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - 
self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) - - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] - ) - - def test_create_routine_w_minimal_resource(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference - - creds = _make_credentials() - path = "/projects/test-routine-project/datasets/test_routines/routines" - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.create_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, RoutineReference.from_string(full_routine_id) - ) - - def test_create_routine_w_conflict(self): - from google.cloud.bigquery.routine import Routine - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") - ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.create_routine(routine) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, - ) - - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") - def test_span_status_is_set(self): - from google.cloud.bigquery.routine import Routine - - tracer_provider = TracerProvider() - memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) - tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") - ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - 
client.create_routine(routine) - - span_list = memory_exporter.get_finished_spans() - self.assertTrue(span_list[0].status is not None) - - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, - ) - - def test_create_routine_w_conflict_exists_ok(self): - from google.cloud.bigquery.routine import Routine - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - path = "/projects/test-routine-project/datasets/test_routines/routines" - - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists"), resource - ) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.create_routine(routine, exists_ok=True) - - final_attributes.assert_called_with( - {"path": "%s/minimal_routine" % path}, client, None - ) - - self.assertEqual(actual_routine.project, "test-routine-project") - self.assertEqual(actual_routine.dataset_id, "test_routines") - self.assertEqual(actual_routine.routine_id, "minimal_routine") - conn.api_request.assert_has_calls( - [ - mock.call(method="POST", path=path, data=resource, timeout=None,), - mock.call( - method="GET", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, - ), - ] - ) - - def test_create_table_w_day_partition(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning - - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table, timeout=7.5) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY"}, - "labels": {}, - }, - timeout=7.5, - ) - self.assertEqual(table.time_partitioning.type_, "DAY") - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.table import Table - - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - 
table._properties["newAlphaProperty"] = "unreleased property" - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_encryption_configuration(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - from google.cloud.bigquery.table import Table - - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_day_partition_and_expire(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() @@ -2782,577 +2354,208 @@ def test_update_table_w_query(self): "location": location, "expirationTime": _millis(exp_time), } - ) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.expires = exp_time - table.view_query = query - table.view_use_legacy_sql = True - updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_table = client.update_table(table, updated_properties) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, - ) - - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.view_query, table.view_query) - self.assertEqual(updated_table.expires, table.expires) - self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - self.assertEqual(updated_table.location, location) - - 
conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={ - "view": {"query": query, "useLegacySql": True}, - "expirationTime": str(_millis(exp_time)), - "schema": schema_resource, - }, - headers=None, - timeout=None, - ) - - def test_update_table_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - resource1 = self._make_table_resource() - resource1.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } - } - ) - resource2 = self._make_table_resource() - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table( - # Test with string for table ID - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ) - ) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - table.schema = None - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["schema"]}, client, None - ) - - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} - self.assertEqual(req[1]["data"], sent) - self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) - - def test_update_table_delete_property(self): - from google.cloud.bigquery.table import Table - - description = "description" - title = "title" - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - resource1 = self._make_table_resource() - resource1.update({"description": description, "friendlyName": title}) - resource2 = self._make_table_resource() - resource2["description"] = None - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - table = Table(self.TABLE_REF) - table.description = description - table.friendly_name = title - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table2 = client.update_table(table, ["description", "friendly_name"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, - client, - None, - ) - - self.assertEqual(table2.description, table.description) - table2.description = None - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table3 = client.update_table(table2, ["description"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description"]}, client, None - ) - - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - 
self.assertEqual(req[1]["path"], "/%s" % path) - sent = {"description": None} - self.assertEqual(req[1]["data"], sent) - self.assertIsNone(table3.description) - - def test_list_tables_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - dataset = DatasetReference(self.PROJECT, self.DS_ID) - iterator = client.list_tables(dataset, timeout=7.5) - self.assertIs(iterator.dataset, dataset) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(tables, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 - ) - - def test_list_models_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) - iterator = client.list_models(dataset_id, timeout=7.5) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - models = list(page) - token = iterator.next_page_token - - self.assertEqual(models, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 - ) - - def test_list_models_defaults(self): - from google.cloud.bigquery.model import Model - - MODEL_1 = "model_one" - MODEL_2 = "model_two" - PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "models": [ - { - "modelReference": { - "modelId": MODEL_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - { - "modelReference": { - "modelId": MODEL_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - ], - } - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_models(dataset) - self.assertIs(iterator.dataset, dataset) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - models = list(page) - token = iterator.next_page_token - - self.assertEqual(len(models), len(DATA["models"])) - for found, expected in zip(models, DATA["models"]): - self.assertIsInstance(found, Model) - self.assertEqual(found.model_id, expected["modelReference"]["modelId"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None - ) - - def test_list_models_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_models(DatasetReference(self.PROJECT, 
self.DS_ID).model("foo")) - - def test_list_routines_empty_w_timeout(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - iterator = client.list_routines("test-routines.test_routines", timeout=7.5) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with( - {"path": "/projects/test-routines/datasets/test_routines/routines"}, - client, - None, - ) - routines = list(page) - token = iterator.next_page_token - - self.assertEqual(routines, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/test-routines/datasets/test_routines/routines", - query_params={}, - timeout=7.5, - ) - - def test_list_routines_defaults(self): - from google.cloud.bigquery.routine import Routine - - project_id = "test-routines" - dataset_id = "test_routines" - path = "/projects/test-routines/datasets/test_routines/routines" - routine_1 = "routine_one" - routine_2 = "routine_two" - token = "TOKEN" - resource = { - "nextPageToken": token, - "routines": [ - { - "routineReference": { - "routineId": routine_1, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - { - "routineReference": { - "routineId": routine_2, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - ], - } - + ) creds = _make_credentials() - client = self._make_one(project=project_id, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = DatasetReference(client.project, dataset_id) - - iterator = client.list_routines(dataset) - self.assertIs(iterator.dataset, dataset) + table = Table(self.TABLE_REF, schema=schema) + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + updated_table = client.update_table(table, updated_properties) - final_attributes.assert_called_once_with({"path": path}, client, None) - routines = list(page) - actual_token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": updated_properties}, client, None, + ) - self.assertEqual(len(routines), len(resource["routines"])) - for found, expected in zip(routines, resource["routines"]): - self.assertIsInstance(found, Routine) - self.assertEqual( - found.routine_id, expected["routineReference"]["routineId"] - ) - self.assertEqual(actual_token, token) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.location, location) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={ + "view": {"query": query, "useLegacySql": True}, + "expirationTime": str(_millis(exp_time)), + "schema": schema_resource, + }, + headers=None, + timeout=None, ) - def test_list_routines_wrong_type(self): + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not 
sure if back-end will actually + # allow this operation, but the spec says it is optional. + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } + ) + resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_routines( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") + conn = client._connection = make_connection(resource1, resource2) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table( + # Test with string for table ID + "{}.{}.{}".format( + self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) ) - def test_list_tables_defaults(self): - from google.cloud.bigquery.table import TableListItem - - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "tables": [ - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - ], - } + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) + table.schema = None - iterator = client.list_tables(dataset) - self.assertIs(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertEqual(token, TOKEN) + updated_table = client.update_table(table, ["schema"]) - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["schema"]}, client, None ) - def test_list_tables_explicit(self): - from google.cloud.bigquery.table import TableListItem + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + sent = {"schema": None} + self.assertEqual(req[1]["data"], sent) + self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(len(updated_table.schema), 0) - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = 
"TOKEN" - DATA = { - "tables": [ - { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - ] - } + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table + description = "description" + title = "title" + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update({"description": description, "friendlyName": title}) + resource2 = self._make_table_resource() + resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) + conn = client._connection = make_connection(resource1, resource2) + table = Table(self.TABLE_REF) + table.description = description + table.friendly_name = title - iterator = client.list_tables( - # Test with string for dataset ID. - self.DS_ID, - max_results=3, - page_token=TOKEN, - ) - self.assertEqual(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token - - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertIsNone(token) + table2 = client.update_table(table, ["description", "friendly_name"]) - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, + client, + None, ) - def test_list_tables_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) - - def test_delete_dataset(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import DatasetReference - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID)) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(*([{}] * len(datasets))) - for arg in datasets: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, timeout=7.5) - - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH}, client, None - ) - - conn.api_request.assert_called_with( - method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 - ) - - def 
test_delete_dataset_delete_contents(self): - from google.cloud.bigquery.dataset import Dataset + self.assertEqual(table2.description, table.description) + table2.description = None - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}, {}) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - for arg in (ds_ref, Dataset(ds_ref)): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, delete_contents=True) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table3 = client.update_table(table2, ["description"]) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "deleteContents": True}, client, None - ) - conn.api_request.assert_called_with( - method="DELETE", - path="/%s" % PATH, - query_params={"deleteContents": "true"}, - timeout=None, - ) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description"]}, client, None + ) - def test_delete_dataset_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.delete_dataset( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + self.assertEqual(req[1]["path"], "/%s" % path) + sent = {"description": None} + self.assertEqual(req[1]["data"], sent) + self.assertIsNone(table3.description) - def test_delete_dataset_w_not_found_ok_false(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) + def test_delete_job_metadata_not_found(self): creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") + google.api_core.exceptions.NotFound("job not found"), + google.api_core.exceptions.NotFound("job not found"), ) with self.assertRaises(google.api_core.exceptions.NotFound): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(self.DS_ID) + client.delete_job_metadata("my-job") - final_attributes.assert_called_once_with({"path": path}, client, None) + conn.api_request.reset_mock() + client.delete_job_metadata("my-job", not_found_ok=True) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/client-proj/jobs/my-job/delete", + query_params={"location": "client-loc"}, + timeout=None, ) - def test_delete_dataset_w_not_found_ok_true(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) + def test_delete_job_metadata_with_id(self): creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + 
client.delete_job_metadata("my-job", project="param-proj", location="param-loc") + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/param-proj/jobs/my-job/delete", + query_params={"location": "param-loc"}, + timeout=None, ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(self.DS_ID, not_found_ok=True) + def test_delete_job_metadata_with_resource(self): + from google.cloud.bigquery.job import QueryJob - final_attributes.assert_called_once_with({"path": path}, client, None) + query_resource = { + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "us-east1", + }, + "configuration": {"query": {}}, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(query_resource) + job_from_resource = QueryJob.from_api_repr(query_resource, client) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + client.delete_job_metadata(job_from_resource) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/job-based-proj/jobs/query_job/delete", + query_params={"location": "us-east1"}, + timeout=None, ) def test_delete_model(self): @@ -3795,31 +2998,30 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.get_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + query_params={"projection": "full"}, timeout=None, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" creds = _make_credentials() - client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT) + client.get_job(JOB_ID) conn.api_request.assert_called_once_with( method="GET", - path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + path="/projects/client-proj/jobs/NONESUCH", + query_params={"projection": "full", "location": "client-loc"}, timeout=None, ) @@ -3833,7 +3035,11 @@ def test_get_job_hit_w_timeout(self): QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "resource-proj", + "jobId": "query_job", + "location": "us-east1", + }, "state": "DONE", "configuration": { "query": { @@ -3851,18 +3057,21 @@ def test_get_job_hit_w_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(ASYNC_QUERY_DATA) + job_from_resource = QueryJob.from_api_repr(ASYNC_QUERY_DATA, client) - job = client.get_job(JOB_ID, timeout=7.5) + job = client.get_job(job_from_resource, timeout=7.5) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "resource-proj") + self.assertEqual(job.location, "us-east1") 
self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED) self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) conn.api_request.assert_called_once_with( method="GET", - path="/projects/PROJECT/jobs/query_job", - query_params={"projection": "full"}, + path="/projects/resource-proj/jobs/query_job", + query_params={"projection": "full", "location": "us-east1"}, timeout=7.5, ) @@ -3911,7 +3120,11 @@ def test_cancel_job_hit(self): QUERY = "SELECT * from test_dataset:test_table" QUERY_JOB_RESOURCE = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "asia-northeast1", + }, "state": "RUNNING", "configuration": {"query": {"query": QUERY}}, } @@ -3919,17 +3132,20 @@ def test_cancel_job_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(RESOURCE) + job_from_resource = QueryJob.from_api_repr(QUERY_JOB_RESOURCE, client) - job = client.cancel_job(JOB_ID) + job = client.cancel_job(job_from_resource) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "job-based-proj") + self.assertEqual(job.location, "asia-northeast1") self.assertEqual(job.query, QUERY) conn.api_request.assert_called_once_with( method="POST", - path="/projects/PROJECT/jobs/query_job/cancel", - query_params={"projection": "full"}, + path="/projects/job-based-proj/jobs/query_job/cancel", + query_params={"projection": "full", "location": "asia-northeast1"}, timeout=None, ) diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py new file mode 100644 index 000000000..d07aaed4f --- /dev/null +++ b/tests/unit/test_create_dataset.py @@ -0,0 +1,363 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from google.cloud.bigquery.dataset import Dataset, DatasetReference +from .helpers import make_connection, dataset_polymorphic, make_client +import google.cloud.bigquery.dataset +import mock +import pytest + + +@dataset_polymorphic +def test_create_dataset_minimal(make_dataset, get_reference, client, PROJECT, DS_ID): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + } + conn = client._connection = make_connection(RESOURCE) + + dataset = make_dataset(PROJECT, DS_ID) + after = client.create_dataset(dataset, timeout=7.5) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + }, + timeout=7.5, + ) + + +def test_create_dataset_w_attrs(client, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import AccessEntry + + PATH = "projects/%s/datasets" % PROJECT + DESCRIPTION = "DESC" + FRIENDLY_NAME = "FN" + LOCATION = "US" + USER_EMAIL = "phred@example.com" + LABELS = {"color": "red"} + VIEW = { + "projectId": "my-proj", + "datasetId": "starry-skies", + "tableId": "northern-hemisphere", + } + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "labels": LABELS, + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + } + conn = client._connection = make_connection(RESOURCE) + entries = [ + AccessEntry("OWNER", "userByEmail", USER_EMAIL), + AccessEntry(None, "view", VIEW), + ] + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.access_entries = entries + before.description = DESCRIPTION + before.friendly_name = FRIENDLY_NAME + before.default_table_expiration_ms = 3600 + before.location = LOCATION + before.labels = LABELS + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.description == DESCRIPTION + assert after.friendly_name == FRIENDLY_NAME + assert after.location == LOCATION + assert after.default_table_expiration_ms == 3600 + assert after.labels == LABELS + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "labels": LABELS, + }, + timeout=None, + ) + + +def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): + # The library should handle sending properties to the API that are not + # yet part of the library + + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + } + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before._properties["newAlphaProperty"] = "unreleased 
property" + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after._properties["newAlphaProperty"] == "unreleased property" + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + "labels": {}, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + OTHER_LOCATION = "EU" + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": OTHER_LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.location = OTHER_LOCATION + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == OTHER_LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": OTHER_LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset(DatasetReference(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = 
make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset("{}.{}".format(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_alreadyexists_w_exists_ok_false(PROJECT, DS_ID, LOCATION): + client = make_client(location=LOCATION) + client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_dataset(DS_ID) + + +def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION): + post_path = "/projects/{}/datasets".format(PROJECT) + get_path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists"), resource + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID, exists_ok=True) + + final_attributes.assert_called_with({"path": get_path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ), + mock.call(method="GET", path=get_path, timeout=None), + ] + ) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index cbd6f6909..039ef3b4c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -123,6 +123,7 @@ def _mock_job( schema=schema, num_dml_affected_rows=num_dml_affected_rows, ) + mock_job.destination.project = 
"P" mock_job.destination.to_bqstorage.return_value = ( "projects/P/datasets/DS/tables/T" ) @@ -177,6 +178,7 @@ def test_raises_error_if_closed(self): "fetchone", "setinputsizes", "setoutputsize", + "__iter__", ) for method in method_names: @@ -380,6 +382,52 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # Use unordered data to also test any non-determenistic key order in dicts. + row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with mock.patch( + "google.cloud.bigquery.dbapi.cursor._ARROW_COMPRESSION_SUPPORT", new=False + ): + rows = cursor.fetchall() + + mock_client.list_rows.assert_not_called() # The default client was not used. + + # Check the BQ Storage session config. + expected_session = bigquery_storage.ReadSession( + table="projects/P/datasets/DS/tables/T", + data_format=bigquery_storage.DataFormat.ARROW, + ) + mock_bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/P", read_session=expected_session, max_stream_count=1 + ) + + # Check the data returned. 
+ field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [[("foo", 1.1), ("bar", 1.2)]] + + self.assertEqual(sorted_row_data, expected_row_data) + def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect @@ -564,6 +612,29 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_is_iterable(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect( + self._mock_client(rows=[("hello", "there", 7), ("good", "bye", -3)]) + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar, baz FROM hello_world WHERE baz < 42;") + + rows_iter = iter(cursor) + + row = next(rows_iter) + self.assertEqual(row, ("hello", "there", 7)) + row = next(rows_iter) + self.assertEqual(row, ("good", "bye", -3)) + self.assertRaises(StopIteration, next, rows_iter) + + self.assertEqual( + list(cursor), + [], + "Iterating again over the same results should produce no rows.", + ) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor @@ -586,6 +657,14 @@ def test__format_operation_w_wrong_dict(self): {"somevalue-not-here": "hi", "othervalue": "world"}, ) + def test__format_operation_w_redundant_dict_key(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation( + "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} + ) + self.assertEqual(formatted_operation, "SELECT @`somevalue`;") + def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor @@ -605,8 +684,53 @@ def test__format_operation_w_too_short_sequence(self): ("hello",), ) + def test__format_operation_w_too_long_sequence(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s;", + ("hello", "world", "everyone"), + ) + def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor formatted_operation = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") + + def test__format_operation_wo_params_single_percent(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_wo_params_double_percents(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_unescaped_percent_w_dict_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %(foo)s, '100 %';", + {"foo": "bar"}, + ) + + def test__format_operation_unescaped_percent_w_list_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s, '100 %';", + ["foo", "bar"], + ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py new file mode 100644 index 000000000..3a65e031c --- /dev/null +++ b/tests/unit/test_delete_dataset.py @@ -0,0 +1,78 @@ +# Copyright 2021 Google LLC + +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .helpers import make_connection, make_client, dataset_polymorphic +import google.api_core.exceptions +import pytest + + +@dataset_polymorphic +def test_delete_dataset(make_dataset, get_reference, client, PROJECT, DS_ID): + dataset = make_dataset(PROJECT, DS_ID) + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + client.delete_dataset(dataset, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_delete_dataset_delete_contents( + make_dataset, get_reference, client, PROJECT, DS_ID +): + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + dataset = make_dataset(PROJECT, DS_ID) + client.delete_dataset(dataset, delete_contents=True) + conn.api_request.assert_called_with( + method="DELETE", + path="/%s" % PATH, + query_params={"deleteContents": "true"}, + timeout=None, + ) + + +def test_delete_dataset_wrong_type(client): + with pytest.raises(TypeError): + client.delete_dataset(42) + + +def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + with pytest.raises(google.api_core.exceptions.NotFound): + client.delete_dataset(DS_ID) + + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) + + +def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + client.delete_dataset(DS_ID, not_found_ok=True) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py new file mode 100644 index 000000000..56aa66126 --- /dev/null +++ b/tests/unit/test_list_models.py @@ -0,0 +1,86 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset_id = "{}.{}".format(PROJECT, DS_ID) + iterator = client.list_models(dataset_id, timeout=7.5) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert models == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.model import Model + + MODEL_1 = "model_one" + MODEL_2 = "model_two" + PATH = "projects/%s/datasets/%s/models" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "models": [ + { + "modelReference": { + "modelId": MODEL_1, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + { + "modelReference": { + "modelId": MODEL_2, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_models(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert len(models) == len(DATA["models"]) + for found, expected in zip(models, DATA["models"]): + assert isinstance(found, Model) + assert found.model_id == expected["modelReference"]["modelId"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_models_wrong_type(client): + with pytest.raises(TypeError): + client.list_models(42) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py new file mode 100644 index 000000000..714ede0d4 --- /dev/null +++ b/tests/unit/test_list_routines.py @@ -0,0 +1,89 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_routines_empty_w_timeout(client): + conn = client._connection = make_connection({}) + + iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + page = next(iterator.pages) + routines = list(page) + token = iterator.next_page_token + + assert routines == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routines/datasets/test_routines/routines", + query_params={}, + timeout=7.5, + ) + + +@dataset_polymorphic +def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): + from google.cloud.bigquery.routine import Routine + + project_id = PROJECT + dataset_id = "test_routines" + path = f"/projects/{PROJECT}/datasets/test_routines/routines" + routine_1 = "routine_one" + routine_2 = "routine_two" + token = "TOKEN" + resource = { + "nextPageToken": token, + "routines": [ + { + "routineReference": { + "routineId": routine_1, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + { + "routineReference": { + "routineId": routine_2, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + ], + } + + conn = client._connection = make_connection(resource) + dataset = make_dataset(client.project, dataset_id) + + iterator = client.list_routines(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + routines = list(page) + actual_token = iterator.next_page_token + + assert len(routines) == len(resource["routines"]) + for found, expected in zip(routines, resource["routines"]): + assert isinstance(found, Routine) + assert found.routine_id == expected["routineReference"]["routineId"] + assert actual_token == token + + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=None + ) + + +def test_list_routines_wrong_type(client): + with pytest.raises(TypeError): + client.list_routines(42) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py new file mode 100644 index 000000000..9acee9580 --- /dev/null +++ b/tests/unit/test_list_tables.py @@ -0,0 +1,159 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .helpers import make_connection, dataset_polymorphic +import google.cloud.bigquery.dataset +import pytest + + +@dataset_polymorphic +def test_list_tables_empty_w_timeout( + make_dataset, get_reference, client, PROJECT, DS_ID +): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "tables": [ + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_tables(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_tables_explicit(client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "tables": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ] + } + + conn = client._connection = make_connection(DATA) + dataset = google.cloud.bigquery.dataset.DatasetReference(PROJECT, DS_ID) + + iterator = client.list_tables( + # Test with string for dataset ID. 
+ DS_ID, + max_results=3, + page_token=TOKEN, + ) + assert iterator.dataset == dataset + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) + + +def test_list_tables_wrong_type(client): + with pytest.raises(TypeError): + client.list_tables(42) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 318a54d34..0bef1e5e1 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -51,6 +51,12 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): + from google.auth.exceptions import TransportError + + exc = TransportError("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py index 6002ae3e8..e5016b0e5 100644 --- a/tests/unit/test_signature_compatibility.py +++ b/tests/unit/test_signature_compatibility.py @@ -31,20 +31,12 @@ def row_iterator_class(): return RowIterator -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_arrow) sig2 = inspect.signature(row_iterator_class.to_arrow) assert sig == sig2 -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_dataframe) sig2 = inspect.signature(row_iterator_class.to_dataframe) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3373528e0..ce4a15761 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1210,8 +1210,8 @@ def test_clustering_fields_setter_w_none(self): table._properties["clustering"] = {"fields": fields} table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_clustering_fields_setter_w_none_noop(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1219,8 +1219,8 @@ def test_clustering_fields_setter_w_none_noop(self): table = self._make_one(table_ref) table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_encryption_configuration_setter(self): # Previously, the EncryptionConfiguration class was in the table module, not the