From 155bacc156f181384ca6dba699ab83d0398176d1 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 20 Oct 2020 15:07:55 -0400 Subject: [PATCH 01/17] fix: create_job method accepts dictionary arguments (#300) * fix: broken create_job method * fix: changes in unit tests * fix: fix sourceTable thing * fix: handle sourceTable passed in job resource * fix: remove delete destination table from query * fix: revert destination table for query --- google/cloud/bigquery/client.py | 12 ++++--- tests/unit/test_client.py | 57 ++++++++++++++++----------------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cce393d6c..e4b5b22ab 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -48,11 +48,11 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject +from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type -from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -1619,6 +1619,7 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): ) destination = _get_sub_prop(job_config, ["load", "destinationTable"]) source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) + destination = TableReference.from_api_repr(destination) return self.load_table_from_uri( source_uris, destination, @@ -1631,9 +1632,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) + destination = TableReference.from_api_repr(destination) sources = [] source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) - if source_configs is None: source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] for source_config in source_configs: @@ -1651,10 +1652,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) - source_type = "Table" - if not source: + if source: + source_type = "Table" + source = TableReference.from_api_repr(source) + else: source = _get_sub_prop(job_config, ["extract", "sourceModel"]) source_type = "Model" + source = ModelReference.from_api_repr(source) destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"]) return self.extract_table( source, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2001ad42b..e507834f6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -3573,21 +3573,28 @@ def test_delete_table_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) - def _create_job_helper(self, job_config, client_method): + def _create_job_helper(self, job_config): + from google.cloud.bigquery import _helpers + creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection() - rf1 = mock.Mock() - get_config_patch = mock.patch( - "google.cloud.bigquery.job._JobConfig.from_api_repr", 
return_value=rf1, - ) - load_patch = mock.patch(client_method, autospec=True) + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": job_config, + } + conn = client._connection = make_connection(RESOURCE) + client.create_job(job_config=job_config) + if "query" in job_config: + _helpers._del_sub_prop(job_config, ["query", "destinationTable"]) - with load_patch as client_method, get_config_patch: - client.create_job(job_config=job_config) - client_method.assert_called_once() + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=None, + ) def test_create_job_load_config(self): configuration = { @@ -3601,9 +3608,7 @@ def test_create_job_load_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.load_table_from_uri" - ) + self._create_job_helper(configuration) def test_create_job_copy_config(self): configuration = { @@ -3623,9 +3628,7 @@ def test_create_job_copy_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_copy_config_w_single_source(self): configuration = { @@ -3643,9 +3646,7 @@ def test_create_job_copy_config_w_single_source(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config(self): configuration = { @@ -3658,9 +3659,7 @@ def test_create_job_extract_config(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config_for_model(self): configuration = { @@ -3673,17 +3672,17 @@ def test_create_job_extract_config_for_model(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_query_config(self): configuration = { - "query": {"query": "query", "destinationTable": {"tableId": "table_id"}} + "query": { + "query": "query", + "destinationTable": {"tableId": "table_id"}, + "useLegacySql": False, + } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.query", - ) + self._create_job_helper(configuration) def test_create_job_query_config_w_rateLimitExceeded_error(self): from google.cloud.exceptions import Forbidden From fd082551f4018d6c31fa48922bd0c2e301411213 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Oct 2020 20:47:13 +0200 Subject: [PATCH 02/17] chore(deps): update dependency grpcio to v1.33.1 (#338) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7d001fa2f..544e92eb1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 -grpcio==1.32.0 +grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From c9823d932205f128b673b05d6086ca783c85c354 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Oct 2020 10:04:34 -0500 Subject: [PATCH 03/17] test: make 
`_AsyncJob` tests mock at a lower layer (#340) This is intented to make the `_AsyncJob` tests more robust to changes in retry behavior. It also more explicitly tests the retry behavior by observing API calls rather than calls to certain methods. --- google/cloud/bigquery/client.py | 1 - google/cloud/bigquery/job.py | 7 +- tests/unit/test_job.py | 121 ++++++++++++++++++++++++-------- 3 files changed, 95 insertions(+), 34 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e4b5b22ab..57df9455e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -625,7 +625,6 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None def _call_api( self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs ): - call = functools.partial(self._connection.api_request, **kwargs) if retry: call = retry(call) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 766db1d42..6cb138acf 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -529,9 +529,8 @@ def state(self): Optional[str]: the state (None until set from the server). """ - status = self._properties.get("status") - if status is not None: - return status.get("state") + status = self._properties.get("status", {}) + return status.get("state") def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -588,7 +587,7 @@ def _check_resource_config(cls, resource): def to_api_repr(self): """Generate a resource for the job.""" - raise NotImplementedError("Abstract") + return copy.deepcopy(self._properties) _build_resource = to_api_repr # backward-compatibility alias diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 75212ae95..f577b08bd 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -20,6 +20,8 @@ import warnings import freezegun +from google.api_core import exceptions +import google.api_core.retry import mock import pytest import requests @@ -70,6 +72,12 @@ def _make_connection(*responses): return mock_conn +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": "rateLimitExceeded"}] + ) + + def _make_job_resource( creation_time_ms=1437767599006, started_time_ms=1437767600007, @@ -84,6 +92,7 @@ def _make_job_resource( user_email="bq-user@example.com", ): resource = { + "status": {"state": "PENDING"}, "configuration": {job_type: {}}, "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, @@ -97,9 +106,11 @@ def _make_job_resource( if started or ended: resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" if ended: resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" if job_type == "query": resource["configuration"]["query"]["destinationTable"] = { @@ -555,14 +566,14 @@ def test__check_resource_config_ok(self): def test__build_resource(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job._build_resource() + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID def test_to_api_repr(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job.to_api_repr() + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID def 
test__begin_already(self): job = self._set_properties_job() @@ -965,43 +976,95 @@ def test_done_already(self): self.assertTrue(job.done()) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_default_wo_state(self, result): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + _make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - self.assertIs(job.result(), result.return_value) + self.assertIs(job.result(), job) - begin.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_w_retry_wo_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. 
See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - retry = mock.Mock() + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=retry), result.return_value) + self.assertIs(job.result(retry=custom_retry), job) - begin.assert_called_once_with(retry=retry, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_explicit_w_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - begin = job._begin = mock.Mock() + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. + job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) timeout = 1 - self.assertIs(job.result(timeout=timeout), result.return_value) + self.assertIs(job.result(timeout=timeout), job) - begin.assert_not_called() - result.assert_called_once_with(timeout=timeout) + conn.api_request.assert_not_called() def test_cancelled_wo_error_result(self): client = _make_client(project=self.PROJECT) From dca2e4ca7c2ae183ac4bb60f653d425a43a86bea Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 27 Oct 2020 10:34:55 -0400 Subject: [PATCH 04/17] docs: add documents for QueryPlanEntry and QueryPlanEntryStep (#344) --- docs/reference.rst | 2 ++ google/cloud/bigquery/job.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 21dd8e43d..3643831cb 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -59,6 +59,8 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.QueryPlanEntry + job.QueryPlanEntryStep job.QueryPriority job.SourceFormat job.WriteDisposition diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 6cb138acf..977d7a559 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -2844,7 +2844,7 @@ def query_plan(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan Returns: - List[QueryPlanEntry]: + List[google.cloud.bigquery.job.QueryPlanEntry]: mappings describing the query plan, or an empty list if the query has not yet completed. """ @@ -3418,7 +3418,6 @@ class QueryPlanEntryStep(object): Args: kind (str): step type. - substeps (List): names of substeps. 
""" @@ -3434,7 +3433,8 @@ def from_api_repr(cls, resource): resource (Dict): JSON representation of the entry. Returns: - QueryPlanEntryStep: new instance built from the resource. + google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. """ return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) @@ -3464,7 +3464,7 @@ def from_api_repr(cls, resource): ExplainQueryStage representation returned from API. Returns: - google.cloud.bigquery.QueryPlanEntry: + google.cloud.bigquery.job.QueryPlanEntry: Query plan entry parsed from ``resource``. """ entry = cls() From e86aafe0258e45d2e9baa0fff9c47594db878a55 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 18:56:58 +0100 Subject: [PATCH 05/17] chore(deps): update dependency google-auth-oauthlib to v0.4.2 (#349) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 544e92eb1..fab797494 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 -google-auth-oauthlib==0.4.1 +google-auth-oauthlib==0.4.2 grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From d2ffc22013ca776bfa99d046b0419a9666c9d18e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:16:06 +0100 Subject: [PATCH 06/17] chore(deps): update dependency grpcio to v1.33.2 (#350) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [grpcio](https://grpc.io) | patch | `==1.33.1` -> `==1.33.2` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). 
--- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fab797494..3bcab1ace 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.2 -grpcio==1.33.1 +grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From e8be4898d70303cc4dfdf952114bb7adef46e39a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:36:05 +0100 Subject: [PATCH 07/17] chore(deps): update dependency google-cloud-bigquery-storage to v2.0.1 (#337) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | patch | `==2.0.0` -> `==2.0.1` | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.0.1`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​201-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev200v201-2020-10-21) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.0.0...v2.0.1)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3bcab1ace..411a86dae 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.1.0 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' From 5a925ec5b511a19aca1fc7640e54c55586078403 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:48:03 +0100 Subject: [PATCH 08/17] chore(deps): update dependency google-cloud-bigquery to v2.2.0 (#333) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.1.0` -> `==2.2.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.2.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​220-httpswwwgithubcomgoogleapispython-bigquerycomparev210v220-2020-10-19) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) ##### Features - add method api_repr for table list item ([#​299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) - add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#​328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) - add timeout paramter to load_table_from_file and it dependent methods ([#​327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) - add to_api_repr method to Model ([#​326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) - allow client options to be set in magics context ([#​322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) ##### Bug Fixes - make TimePartitioning repr evaluable ([#​110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#​109](https://www.github.com/googleapis/python-bigquery/issues/109) - use version.py instead of pkg_resources.get_distribution ([#​307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) ##### Performance Improvements - add size parameter for load table from dataframe and json methods ([#​280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) ##### Documentation - update clustering field docstrings ([#​286](https://www.github.com/googleapis/python-bigquery/issues/286)) 
([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#​285](https://www.github.com/googleapis/python-bigquery/issues/285) - update snippets samples to support version 2.0 ([#​309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) ##### Dependencies - add protobuf dependency ([#​306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#​305](https://www.github.com/googleapis/python-bigquery/issues/305) - require pyarrow for pandas support ([#​314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#​265](https://www.github.com/googleapis/python-bigquery/issues/265)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 411a86dae..c5ab3ef3d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.1.0 +google-cloud-bigquery==2.2.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From e51fd45fdb0481ac5d59cc0edbfa0750928b2596 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Oct 2020 16:32:02 -0500 Subject: [PATCH 09/17] feat: add `reload` argument to `*Job.done()` functions (#341) This enables checking the job status without making an API call. It also fixes an inconsistency in `QueryJob`, where a job can be reported as "done" without having the results of a `getQueryResults` API call. Follow-up to https://github.com/googleapis/python-bigquery/pull/340 --- google/cloud/bigquery/job.py | 67 +++++++------- tests/unit/test_job.py | 101 +++++++++++++++++--- tests/unit/test_magics.py | 173 +++++++++++++++++++---------------- 3 files changed, 218 insertions(+), 123 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 977d7a559..204c5f774 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -767,7 +767,7 @@ def _set_future_result(self): # set, do not call set_result/set_exception again. # Note: self._result_set is set to True in set_result and # set_exception, in case those methods are invoked directly. - if self.state != _DONE_STATE or self._result_set: + if not self.done(reload=False) or self._result_set: return if self.error_result is not None: @@ -776,21 +776,24 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None): - """Refresh the job and checks if it is complete. + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. Args: retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. 
- if self.state != _DONE_STATE: + if self.state != _DONE_STATE and reload: self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE @@ -3073,7 +3076,7 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None): + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Refresh the job and checks if it is complete. Args: @@ -3082,10 +3085,25 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. + self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + # Since the API to getQueryResults can hang up to the timeout value # (default of 10 seconds), set the timeout parameter to ensure that # the timeout from the futures API is respected. See: @@ -3103,23 +3121,20 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout - # Do not refresh if the state is already done, as the job will not - # change once complete. - if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE @@ -3231,16 +3246,6 @@ def result( """ try: super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Return an iterator instead of returning the job. 
- if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - location=self.location, - timeout=timeout, - ) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index f577b08bd..2d1e8fec8 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -45,6 +45,8 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +import google.cloud.bigquery.query + def _make_credentials(): import google.auth.credentials @@ -3942,10 +3944,6 @@ def _make_resource(self, started=False, ended=False): resource = super(TestQueryJob, self)._make_resource(started, ended) config = resource["configuration"]["query"] config["query"] = self.QUERY - - if ended: - resource["status"] = {"state": "DONE"} - return resource def _verifyBooleanResourceProperties(self, job, config): @@ -4211,6 +4209,9 @@ def test_done(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) self.assertTrue(job.done()) def test_done_w_timeout(self): @@ -4668,28 +4669,39 @@ def test_result(self): from google.cloud.bigquery.table import RowIterator query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } tabledata_resource = { - # Explicitly set totalRows to be different from the query response. - # to test update during iteration. + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) result = job.result() self.assertIsInstance(result, RowIterator) self.assertEqual(result.total_rows, 2) - rows = list(result) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") @@ -4697,6 +4709,70 @@ def test_result(self): # on the response from tabledata.list. 
self.assertEqual(result.total_rows, 1) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator @@ -4938,6 +5014,9 @@ def test_result_error(self): "errors": [error_result], "state": "DONE", } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) job._set_future_result() with self.assertRaises(exceptions.GoogleCloudError) as exc_info: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 30ca4d70c..b2877845a 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -19,7 +19,6 @@ import mock import pytest -import six try: import pandas @@ -101,27 +100,38 @@ def fail_if(name, globals, locals, fromlist, level): return maybe_fail_import(predicate=fail_if) -JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"} +PROJECT_ID = "its-a-project-eh" +JOB_ID = "some-random-id" +JOB_REFERENCE_RESOURCE = {"projectId": PROJECT_ID, "jobId": JOB_ID} +DATASET_ID = "dest_dataset" +TABLE_ID = "dest_table" TABLE_REFERENCE_RESOURCE = { - "projectId": "its-a-project-eh", - "datasetId": "ds", - "tableId": "persons", + "projectId": PROJECT_ID, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, } +QUERY_STRING = "SELECT 42 AS the_answer FROM `life.the_universe.and_everything`;" QUERY_RESOURCE = { "jobReference": JOB_REFERENCE_RESOURCE, "configuration": { "query": { "destinationTable": TABLE_REFERENCE_RESOURCE, - "query": "SELECT 42 FROM `life.the_universe.and_everything`;", + "query": QUERY_STRING, "queryParameters": [], "useLegacySql": False, } }, "status": 
{"state": "DONE"}, } +QUERY_RESULTS_RESOURCE = { + "jobReference": JOB_REFERENCE_RESOURCE, + "totalRows": 1, + "jobComplete": True, + "schema": {"fields": [{"name": "the_answer", "type": "INTEGER"}]}, +} -def test_context_credentials_auto_set_w_application_default_credentials(): +def test_context_with_default_credentials(): """When Application Default Credentials are set, the context credentials will be created the first time it is called """ @@ -142,6 +152,50 @@ def test_context_credentials_auto_set_w_application_default_credentials(): assert default_mock.call_count == 2 +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_context_with_default_connection(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._credentials = None + magics.context._project = None + magics.context._connection = None + + default_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") + ) + default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) + conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + + with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + + # Check that query actually starts the job. + conn.assert_called() + list_rows.assert_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + default_conn.api_request.assert_has_calls([begin_call, query_results_call]) + + def test_context_credentials_and_project_can_be_set_explicitly(): project1 = "one-project-55564" project2 = "other-project-52569" @@ -163,93 +217,47 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_connection_can_be_overriden(): +def test_context_with_custom_connection(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None magics.context._credentials = None - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) - ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn = magics.context._connection = make_connection(resource, data) - list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", - return_value=google.cloud.bigquery.table._EmptyRowIterator(), + context_conn = magics.context._connection = make_connection( + QUERY_RESOURCE, QUERY_RESULTS_RESOURCE ) - 
with list_rows_patch as list_rows, default_patch: - ip.run_cell_magic("bigquery", "", query) - # Check that query actually starts the job. - list_rows.assert_called() - assert len(conn.api_request.call_args_list) == 2 - _, req = conn.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query - - -@pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_no_connection(): - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - magics.context._project = None - magics.context._credentials = None - magics.context._connection = None - - credentials_mock = mock.create_autospec( + default_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn_mock = make_connection(resource, data, data, data) + default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) - with conn_patch as conn, list_rows_patch as list_rows, default_patch: - conn.return_value = conn_mock - ip.run_cell_magic("bigquery", "", query) - # Check that query actually starts the job. 
+ with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + list_rows.assert_called() - assert len(conn_mock.api_request.call_args_list) == 2 - _, req = conn_mock.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query + default_conn.api_request.assert_not_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + context_conn.api_request.assert_has_calls([begin_call, query_results_call]) def test__run_query(): @@ -1060,6 +1068,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1067,7 +1076,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1098,6 +1107,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1105,7 +1115,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1136,6 +1146,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1143,7 +1154,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): default_patch = mock.patch( "google.auth.default", 
return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), From 5dd1a5e77f13b8e576e917069e247c5390a81900 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Thu, 29 Oct 2020 10:16:07 -0400 Subject: [PATCH 10/17] fix(dbapi): avoid running % format with no query parameters (#348) * fix: aviod running %format when no query params * fix: nit * fix: change in unit test --- google/cloud/bigquery/dbapi/cursor.py | 2 +- tests/unit/test_dbapi_cursor.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 63264e9ab..597313fd6 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -441,7 +441,7 @@ def _format_operation(operation, parameters=None): if a parameter used in the operation is not found in the ``parameters`` argument. """ - if parameters is None: + if parameters is None or len(parameters) == 0: return operation if isinstance(parameters, collections_abc.Mapping): diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 9a1a6b1e8..5c3bfcae9 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -601,3 +601,9 @@ def test__format_operation_w_too_short_sequence(self): "SELECT %s, %s;", ("hello",), ) + + def test__format_operation_w_empty_dict(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%f'", {}) + self.assertEqual(formatted_operation, "SELECT '%f'") From 8a8080ba04647291907e61eea5f21f649fadadb3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 31 Oct 2020 00:28:57 +0100 Subject: [PATCH 11/17] chore(deps): update dependency pandas to v1.1.4 (#355) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c5ab3ef3d..bf895a1ae 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -5,6 +5,6 @@ grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 -pandas==1.1.3 +pandas==1.1.4 pyarrow==1.0.1 pytz==2020.1 From 284e17a17adf6844a17db2c6fed54a649b1f997e Mon Sep 17 00:00:00 2001 From: Ilya Gurov Date: Tue, 3 Nov 2020 18:43:15 +0300 Subject: [PATCH 12/17] feat: pass retry from Job.result() to Job.done() (#41) * feat(bigquery): pass retry from Job.result() to Job.done(). 
* fix merge conflicts * drop the comment * use kwargs sentinel * check the mock retry * update dependencies * use kwargs pattern * feat: added unit test for retry * feat: added more exceptions Co-authored-by: Tim Swast Co-authored-by: HemangChothani --- google/cloud/bigquery/job.py | 16 ++++----- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/test_job.py | 67 ++++++++++++++++++++++++++++++++---- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 204c5f774..e2e7e839a 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -819,8 +819,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """ if self.state is None: self._begin(retry=retry, timeout=timeout) - # TODO: modify PollingFuture so it can pass a retry argument to done(). - return super(_AsyncJob, self).result(timeout=timeout) + + kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} + return super(_AsyncJob, self).result(timeout=timeout, **kwargs) def cancelled(self): """Check if the job has been cancelled. @@ -1845,7 +1846,7 @@ def destination(self): """ return TableReference.from_api_repr( _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"], + self._properties, ["configuration", "copy", "destinationTable"] ) ) @@ -2043,10 +2044,7 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): self._configuration = job_config if source: - source_ref = { - "projectId": source.project, - "datasetId": source.dataset_id, - } + source_ref = {"projectId": source.project, "datasetId": source.dataset_id} if isinstance(source, (Table, TableListItem, TableReference)): source_ref["tableId"] = source.table_id @@ -3138,10 +3136,10 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None): + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout) + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) @staticmethod def _format_for_exception(query, job_id): diff --git a/setup.py b/setup.py index c7410601e..548ceac09 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", + "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index cea0ed84e..91a507a5c 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -1,4 +1,4 @@ -google-api-core==1.22.2 +google-api-core==1.23.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 2d1e8fec8..8590e0576 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -864,7 +864,7 @@ def test_cancel_w_custom_retry(self): job = self._set_properties_job() api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response], + job._client._connection, "api_request", side_effect=[ValueError, response] ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, ValueError) @@ -885,7 +885,7 
@@ def test_cancel_w_custom_retry(self): [ mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5, + method="POST", path=api_path, query_params={}, timeout=7.5 ), # was retried once ], ) @@ -1034,7 +1034,6 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( @@ -2757,7 +2756,7 @@ def test_cancel_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -2779,7 +2778,7 @@ def test_cancel_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3205,7 +3204,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -3620,7 +3619,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -4812,6 +4811,60 @@ def test_result_with_max_results(self): tabledata_list_request[1]["query_params"]["maxResults"], max_results ) + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + 
path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator From 4f326b1ca4411cfbf5ded86955a963d3e05a409f Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 3 Nov 2020 16:58:03 +0100 Subject: [PATCH 13/17] fix: add missing spaces in opentelemetry log message (#360) Currently this log message renders like this: ``` This service is instrumented using OpenTelemetry.OpenTelemetry could not be imported; pleaseadd opentelemetry-api and opentelemetry-instrumentationpackages in order to get BigQuery Tracing data. ``` where it should be ``` This service is instrumented using OpenTelemetry. OpenTelemetry could not be imported; please add opentelemetry-api and opentelemetry-instrumentation packages in order to get BigQuery Tracing data." ``` --- google/cloud/bigquery/opentelemetry_tracing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index f7375c346..b9d18efad 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -26,9 +26,9 @@ except ImportError: logger.info( - "This service is instrumented using OpenTelemetry." - "OpenTelemetry could not be imported; please" - "add opentelemetry-api and opentelemetry-instrumentation" + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry could not be imported; please " + "add opentelemetry-api and opentelemetry-instrumentation " "packages in order to get BigQuery Tracing data." ) From 2849e569d0423e6e40bda953b0e9d38157aaf2df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 09:09:35 -0600 Subject: [PATCH 14/17] refactor: break job into multiple modules (#361) Original paths are retained for backwards compatibility. 
--- google/cloud/bigquery/enums.py | 139 + google/cloud/bigquery/job.py | 3846 --------------- google/cloud/bigquery/job/__init__.py | 77 + google/cloud/bigquery/job/base.py | 912 ++++ google/cloud/bigquery/job/copy_.py | 223 + google/cloud/bigquery/job/extract.py | 266 + google/cloud/bigquery/job/load.py | 758 +++ google/cloud/bigquery/job/query.py | 1644 +++++++ tests/unit/job/__init__.py | 13 + tests/unit/job/helpers.py | 198 + tests/unit/job/test_base.py | 1105 +++++ tests/unit/job/test_copy.py | 477 ++ tests/unit/job/test_extract.py | 437 ++ tests/unit/job/test_load.py | 838 ++++ tests/unit/job/test_load_config.py | 710 +++ tests/unit/job/test_query.py | 1811 +++++++ tests/unit/job/test_query_config.py | 255 + tests/unit/job/test_query_pandas.py | 450 ++ tests/unit/job/test_query_stats.py | 356 ++ tests/unit/test_job.py | 6448 ------------------------- 20 files changed, 10669 insertions(+), 10294 deletions(-) delete mode 100644 google/cloud/bigquery/job.py create mode 100644 google/cloud/bigquery/job/__init__.py create mode 100644 google/cloud/bigquery/job/base.py create mode 100644 google/cloud/bigquery/job/copy_.py create mode 100644 google/cloud/bigquery/job/extract.py create mode 100644 google/cloud/bigquery/job/load.py create mode 100644 google/cloud/bigquery/job/query.py create mode 100644 tests/unit/job/__init__.py create mode 100644 tests/unit/job/helpers.py create mode 100644 tests/unit/job/test_base.py create mode 100644 tests/unit/job/test_copy.py create mode 100644 tests/unit/job/test_extract.py create mode 100644 tests/unit/job/test_load.py create mode 100644 tests/unit/job/test_load_config.py create mode 100644 tests/unit/job/test_query.py create mode 100644 tests/unit/job/test_query_config.py create mode 100644 tests/unit/job/test_query_pandas.py create mode 100644 tests/unit/job/test_query_stats.py delete mode 100644 tests/unit/test_job.py diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index eb33e4276..3f72333af 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -20,6 +20,124 @@ from google.cloud.bigquery_v2 import types as gapic_types +class Compression(object): + """The compression type to use for exported files. The default value is + :attr:`NONE`. + + :attr:`DEFLATE` and :attr:`SNAPPY` are + only supported for Avro. + """ + + GZIP = "GZIP" + """Specifies GZIP format.""" + + DEFLATE = "DEFLATE" + """Specifies DEFLATE format.""" + + SNAPPY = "SNAPPY" + """Specifies SNAPPY format.""" + + NONE = "NONE" + """Specifies no compression.""" + + +class CreateDisposition(object): + """Specifies whether the job is allowed to create new tables. The default + value is :attr:`CREATE_IF_NEEDED`. + + Creation, truncation and append actions occur as one atomic update + upon job completion. + """ + + CREATE_IF_NEEDED = "CREATE_IF_NEEDED" + """If the table does not exist, BigQuery creates the table.""" + + CREATE_NEVER = "CREATE_NEVER" + """The table must already exist. If it does not, a 'notFound' error is + returned in the job result.""" + + +class DestinationFormat(object): + """The exported file format. The default value is :attr:`CSV`. + + Tables with nested or repeated fields cannot be exported as CSV. + """ + + CSV = "CSV" + """Specifies CSV format.""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + +class Encoding(object): + """The character encoding of the data. The default is :attr:`UTF_8`. 
+ + BigQuery decodes the data after the raw, binary data has been + split using the values of the quote and fieldDelimiter properties. + """ + + UTF_8 = "UTF-8" + """Specifies UTF-8 encoding.""" + + ISO_8859_1 = "ISO-8859-1" + """Specifies ISO-8859-1 encoding.""" + + +class QueryPriority(object): + """Specifies a priority for the query. The default value is + :attr:`INTERACTIVE`. + """ + + INTERACTIVE = "INTERACTIVE" + """Specifies interactive priority.""" + + BATCH = "BATCH" + """Specifies batch priority.""" + + +class SchemaUpdateOption(object): + """Specifies an update to the destination table schema as a side effect of + a load job. + """ + + ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" + """Allow adding a nullable field to the schema.""" + + ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" + """Allow relaxing a required field in the original schema to nullable.""" + + +class SourceFormat(object): + """The format of the data files. The default value is :attr:`CSV`. + + Note that the set of allowed values for loading data is different + than the set used for external data sources (see + :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). + """ + + CSV = "CSV" + """Specifies CSV format.""" + + DATASTORE_BACKUP = "DATASTORE_BACKUP" + """Specifies datastore backup format""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + + ORC = "ORC" + """Specifies Orc format.""" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", @@ -92,3 +210,24 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + + +class WriteDisposition(object): + """Specifies the action that occurs if destination table already exists. + + The default value is :attr:`WRITE_APPEND`. + + Each action is atomic and only occurs if BigQuery is able to complete + the job successfully. Creation, truncation and append actions occur as one + atomic update upon job completion. + """ + + WRITE_APPEND = "WRITE_APPEND" + """If the table already exists, BigQuery appends the data to the table.""" + + WRITE_TRUNCATE = "WRITE_TRUNCATE" + """If the table already exists, BigQuery overwrites the table data.""" + + WRITE_EMPTY = "WRITE_EMPTY" + """If the table already exists and contains data, a 'duplicate' error is + returned in the job result.""" diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py deleted file mode 100644 index e2e7e839a..000000000 --- a/google/cloud/bigquery/job.py +++ /dev/null @@ -1,3846 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Define API Jobs.""" - -from __future__ import division - -import concurrent.futures -import copy -import re -import threading - -import requests -import six -from six.moves import http_client - -import google.api_core.future.polling -from google.cloud import exceptions -from google.cloud.exceptions import NotFound -from google.cloud.bigquery.dataset import Dataset -from google.cloud.bigquery.dataset import DatasetListItem -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.external_config import HivePartitioningOptions -from google.cloud.bigquery import _helpers -from google.cloud.bigquery.model import ModelReference -from google.cloud.bigquery.query import _query_param_from_api_repr -from google.cloud.bigquery.query import ArrayQueryParameter -from google.cloud.bigquery.query import ScalarQueryParameter -from google.cloud.bigquery.query import StructQueryParameter -from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY -from google.cloud.bigquery.routine import RoutineReference -from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery.table import _EmptyRowIterator -from google.cloud.bigquery.table import RangePartitioning -from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table -from google.cloud.bigquery.table import TableListItem -from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import TimePartitioning - -_DONE_STATE = "DONE" -_STOPPED_REASON = "stopped" -_TIMEOUT_BUFFER_SECS = 0.1 -_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) - -_ERROR_REASON_TO_EXCEPTION = { - "accessDenied": http_client.FORBIDDEN, - "backendError": http_client.INTERNAL_SERVER_ERROR, - "billingNotEnabled": http_client.FORBIDDEN, - "billingTierLimitExceeded": http_client.BAD_REQUEST, - "blocked": http_client.FORBIDDEN, - "duplicate": http_client.CONFLICT, - "internalError": http_client.INTERNAL_SERVER_ERROR, - "invalid": http_client.BAD_REQUEST, - "invalidQuery": http_client.BAD_REQUEST, - "notFound": http_client.NOT_FOUND, - "notImplemented": http_client.NOT_IMPLEMENTED, - "quotaExceeded": http_client.FORBIDDEN, - "rateLimitExceeded": http_client.FORBIDDEN, - "resourceInUse": http_client.BAD_REQUEST, - "resourcesExceeded": http_client.BAD_REQUEST, - "responseTooLarge": http_client.FORBIDDEN, - "stopped": http_client.OK, - "tableUnavailable": http_client.BAD_REQUEST, -} - - -def _error_result_to_exception(error_result): - """Maps BigQuery error reasons to an exception. - - The reasons and their matching HTTP status codes are documented on - the `troubleshooting errors`_ page. - - .. _troubleshooting errors: https://cloud.google.com/bigquery\ - /troubleshooting-errors - - Args: - error_result (Mapping[str, str]): The error result from BigQuery. - - Returns: - google.cloud.exceptions.GoogleCloudError: The mapped exception. 
- """ - reason = error_result.get("reason") - status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR - ) - return exceptions.from_http_status( - status_code, error_result.get("message", ""), errors=[error_result] - ) - - -def _contains_order_by(query): - """Do we need to preserve the order of the query results? - - This function has known false positives, such as with ordered window - functions: - - .. code-block:: sql - - SELECT SUM(x) OVER ( - window_name - PARTITION BY... - ORDER BY... - window_frame_clause) - FROM ... - - This false positive failure case means the behavior will be correct, but - downloading results with the BigQuery Storage API may be slower than it - otherwise would. This is preferable to the false negative case, where - results are expected to be in order but are not (due to parallel reads). - """ - return query and _CONTAINS_ORDER_BY.search(query) - - -class Compression(object): - """The compression type to use for exported files. The default value is - :attr:`NONE`. - - :attr:`DEFLATE` and :attr:`SNAPPY` are - only supported for Avro. - """ - - GZIP = "GZIP" - """Specifies GZIP format.""" - - DEFLATE = "DEFLATE" - """Specifies DEFLATE format.""" - - SNAPPY = "SNAPPY" - """Specifies SNAPPY format.""" - - NONE = "NONE" - """Specifies no compression.""" - - -class CreateDisposition(object): - """Specifies whether the job is allowed to create new tables. The default - value is :attr:`CREATE_IF_NEEDED`. - - Creation, truncation and append actions occur as one atomic update - upon job completion. - """ - - CREATE_IF_NEEDED = "CREATE_IF_NEEDED" - """If the table does not exist, BigQuery creates the table.""" - - CREATE_NEVER = "CREATE_NEVER" - """The table must already exist. If it does not, a 'notFound' error is - returned in the job result.""" - - -class DestinationFormat(object): - """The exported file format. The default value is :attr:`CSV`. - - Tables with nested or repeated fields cannot be exported as CSV. - """ - - CSV = "CSV" - """Specifies CSV format.""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - -class Encoding(object): - """The character encoding of the data. The default is :attr:`UTF_8`. - - BigQuery decodes the data after the raw, binary data has been - split using the values of the quote and fieldDelimiter properties. - """ - - UTF_8 = "UTF-8" - """Specifies UTF-8 encoding.""" - - ISO_8859_1 = "ISO-8859-1" - """Specifies ISO-8859-1 encoding.""" - - -class QueryPriority(object): - """Specifies a priority for the query. The default value is - :attr:`INTERACTIVE`. - """ - - INTERACTIVE = "INTERACTIVE" - """Specifies interactive priority.""" - - BATCH = "BATCH" - """Specifies batch priority.""" - - -class SourceFormat(object): - """The format of the data files. The default value is :attr:`CSV`. - - Note that the set of allowed values for loading data is different - than the set used for external data sources (see - :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). 
- """ - - CSV = "CSV" - """Specifies CSV format.""" - - DATASTORE_BACKUP = "DATASTORE_BACKUP" - """Specifies datastore backup format""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - PARQUET = "PARQUET" - """Specifies Parquet format.""" - - ORC = "ORC" - """Specifies Orc format.""" - - -class WriteDisposition(object): - """Specifies the action that occurs if destination table already exists. - - The default value is :attr:`WRITE_APPEND`. - - Each action is atomic and only occurs if BigQuery is able to complete - the job successfully. Creation, truncation and append actions occur as one - atomic update upon job completion. - """ - - WRITE_APPEND = "WRITE_APPEND" - """If the table already exists, BigQuery appends the data to the table.""" - - WRITE_TRUNCATE = "WRITE_TRUNCATE" - """If the table already exists, BigQuery overwrites the table data.""" - - WRITE_EMPTY = "WRITE_EMPTY" - """If the table already exists and contains data, a 'duplicate' error is - returned in the job result.""" - - -class SchemaUpdateOption(object): - """Specifies an update to the destination table schema as a side effect of - a load job. - """ - - ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" - """Allow adding a nullable field to the schema.""" - - ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" - """Allow relaxing a required field in the original schema to nullable.""" - - -class _JobReference(object): - """A reference to a job. - - Args: - job_id (str): ID of the job to run. - project (str): ID of the project where the job runs. - location (str): Location of where the job runs. - """ - - def __init__(self, job_id, project, location): - self._properties = {"jobId": job_id, "projectId": project} - # The location field must not be populated if it is None. - if location: - self._properties["location"] = location - - @property - def job_id(self): - """str: ID of the job.""" - return self._properties.get("jobId") - - @property - def project(self): - """str: ID of the project where the job runs.""" - return self._properties.get("projectId") - - @property - def location(self): - """str: Location where the job runs.""" - return self._properties.get("location") - - def _to_api_repr(self): - """Returns the API resource representation of the job reference.""" - return copy.deepcopy(self._properties) - - @classmethod - def _from_api_repr(cls, resource): - """Returns a job reference for an API resource representation.""" - job_id = resource.get("jobId") - project = resource.get("projectId") - location = resource.get("location") - job_ref = cls(job_id, project, location) - return job_ref - - -class _AsyncJob(google.api_core.future.polling.PollingFuture): - """Base class for asynchronous jobs. - - Args: - job_id (Union[str, _JobReference]): - Job's ID in the project associated with the client or a - fully-qualified job reference. - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project configuration. - """ - - def __init__(self, job_id, client): - super(_AsyncJob, self).__init__() - - # The job reference can be either a plain job ID or the full resource. - # Populate the properties dictionary consistently depending on what has - # been passed in. 
- job_ref = job_id - if not isinstance(job_id, _JobReference): - job_ref = _JobReference(job_id, client.project, None) - self._properties = {"jobReference": job_ref._to_api_repr()} - - self._client = client - self._result_set = False - self._completion_lock = threading.Lock() - - @property - def job_id(self): - """str: ID of the job.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) - - @property - def parent_job_id(self): - """Return the ID of the parent job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id - - Returns: - Optional[str]: parent job id. - """ - return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) - - @property - def script_statistics(self): - resource = _helpers._get_sub_prop( - self._properties, ["statistics", "scriptStatistics"] - ) - if resource is None: - return None - return ScriptStatistics(resource) - - @property - def num_child_jobs(self): - """The number of child jobs executed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs - - Returns: - int - """ - count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) - return int(count) if count is not None else 0 - - @property - def project(self): - """Project bound to the job. - - Returns: - str: the project (derived from the client). - """ - return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) - - @property - def location(self): - """str: Location where the job runs.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) - - def _require_client(self, client): - """Check client or verify over-ride. - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - Returns: - google.cloud.bigquery.client.Client: - The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - - @property - def job_type(self): - """Type of job. - - Returns: - str: one of 'load', 'copy', 'extract', 'query'. - """ - return self._JOB_TYPE - - @property - def path(self): - """URL path for the job's APIs. - - Returns: - str: the path based on project and job ID. - """ - return "/projects/%s/jobs/%s" % (self.project, self.job_id) - - @property - def labels(self): - """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault("labels", {}) - - @property - def etag(self): - """ETag for the job resource. - - Returns: - Optional[str]: the ETag (None until set from the server). - """ - return self._properties.get("etag") - - @property - def self_link(self): - """URL for the job resource. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("selfLink") - - @property - def user_email(self): - """E-mail address of user who submitted the job. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("user_email") - - @property - def created(self): - """Datetime at which the job was created. - - Returns: - Optional[datetime.datetime]: - the creation time (None until set from the server). 
- """ - millis = _helpers._get_sub_prop( - self._properties, ["statistics", "creationTime"] - ) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def started(self): - """Datetime at which the job was started. - - Returns: - Optional[datetime.datetime]: - the start time (None until set from the server). - """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def ended(self): - """Datetime at which the job finished. - - Returns: - Optional[datetime.datetime]: - the end time (None until set from the server). - """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - def _job_statistics(self): - """Helper for job-type specific statistics-based properties.""" - statistics = self._properties.get("statistics", {}) - return statistics.get(self._JOB_TYPE, {}) - - @property - def error_result(self): - """Error information about the job as a whole. - - Returns: - Optional[Mapping]: the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errorResult") - - @property - def errors(self): - """Information about individual errors generated by the job. - - Returns: - Optional[List[Mapping]]: - the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errors") - - @property - def state(self): - """Status of the job. - - Returns: - Optional[str]: - the state (None until set from the server). - """ - status = self._properties.get("status", {}) - return status.get("state") - - def _set_properties(self, api_response): - """Update properties from resource in body of ``api_response`` - - Args: - api_response (Dict): response returned from an API call. - """ - cleaned = api_response.copy() - - statistics = cleaned.get("statistics", {}) - if "creationTime" in statistics: - statistics["creationTime"] = float(statistics["creationTime"]) - if "startTime" in statistics: - statistics["startTime"] = float(statistics["startTime"]) - if "endTime" in statistics: - statistics["endTime"] = float(statistics["endTime"]) - - # Save configuration to keep reference same in self._configuration. - cleaned_config = cleaned.pop("configuration", {}) - configuration = self._properties.pop("configuration", {}) - self._properties.clear() - self._properties.update(cleaned) - self._properties["configuration"] = configuration - self._properties["configuration"].update(cleaned_config) - - # For Future interface - self._set_future_result() - - @classmethod - def _check_resource_config(cls, resource): - """Helper for :meth:`from_api_repr` - - Args: - resource (Dict): resource for the job. - - Raises: - KeyError: - If the resource has no identifier, or - is missing the appropriate configuration. 
- """ - if "jobReference" not in resource or "jobId" not in resource["jobReference"]: - raise KeyError( - "Resource lacks required identity information: " - '["jobReference"]["jobId"]' - ) - if ( - "configuration" not in resource - or cls._JOB_TYPE not in resource["configuration"] - ): - raise KeyError( - "Resource lacks required configuration: " - '["configuration"]["%s"]' % cls._JOB_TYPE - ) - - def to_api_repr(self): - """Generate a resource for the job.""" - return copy.deepcopy(self._properties) - - _build_resource = to_api_repr # backward-compatibility alias - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. If not passed, falls back to the ``client`` - associated with the job object or``NoneType`` - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: - If the job has already begun. - """ - if self.state is not None: - raise ValueError("Job already begun.") - - client = self._require_client(client) - path = "/projects/%s/jobs" % (self.project,) - - # jobs.insert is idempotent because we ensure that every new - # job has an ID. - span_attributes = {"path": path} - api_response = client._call_api( - retry, - span_name="BigQuery.job.begin", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - data=self.to_api_repr(), - timeout=timeout, - ) - self._set_properties(api_response) - - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: test for the existence of the job via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Returns: - bool: Boolean indicating existence of the job. - """ - client = self._require_client(client) - - extra_params = {"fields": "id"} - if self.location: - extra_params["location"] = self.location - - try: - span_attributes = {"path": self.path} - - client._call_api( - retry, - span_name="BigQuery.job.exists", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - except NotFound: - return False - else: - return True - - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: refresh job properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. 
- """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - span_attributes = {"path": self.path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.reload", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response) - - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: cancel job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry`` - - Returns: - bool: Boolean indicating that the cancel request was sent. - """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - - path = "{}/cancel".format(self.path) - span_attributes = {"path": path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.cancel", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response["job"]) - # The Future interface requires that we return True if the *attempt* - # to cancel was successful. - return True - - # The following methods implement the PollingFuture interface. Note that - # the methods above are from the pre-Future interface and are left for - # compatibility. The only "overloaded" method is :meth:`cancel`, which - # satisfies both interfaces. - - def _set_future_result(self): - """Set the result or exception from the job if it is complete.""" - # This must be done in a lock to prevent the polling thread - # and main thread from both executing the completion logic - # at the same time. - with self._completion_lock: - # If the operation isn't complete or if the result has already been - # set, do not call set_result/set_exception again. - # Note: self._result_set is set to True in set_result and - # set_exception, in case those methods are invoked directly. - if not self.done(reload=False) or self._result_set: - return - - if self.error_result is not None: - exception = _error_result_to_exception(self.error_result) - self.set_exception(exception) - else: - self.set_result(self) - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Checks if the job is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - # Do not refresh is the state is already done, as the job will not - # change once complete. - if self.state != _DONE_STATE and reload: - self.reload(retry=retry, timeout=timeout) - return self.state == _DONE_STATE - - def result(self, retry=DEFAULT_RETRY, timeout=None): - """Start the job and wait for it to complete and get the result. 
- - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - - Returns: - _AsyncJob: This instance. - - Raises: - google.cloud.exceptions.GoogleCloudError: - if the job failed. - concurrent.futures.TimeoutError: - if the job did not complete in the given timeout. - """ - if self.state is None: - self._begin(retry=retry, timeout=timeout) - - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) - - def cancelled(self): - """Check if the job has been cancelled. - - This always returns False. It's not possible to check if a job was - cancelled in the API. This method is here to satisfy the interface - for :class:`google.api_core.future.Future`. - - Returns: - bool: False - """ - return ( - self.error_result is not None - and self.error_result.get("reason") == _STOPPED_REASON - ) - - -class _JobConfig(object): - """Abstract base class for job configuration objects. - - Args: - job_type (str): The key to use for the job configuration. - """ - - def __init__(self, job_type, **kwargs): - self._job_type = job_type - self._properties = {job_type: {}} - for prop, val in kwargs.items(): - setattr(self, prop, val) - - @property - def labels(self): - """Dict[str, str]: Labels for the job. - - This method always returns a dict. To change a job's labels, - modify the dict, then call ``Client.update_job``. To delete a - label, set its value to :data:`None` before updating. - - Raises: - ValueError: If ``value`` type is invalid. - """ - return self._properties.setdefault("labels", {}) - - @labels.setter - def labels(self, value): - if not isinstance(value, dict): - raise ValueError("Pass a dict") - self._properties["labels"] = value - - def _get_sub_prop(self, key, default=None): - """Get a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access - those properties:: - - self._get_sub_prop('destinationTable') - - This is equivalent to using the ``_helpers._get_sub_prop`` function:: - - _helpers._get_sub_prop( - self._properties, ['query', 'destinationTable']) - - Args: - key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. - default (Optional[object]): - Default value to return if the key is not found. - Defaults to :data:`None`. - - Returns: - object: The value if present or the default. - """ - return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default - ) - - def _set_sub_prop(self, key, value): - """Set a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set - those properties:: - - self._set_sub_prop('useLegacySql', False) - - This is equivalent to using the ``_helper._set_sub_prop`` function:: - - _helper._set_sub_prop( - self._properties, ['query', 'useLegacySql'], False) - - Args: - key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. - value (object): Value to set. 
- """ - _helpers._set_sub_prop(self._properties, [self._job_type, key], value) - - def _del_sub_prop(self, key): - """Remove ``key`` from the ``self._properties[self._job_type]`` dict. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear - those properties:: - - self._del_sub_prop('useLegacySql') - - This is equivalent to using the ``_helper._del_sub_prop`` function:: - - _helper._del_sub_prop( - self._properties, ['query', 'useLegacySql']) - - Args: - key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. - """ - _helpers._del_sub_prop(self._properties, [self._job_type, key]) - - def to_api_repr(self): - """Build an API representation of the job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - def _fill_from_default(self, default_job_config): - """Merge this job config with a default job config. - - The keys in this object take precedence over the keys in the default - config. The merge is done at the top-level as well as for keys one - level below the job type. - - Args: - default_job_config (google.cloud.bigquery.job._JobConfig): - The default job config that will be used to fill in self. - - Returns: - google.cloud.bigquery.job._JobConfig: A new (merged) job config. - """ - if self._job_type != default_job_config._job_type: - raise TypeError( - "attempted to merge two incompatible job types: " - + repr(self._job_type) - + ", " - + repr(default_job_config._job_type) - ) - - new_job_config = self.__class__() - - default_job_properties = copy.deepcopy(default_job_config._properties) - for key in self._properties: - if key != self._job_type: - default_job_properties[key] = self._properties[key] - - default_job_properties[self._job_type].update(self._properties[self._job_type]) - new_job_config._properties = default_job_properties - - return new_job_config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation - - Args: - resource (Dict): - A job configuration in the same representation as is returned - from the API. - - Returns: - google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. - """ - job_config = cls() - job_config._properties = resource - return job_config - - -class LoadJobConfig(_JobConfig): - """Configuration options for load jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(LoadJobConfig, self).__init__("load", **kwargs) - - @property - def allow_jagged_rows(self): - """Optional[bool]: Allow missing trailing optional columns (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows - """ - return self._get_sub_prop("allowJaggedRows") - - @allow_jagged_rows.setter - def allow_jagged_rows(self, value): - self._set_sub_prop("allowJaggedRows", value) - - @property - def allow_quoted_newlines(self): - """Optional[bool]: Allow quoted data containing newline characters (CSV only). 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines - """ - return self._get_sub_prop("allowQuotedNewlines") - - @allow_quoted_newlines.setter - def allow_quoted_newlines(self, value): - self._set_sub_prop("allowQuotedNewlines", value) - - @property - def autodetect(self): - """Optional[bool]: Automatically infer the schema from a sample of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect - """ - return self._get_sub_prop("autodetect") - - @autodetect.setter - def autodetect(self, value): - self._set_sub_prop("autodetect", value) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. - """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def create_disposition(self): - """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior - for creating tables. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def destination_encryption_configuration(self): - """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - else: - self._del_sub_prop("destinationEncryptionConfiguration") - - @property - def destination_table_description(self): - """Optional[str]: Name given to destination table. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["description"] - - @destination_table_description.setter - def destination_table_description(self, value): - keys = [self._job_type, "destinationTableProperties", "description"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def destination_table_friendly_name(self): - """Optional[str]: Name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["friendlyName"] - - @destination_table_friendly_name.setter - def destination_table_friendly_name(self, value): - keys = [self._job_type, "destinationTableProperties", "friendlyName"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def encoding(self): - """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the - data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding - """ - return self._get_sub_prop("encoding") - - @encoding.setter - def encoding(self, value): - self._set_sub_prop("encoding", value) - - @property - def field_delimiter(self): - """Optional[str]: The separator for fields in a CSV file. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ - it configures hive partitioning support. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options - """ - prop = self._get_sub_prop("hivePartitioningOptions") - if prop is None: - return None - return HivePartitioningOptions.from_api_repr(prop) - - @hive_partitioning.setter - def hive_partitioning(self, value): - if value is not None: - if isinstance(value, HivePartitioningOptions): - value = value.to_api_repr() - else: - raise TypeError("Expected a HivePartitioningOptions instance or None.") - - self._set_sub_prop("hivePartitioningOptions", value) - - @property - def ignore_unknown_values(self): - """Optional[bool]: Ignore extra values not represented in the table schema. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values - """ - return self._get_sub_prop("ignoreUnknownValues") - - @ignore_unknown_values.setter - def ignore_unknown_values(self, value): - self._set_sub_prop("ignoreUnknownValues", value) - - @property - def max_bad_records(self): - """Optional[int]: Number of invalid rows to ignore. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records - """ - return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) - - @max_bad_records.setter - def max_bad_records(self, value): - self._set_sub_prop("maxBadRecords", value) - - @property - def null_marker(self): - """Optional[str]: Represents a null value (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker - """ - return self._get_sub_prop("nullMarker") - - @null_marker.setter - def null_marker(self, value): - self._set_sub_prop("nullMarker", value) - - @property - def quote_character(self): - """Optional[str]: Character used to quote data sections (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote - """ - return self._get_sub_prop("quote") - - @quote_character.setter - def quote_character(self, value): - self._set_sub_prop("quote", value) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. - """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def schema(self): - """Optional[Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]]: Schema of the destination table. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema - """ - schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) - if schema is None: - return - return [SchemaField.from_api_repr(field) for field in schema] - - @schema.setter - def schema(self, value): - if value is None: - self._del_sub_prop("schema") - return - - value = _to_schema_fields(value) - - _helpers._set_sub_prop( - self._properties, - ["load", "schema", "fields"], - [field.to_api_repr() for field in value], - ) - - @property - def schema_update_options(self): - """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies - updates to the destination table schema to allow as a side effect of - the load job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - @property - def skip_leading_rows(self): - """Optional[int]: Number of rows to skip when reading data (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows - """ - return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) - - @skip_leading_rows.setter - def skip_leading_rows(self, value): - self._set_sub_prop("skipLeadingRows", str(value)) - - @property - def source_format(self): - """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format - """ - return self._get_sub_prop("sourceFormat") - - @source_format.setter - def source_format(self, value): - self._set_sub_prop("sourceFormat", value) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based - partitioning for the destination table. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - else: - self._del_sub_prop("timePartitioning") - - @property - def use_avro_logical_types(self): - """Optional[bool]: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). - """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - @property - def write_disposition(self): - """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if - the destination table already exists. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - -class LoadJob(_AsyncJob): - """Asynchronous job for loading data into a table. - - Can load from Google Cloud Storage URIs or from a file. - - Args: - job_id (str): the job's ID - - source_uris (Optional[Sequence[str]]): - URIs of one or more data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. Pass None for jobs that load from a file. - - destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - """ - - _JOB_TYPE = "load" - - def __init__(self, job_id, source_uris, destination, client, job_config=None): - super(LoadJob, self).__init__(job_id, client) - - if not job_config: - job_config = LoadJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if source_uris is not None: - _helpers._set_sub_prop( - self._properties, ["configuration", "load", "sourceUris"], source_uris - ) - - if destination is not None: - _helpers._set_sub_prop( - self._properties, - ["configuration", "load", "destinationTable"], - destination.to_api_repr(), - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where loaded rows are written - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table - """ - dest_config = _helpers._get_sub_prop( - self._properties, ["configuration", "load", "destinationTable"] - ) - return TableReference.from_api_repr(dest_config) - - @property - def source_uris(self): - """Optional[Sequence[str]]: URIs of data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. None for jobs that load from a file. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "load", "sourceUris"] - ) - - @property - def allow_jagged_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. - """ - return self._configuration.allow_jagged_rows - - @property - def allow_quoted_newlines(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. - """ - return self._configuration.allow_quoted_newlines - - @property - def autodetect(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. - """ - return self._configuration.autodetect - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def encoding(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. 
- """ - return self._configuration.encoding - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def ignore_unknown_values(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. - """ - return self._configuration.ignore_unknown_values - - @property - def max_bad_records(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. - """ - return self._configuration.max_bad_records - - @property - def null_marker(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. - """ - return self._configuration.null_marker - - @property - def quote_character(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. - """ - return self._configuration.quote_character - - @property - def skip_leading_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. - """ - return self._configuration.skip_leading_rows - - @property - def source_format(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. - """ - return self._configuration.source_format - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def schema(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. - """ - return self._configuration.schema - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) - or :data:`None` if using default encryption. - - See - :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def destination_table_description(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - return self._configuration.destination_table_description - - @property - def destination_table_friendly_name(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - return self._configuration.destination_table_friendly_name - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def use_avro_logical_types(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. - """ - return self._configuration.use_avro_logical_types - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. 
- """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - @property - def input_file_bytes(self): - """Count of bytes loaded from source files. - - Returns: - Optional[int]: the count (None until set from the server). - - Raises: - ValueError: for invalid value types. - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFileBytes"] - ) - ) - - @property - def input_files(self): - """Count of source files. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFiles"] - ) - ) - - @property - def output_bytes(self): - """Count of bytes saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputBytes"] - ) - ) - - @property - def output_rows(self): - """Count of rows saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputRows"] - ) - ) - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client) - job._set_properties(resource) - return job - - -class CopyJobConfig(_JobConfig): - """Configuration options for copy jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(CopyJobConfig, self).__init__("copy", **kwargs) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. 
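# A minimal usage sketch of the LoadJobConfig/LoadJob API documented here,
# driven through Client.load_table_from_uri. Assumes default credentials;
# the project, dataset, table, and bucket names are hypothetical placeholders.
from google.cloud import bigquery

client = bigquery.Client()

load_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
    autodetect=True,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

load_job = client.load_table_from_uri(
    "gs://example-bucket/data/*.csv",                  # hypothetical source URIs
    "example-project.example_dataset.example_table",   # hypothetical destination
    job_config=load_config,
)
load_job.result()  # block until the load completes
print(load_job.input_files, load_job.output_rows)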
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - -class CopyJob(_AsyncJob): - """Asynchronous job: copy data into a table from other tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. - - destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): - Extra configuration options for the copy job. - """ - - _JOB_TYPE = "copy" - - def __init__(self, job_id, sources, destination, client, job_config=None): - super(CopyJob, self).__init__(job_id, client) - - if not job_config: - job_config = CopyJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if destination: - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "destinationTable"], - destination.to_api_repr(), - ) - - if sources: - source_resources = [source.to_api_repr() for source in sources] - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "sourceTables"], - source_resources, - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: Table into which data - is to be loaded. - """ - return TableReference.from_api_repr( - _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"] - ) - ) - - @property - def sources(self): - """List[google.cloud.bigquery.table.TableReference]): Table(s) from - which data is to be loaded. 
- """ - source_configs = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTables"] - ) - if source_configs is None: - single = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTable"] - ) - if single is None: - raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") - source_configs = [single] - - sources = [] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) - return sources - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -class ExtractJobConfig(_JobConfig): - """Configuration options for extract jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(ExtractJobConfig, self).__init__("extract", **kwargs) - - @property - def compression(self): - """google.cloud.bigquery.job.Compression: Compression type to use for - exported files. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression - """ - return self._get_sub_prop("compression") - - @compression.setter - def compression(self, value): - self._set_sub_prop("compression", value) - - @property - def destination_format(self): - """google.cloud.bigquery.job.DestinationFormat: Exported file format. 
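# A minimal usage sketch of CopyJobConfig with Client.copy_table. Assumes
# default credentials; the table IDs are hypothetical placeholders.
from google.cloud import bigquery

client = bigquery.Client()

copy_config = bigquery.CopyJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_EMPTY,
)

copy_job = client.copy_table(
    [
        "example-project.example_dataset.table_a",   # one or more source tables
        "example-project.example_dataset.table_b",
    ],
    "example-project.example_dataset.table_merged",   # destination table
    job_config=copy_config,
)
copy_job.result()  # wait for the copy to finish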
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format - """ - return self._get_sub_prop("destinationFormat") - - @destination_format.setter - def destination_format(self, value): - self._set_sub_prop("destinationFormat", value) - - @property - def field_delimiter(self): - """str: Delimiter to use between fields in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def print_header(self): - """bool: Print a header row in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header - """ - return self._get_sub_prop("printHeader") - - @print_header.setter - def print_header(self, value): - self._set_sub_prop("printHeader", value) - - @property - def use_avro_logical_types(self): - """bool: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). - """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - -class ExtractJob(_AsyncJob): - """Asynchronous job: extract data from a table into Cloud Storage. - - Args: - job_id (str): the job's ID. - - source (Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]): - Table or Model from which data is to be loaded or extracted. - - destination_uris (List[str]): - URIs describing where the extracted data will be written in Cloud - Storage, using the format ``gs:///``. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration. - - job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): - Extra configuration options for the extract job. - """ - - _JOB_TYPE = "extract" - - def __init__(self, job_id, source, destination_uris, client, job_config=None): - super(ExtractJob, self).__init__(job_id, client) - - if job_config is None: - job_config = ExtractJobConfig() - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if source: - source_ref = {"projectId": source.project, "datasetId": source.dataset_id} - - if isinstance(source, (Table, TableListItem, TableReference)): - source_ref["tableId"] = source.table_id - source_key = "sourceTable" - else: - source_ref["modelId"] = source.model_id - source_key = "sourceModel" - - _helpers._set_sub_prop( - self._properties, ["configuration", "extract", source_key], source_ref - ) - - if destination_uris: - _helpers._set_sub_prop( - self._properties, - ["configuration", "extract", "destinationUris"], - destination_uris, - ) - - @property - def source(self): - """Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]: Table or Model from which data is to be loaded or extracted. 
- """ - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceTable"] - ) - if source_config: - return TableReference.from_api_repr(source_config) - else: - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceModel"] - ) - return ModelReference.from_api_repr(source_config) - - @property - def destination_uris(self): - """List[str]: URIs describing where the extracted data will be - written in Cloud Storage, using the format - ``gs:///``. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "destinationUris"] - ) - - @property - def compression(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. - """ - return self._configuration.compression - - @property - def destination_format(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. - """ - return self._configuration.destination_format - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def print_header(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. - """ - return self._configuration.print_header - - @property - def destination_uri_file_counts(self): - """Return file counts from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts - - Returns: - List[int]: - A list of integer counts, each representing the number of files - per destination URI or URI pattern specified in the extract - configuration. These values will be in the same order as the URIs - specified in the 'destinationUris' field. Returns None if job is - not yet complete. - """ - counts = self._job_statistics().get("destinationUriFileCounts") - if counts is not None: - return [int(count) for count in counts] - return None - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. 
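# A minimal usage sketch of ExtractJobConfig with Client.extract_table.
# Bucket and table names are hypothetical placeholders.
from google.cloud import bigquery

client = bigquery.Client()

extract_config = bigquery.ExtractJobConfig(
    destination_format=bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON,
    compression=bigquery.Compression.GZIP,
)

extract_job = client.extract_table(
    "example-project.example_dataset.example_table",
    "gs://example-bucket/exports/example-*.json.gz",   # sharded destination URI
    job_config=extract_config,
)
extract_job.result()
print(extract_job.destination_uri_file_counts)  # files written per URI pattern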
- """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -def _from_api_repr_query_parameters(resource): - return [_query_param_from_api_repr(mapping) for mapping in resource] - - -def _to_api_repr_query_parameters(value): - return [query_parameter.to_api_repr() for query_parameter in value] - - -def _from_api_repr_udf_resources(resource): - udf_resources = [] - for udf_mapping in resource: - for udf_type, udf_value in udf_mapping.items(): - udf_resources.append(UDFResource(udf_type, udf_value)) - return udf_resources - - -def _to_api_repr_udf_resources(value): - return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] - - -def _from_api_repr_table_defs(resource): - return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} - - -def _to_api_repr_table_defs(value): - return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} - - -class QueryJobConfig(_JobConfig): - """Configuration options for query jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(QueryJobConfig, self).__init__("query", **kwargs) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - @property - def allow_large_results(self): - """bool: Allow large query results tables (legacy SQL, only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results - """ - return self._get_sub_prop("allowLargeResults") - - @allow_large_results.setter - def allow_large_results(self, value): - self._set_sub_prop("allowLargeResults", value) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def default_dataset(self): - """google.cloud.bigquery.dataset.DatasetReference: the default dataset - to use for unqualified table names in the query or :data:`None` if not - set. 
- - The ``default_dataset`` setter accepts: - - - a :class:`~google.cloud.bigquery.dataset.Dataset`, or - - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or - - a :class:`str` of the fully-qualified dataset ID in standard SQL - format. The value must included a project ID and dataset ID - separated by ``.``. For example: ``your-project.your_dataset``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset - """ - prop = self._get_sub_prop("defaultDataset") - if prop is not None: - prop = DatasetReference.from_api_repr(prop) - return prop - - @default_dataset.setter - def default_dataset(self, value): - if value is None: - self._set_sub_prop("defaultDataset", None) - return - - if isinstance(value, six.string_types): - value = DatasetReference.from_string(value) - - if isinstance(value, (Dataset, DatasetListItem)): - value = value.reference - - resource = value.to_api_repr() - self._set_sub_prop("defaultDataset", resource) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where results are - written or :data:`None` if not set. - - The ``destination`` setter accepts: - - - a :class:`~google.cloud.bigquery.table.Table`, or - - a :class:`~google.cloud.bigquery.table.TableReference`, or - - a :class:`str` of the fully-qualified table ID in standard SQL - format. The value must included a project ID, dataset ID, and table - ID, each separated by ``.``. For example: - ``your-project.your_dataset.your_table``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table - """ - prop = self._get_sub_prop("destinationTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @destination.setter - def destination(self, value): - if value is None: - self._set_sub_prop("destinationTable", None) - return - - value = _table_arg_to_table_ref(value) - resource = value.to_api_repr() - self._set_sub_prop("destinationTable", resource) - - @property - def dry_run(self): - """bool: :data:`True` if this query should be a dry run to estimate - costs. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run - """ - return self._properties.get("dryRun") - - @dry_run.setter - def dry_run(self, value): - self._properties["dryRun"] = value - - @property - def flatten_results(self): - """bool: Flatten nested/repeated fields in results. (Legacy SQL only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results - """ - return self._get_sub_prop("flattenResults") - - @flatten_results.setter - def flatten_results(self, value): - self._set_sub_prop("flattenResults", value) - - @property - def maximum_billing_tier(self): - """int: Deprecated. Changes the billing tier to allow high-compute - queries. 
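# A minimal sketch of a dry-run cost estimate combining the dry_run and
# default_dataset options; the project and dataset IDs are hypothetical.
from google.cloud import bigquery

client = bigquery.Client()

dry_run_config = bigquery.QueryJobConfig(
    dry_run=True,
    use_query_cache=False,
    default_dataset="example-project.example_dataset",
)

# Unqualified table names resolve against the default dataset. A dry run
# never executes the query, so the byte estimate is available immediately.
job = client.query("SELECT COUNT(*) FROM events", job_config=dry_run_config)
print(job.total_bytes_processed)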
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier - """ - return self._get_sub_prop("maximumBillingTier") - - @maximum_billing_tier.setter - def maximum_billing_tier(self, value): - self._set_sub_prop("maximumBillingTier", value) - - @property - def maximum_bytes_billed(self): - """int: Maximum bytes to be billed for this job or :data:`None` if not set. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed - """ - return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) - - @maximum_bytes_billed.setter - def maximum_bytes_billed(self, value): - self._set_sub_prop("maximumBytesBilled", str(value)) - - @property - def priority(self): - """google.cloud.bigquery.job.QueryPriority: Priority of the query. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority - """ - return self._get_sub_prop("priority") - - @priority.setter - def priority(self, value): - self._set_sub_prop("priority", value) - - @property - def query_parameters(self): - """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter]]: list of parameters - for parameterized query (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters - """ - prop = self._get_sub_prop("queryParameters", default=[]) - return _from_api_repr_query_parameters(prop) - - @query_parameters.setter - def query_parameters(self, values): - self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. 
- """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def udf_resources(self): - """List[google.cloud.bigquery.query.UDFResource]: user - defined function resources (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources - """ - prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) - return _from_api_repr_udf_resources(prop) - - @udf_resources.setter - def udf_resources(self, values): - self._set_sub_prop( - "userDefinedFunctionResources", _to_api_repr_udf_resources(values) - ) - - @property - def use_legacy_sql(self): - """bool: Use legacy SQL syntax. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql - """ - return self._get_sub_prop("useLegacySql") - - @use_legacy_sql.setter - def use_legacy_sql(self, value): - self._set_sub_prop("useLegacySql", value) - - @property - def use_query_cache(self): - """bool: Look for the query result in the cache. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache - """ - return self._get_sub_prop("useQueryCache") - - @use_query_cache.setter - def use_query_cache(self, value): - self._set_sub_prop("useQueryCache", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def table_definitions(self): - """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: - Definitions for external tables or :data:`None` if not set. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions - """ - prop = self._get_sub_prop("tableDefinitions") - if prop is not None: - prop = _from_api_repr_table_defs(prop) - return prop - - @table_definitions.setter - def table_definitions(self, values): - self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies - time-based partitioning for the destination table. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.TimePartitioning` or - :data:`None`. 
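# A minimal sketch combining several QueryJobConfig options on one query:
# a destination table, a cost cap, and a named query parameter. Identifiers
# are hypothetical placeholders.
from google.cloud import bigquery

client = bigquery.Client()

query_config = bigquery.QueryJobConfig(
    destination="example-project.example_dataset.query_results",
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    use_query_cache=False,
    maximum_bytes_billed=10 * 1024 ** 3,  # fail the job past ~10 GiB billed
    query_parameters=[
        bigquery.ScalarQueryParameter("min_total", "INT64", 100),
    ],
)

query_job = client.query(
    "SELECT name, total FROM `example-project.example_dataset.stats` "
    "WHERE total >= @min_total",
    job_config=query_config,
)
query_job.result()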
- """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. - """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def schema_update_options(self): - """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies - updates to the destination table schema to allow as a side effect of - the query job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - def to_api_repr(self): - """Build an API representation of the query job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - resource = copy.deepcopy(self._properties) - - # Query parameters have an addition property associated with them - # to indicate if the query is using named or positional parameters. - query_parameters = resource["query"].get("queryParameters") - if query_parameters: - if query_parameters[0].get("name") is None: - resource["query"]["parameterMode"] = "POSITIONAL" - else: - resource["query"]["parameterMode"] = "NAMED" - - return resource - - -class QueryJob(_AsyncJob): - """Asynchronous job: query tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - query (str): SQL query string. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): - Extra configuration options for the query job. - """ - - _JOB_TYPE = "query" - _UDF_KEY = "userDefinedFunctionResources" - - def __init__(self, job_id, query, client, job_config=None): - super(QueryJob, self).__init__(job_id, client) - - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if query: - _helpers._set_sub_prop( - self._properties, ["configuration", "query", "query"], query - ) - - self._query_results = None - self._done_timeout = None - self._transport_timeout = None - - @property - def allow_large_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. - """ - return self._configuration.allow_large_results - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. 
- """ - return self._configuration.create_disposition - - @property - def default_dataset(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. - """ - return self._configuration.default_dataset - - @property - def destination(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. - """ - return self._configuration.destination - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def dry_run(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. - """ - return self._configuration.dry_run - - @property - def flatten_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. - """ - return self._configuration.flatten_results - - @property - def priority(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. - """ - return self._configuration.priority - - @property - def query(self): - """str: The query text used in this query job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "query", "query"] - ) - - @property - def query_parameters(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. - """ - return self._configuration.query_parameters - - @property - def udf_resources(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. - """ - return self._configuration.udf_resources - - @property - def use_legacy_sql(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. - """ - return self._configuration.use_legacy_sql - - @property - def use_query_cache(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. - """ - return self._configuration.use_query_cache - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def maximum_billing_tier(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. - """ - return self._configuration.maximum_billing_tier - - @property - def maximum_bytes_billed(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. - """ - return self._configuration.maximum_bytes_billed - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def table_definitions(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. - """ - return self._configuration.table_definitions - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. 
- """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Use to_api_repr to allow for some configuration properties to be set - # automatically. - configuration = self._configuration.to_api_repr() - return { - "jobReference": self._properties["jobReference"], - "configuration": configuration, - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, client=client) - job._set_properties(resource) - return job - - @property - def query_plan(self): - """Return query plan from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan - - Returns: - List[google.cloud.bigquery.job.QueryPlanEntry]: - mappings describing the query plan, or an empty list - if the query has not yet completed. - """ - plan_entries = self._job_statistics().get("queryPlan", ()) - return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] - - @property - def timeline(self): - """List(TimelineEntry): Return the query execution timeline - from job statistics. - """ - raw = self._job_statistics().get("timeline", ()) - return [TimelineEntry.from_api_repr(entry) for entry in raw] - - @property - def total_bytes_processed(self): - """Return total bytes processed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesProcessed") - if result is not None: - result = int(result) - return result - - @property - def total_bytes_billed(self): - """Return total bytes billed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesBilled") - if result is not None: - result = int(result) - return result - - @property - def billing_tier(self): - """Return billing tier from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier - - Returns: - Optional[int]: - Billing tier used by the job, or None if job is not - yet complete. - """ - return self._job_statistics().get("billingTier") - - @property - def cache_hit(self): - """Return whether or not query results were served from cache. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit - - Returns: - Optional[bool]: - whether the query results were returned from cache, or None - if job is not yet complete. - """ - return self._job_statistics().get("cacheHit") - - @property - def ddl_operation_performed(self): - """Optional[str]: Return the DDL operation performed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed - - """ - return self._job_statistics().get("ddlOperationPerformed") - - @property - def ddl_target_routine(self): - """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present - for CREATE/DROP FUNCTION/PROCEDURE queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine - """ - prop = self._job_statistics().get("ddlTargetRoutine") - if prop is not None: - prop = RoutineReference.from_api_repr(prop) - return prop - - @property - def ddl_target_table(self): - """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present - for CREATE/DROP TABLE/VIEW queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table - """ - prop = self._job_statistics().get("ddlTargetTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @property - def num_dml_affected_rows(self): - """Return the number of DML rows affected by the job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("numDmlAffectedRows") - if result is not None: - result = int(result) - return result - - @property - def slot_millis(self): - """Union[int, None]: Slot-milliseconds used by this query job.""" - return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) - - @property - def statement_type(self): - """Return statement type from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type - - Returns: - Optional[str]: - type of statement used by the job, or None if job is not - yet complete. - """ - return self._job_statistics().get("statementType") - - @property - def referenced_tables(self): - """Return referenced tables from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables - - Returns: - List[Dict]: - mappings describing the query plan, or an empty list - if the query has not yet completed. 
- """ - tables = [] - datasets_by_project_name = {} - - for table in self._job_statistics().get("referencedTables", ()): - - t_project = table["projectId"] - - ds_id = table["datasetId"] - t_dataset = datasets_by_project_name.get((t_project, ds_id)) - if t_dataset is None: - t_dataset = DatasetReference(t_project, ds_id) - datasets_by_project_name[(t_project, ds_id)] = t_dataset - - t_name = table["tableId"] - tables.append(t_dataset.table(t_name)) - - return tables - - @property - def undeclared_query_parameters(self): - """Return undeclared query parameters from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters - - Returns: - List[Union[ \ - google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter \ - ]]: - Undeclared parameters, or an empty list if the query has - not yet completed. - """ - parameters = [] - undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) - - for parameter in undeclared: - p_type = parameter["parameterType"] - - if "arrayType" in p_type: - klass = ArrayQueryParameter - elif "structTypes" in p_type: - klass = StructQueryParameter - else: - klass = ScalarQueryParameter - - parameters.append(klass.from_api_repr(parameter)) - - return parameters - - @property - def estimated_bytes_processed(self): - """Return the estimated number of bytes processed by the query. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("estimatedBytesProcessed") - if result is not None: - result = int(result) - return result - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - is_done = ( - # Only consider a QueryJob complete when we know we have the final - # query results available. - self._query_results is not None - and self._query_results.complete - and self.state == _DONE_STATE - ) - # Do not refresh if the state is already done, as the job will not - # change once complete. - if not reload or is_done: - return is_done - - # Since the API to getQueryResults can hang up to the timeout value - # (default of 10 seconds), set the timeout parameter to ensure that - # the timeout from the futures API is respected. See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 - timeout_ms = None - if self._done_timeout is not None: - # Subtract a buffer for context switching, network latency, etc. 
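# A minimal sketch of reading the job-statistics properties once a query
# job is done; the table name is a hypothetical placeholder.
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT COUNT(*) FROM `example-project.example_dataset.events`"
)
job.result()  # statistics below are None/empty until the job completes

print(job.statement_type)         # e.g. "SELECT"
print(job.cache_hit)              # True if served from the query cache
print(job.total_bytes_processed)
print(job.slot_millis)            # slot-milliseconds consumed
for table in job.referenced_tables:
    print(table.project, table.dataset_id, table.table_id)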
- api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - api_timeout = max(min(api_timeout, 10), 0) - self._done_timeout -= api_timeout - self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(api_timeout * 1000) - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete and self.state != _DONE_STATE: - self.reload(retry=retry, timeout=transport_timeout) - - return self.state == _DONE_STATE - - def _blocking_poll(self, timeout=None, **kwargs): - self._done_timeout = timeout - self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) - - @staticmethod - def _format_for_exception(query, job_id): - """Format a query for the output in exception message. - - Args: - query (str): The SQL query to format. - job_id (str): The ID of the job that ran the query. - - Returns: - str: A formatted query text. - """ - template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" - - lines = query.splitlines() - max_line_len = max(len(line) for line in lines) - - header = "-----Query Job SQL Follows-----" - header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) - - # Print out a "ruler" above and below the SQL so we can judge columns. - # Left pad for the line numbers (4 digits plus ":"). - ruler = " |" + " . |" * (max_line_len // 10) - - # Put line numbers next to the SQL. - body = "\n".join( - "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) - ) - - return template.format(job_id=job_id, header=header, ruler=ruler, body=body) - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. If not passed, falls back to the ``client`` - associated with the job object or``NoneType``. - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: If the job has already begun. - """ - - try: - super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - - def result( - self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): - """Start the job and wait for it to complete and get the result. - - Args: - page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. - max_results (Optional[int]): - The maximum total number of rows from this request. - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. 
- timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - start_index (Optional[int]): - The zero-based index of the starting row to read. - - Returns: - google.cloud.bigquery.table.RowIterator: - Iterator of row data - :class:`~google.cloud.bigquery.table.Row`-s. During each - page, the iterator will have the ``total_rows`` attribute - set, which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - - If the query is a special query that produces no results, e.g. - a DDL query, an ``_EmptyRowIterator`` instance is returned. - - Raises: - google.cloud.exceptions.GoogleCloudError: - If the job failed. - concurrent.futures.TimeoutError: - If the job did not complete in the given timeout. - """ - try: - super(QueryJob, self).result(retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - except requests.exceptions.Timeout as exc: - six.raise_from(concurrent.futures.TimeoutError, exc) - - # If the query job is complete but there are no query results, this was - # special job, such as a DDL query. Return an empty result set to - # indicate success and avoid calling tabledata.list on a table which - # can't be read (such as a view table). - if self._query_results.total_rows is None: - return _EmptyRowIterator() - - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, - page_size=page_size, - max_results=max_results, - start_index=start_index, - retry=retry, - timeout=timeout, - ) - rows._preserve_order = _contains_order_by(self.query) - return rows - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() - def to_arrow( - self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): - """[Beta] Create a class:`pyarrow.Table` by loading all pages of a - table or query. - - Args: - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - Possible values of ``progress_bar_type`` include: - - ``None`` - No progress bar. - ``'tqdm'`` - Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. - ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a - progress bar as a Jupyter notebook widget. - ``'tqdm_gui'`` - Use the :func:`tqdm.tqdm_gui` function to display a - progress bar as a graphical dialog box. - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This API - is a billable API. - - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. 
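# A minimal sketch of QueryJob.result() with paging and a per-request
# timeout, iterating the returned RowIterator.
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare`"
)

rows = job.result(page_size=500, max_results=2000, timeout=60)
print(rows.total_rows)  # rows in the full result set, not just this page
for row in rows:
    print(row["word"], row.word_count)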
The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - Returns: - pyarrow.Table - A :class:`pyarrow.Table` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - - ..versionadded:: 1.17.0 - """ - return self.result().to_arrow( - progress_bar_type=progress_bar_type, - bqstorage_client=bqstorage_client, - create_bqstorage_client=create_bqstorage_client, - ) - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() - def to_dataframe( - self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): - """Return a pandas DataFrame from a QueryJob - - Args: - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This - API is a billable API. - - This method requires the ``fastavro`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - - dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): - A dictionary of column names pandas ``dtype``s. The provided - ``dtype`` is used when constructing the series for the column - specified. Otherwise, the default pandas behavior is used. - - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - See - :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` - for details. - - ..versionadded:: 1.11.0 - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - ..versionadded:: 1.26.0 - - Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: If the `pandas` library cannot be imported. - """ - return self.result().to_dataframe( - bqstorage_client=bqstorage_client, - dtypes=dtypes, - progress_bar_type=progress_bar_type, - create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, - ) - - def __iter__(self): - return iter(self.result()) - - -class QueryPlanEntryStep(object): - """Map a single step in a query plan entry. - - Args: - kind (str): step type. - substeps (List): names of substeps. - """ - - def __init__(self, kind, substeps): - self.kind = kind - self.substeps = list(substeps) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource (Dict): JSON representation of the entry. 
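# A minimal sketch of the to_dataframe()/to_arrow() helpers; both call
# result() internally. pandas is required for to_dataframe and pyarrow for
# to_arrow; passing create_bqstorage_client=False keeps the download on the
# plain REST API.
from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT word, word_count "
    "FROM `bigquery-public-data.samples.shakespeare` LIMIT 1000"
)

df = job.to_dataframe(create_bqstorage_client=False)
print(df.dtypes)

arrow_table = job.to_arrow(progress_bar_type=None)
print(arrow_table.num_rows, arrow_table.schema)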
- - Returns: - google.cloud.bigquery.job.QueryPlanEntryStep: - New instance built from the resource. - """ - return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return self.kind == other.kind and self.substeps == other.substeps - - -class QueryPlanEntry(object): - """QueryPlanEntry represents a single stage of a query execution plan. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource(Dict[str: object]): - ExplainQueryStage representation returned from API. - - Returns: - google.cloud.bigquery.job.QueryPlanEntry: - Query plan entry parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def name(self): - """Optional[str]: Human-readable name of the stage.""" - return self._properties.get("name") - - @property - def entry_id(self): - """Optional[str]: Unique ID for the stage within the plan.""" - return self._properties.get("id") - - @property - def start(self): - """Optional[Datetime]: Datetime when the stage started.""" - if self._properties.get("startMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("startMs")) * 1000.0 - ) - - @property - def end(self): - """Optional[Datetime]: Datetime when the stage ended.""" - if self._properties.get("endMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("endMs")) * 1000.0 - ) - - @property - def input_stages(self): - """List(int): Entry IDs for stages that were inputs for this stage.""" - if self._properties.get("inputStages") is None: - return [] - return [ - _helpers._int_or_none(entry) - for entry in self._properties.get("inputStages") - ] - - @property - def parallel_inputs(self): - """Optional[int]: Number of parallel input segments within - the stage. - """ - return _helpers._int_or_none(self._properties.get("parallelInputs")) - - @property - def completed_parallel_inputs(self): - """Optional[int]: Number of parallel input segments completed.""" - return _helpers._int_or_none(self._properties.get("completedParallelInputs")) - - @property - def wait_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsAvg")) - - @property - def wait_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsMax")) - - @property - def wait_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("waitRatioAvg") - - @property - def wait_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. 
- """ - return self._properties.get("waitRatioMax") - - @property - def read_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsAvg")) - - @property - def read_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsMax")) - - @property - def read_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent reading - input, relative to the longest time spent by any worker in any stage - of the overall plan. - """ - return self._properties.get("readRatioAvg") - - @property - def read_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent reading - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("readRatioMax") - - @property - def compute_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsAvg")) - - @property - def compute_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsMax")) - - @property - def compute_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioAvg") - - @property - def compute_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioMax") - - @property - def write_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsAvg")) - - @property - def write_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsMax")) - - @property - def write_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. - """ - return self._properties.get("writeRatioAvg") - - @property - def write_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. - """ - return self._properties.get("writeRatioMax") - - @property - def records_read(self): - """Optional[int]: Number of records read by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsRead")) - - @property - def records_written(self): - """Optional[int]: Number of records written by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsWritten")) - - @property - def status(self): - """Optional[str]: status of this stage.""" - return self._properties.get("status") - - @property - def shuffle_output_bytes(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle. 
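The per-stage statistics above are normally read from a finished query job's ``query_plan`` property. A rough sketch, assuming default credentials and access to a public dataset::

    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query(
        "SELECT word, COUNT(*) AS word_count "
        "FROM `bigquery-public-data.samples.shakespeare` GROUP BY word"
    )
    job.result()  # plan statistics are only complete once the job is done

    for stage in job.query_plan:
        print(stage.name, stage.status)
        print("  records read / written:", stage.records_read, stage.records_written)
        print("  shuffle output bytes:", stage.shuffle_output_bytes)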
- """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) - - @property - def shuffle_output_bytes_spilled(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle and spilled to disk. - """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) - - @property - def steps(self): - """List(QueryPlanEntryStep): List of step operations performed by - each worker in the stage. - """ - return [ - QueryPlanEntryStep.from_api_repr(step) - for step in self._properties.get("steps", []) - ] - - -class TimelineEntry(object): - """TimelineEntry represents progress of a query job at a particular - point in time. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource(Dict[str: object]): - QueryTimelineSample representation returned from API. - - Returns: - google.cloud.bigquery.TimelineEntry: - Timeline sample parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def elapsed_ms(self): - """Optional[int]: Milliseconds elapsed since start of query - execution.""" - return _helpers._int_or_none(self._properties.get("elapsedMs")) - - @property - def active_units(self): - """Optional[int]: Current number of input units being processed - by workers, reported as largest value since the last sample.""" - return _helpers._int_or_none(self._properties.get("activeUnits")) - - @property - def pending_units(self): - """Optional[int]: Current number of input units remaining for - query stages active at this sample time.""" - return _helpers._int_or_none(self._properties.get("pendingUnits")) - - @property - def completed_units(self): - """Optional[int]: Current number of input units completed by - this query.""" - return _helpers._int_or_none(self._properties.get("completedUnits")) - - @property - def slot_millis(self): - """Optional[int]: Cumulative slot-milliseconds consumed by - this query.""" - return _helpers._int_or_none(self._properties.get("totalSlotMs")) - - -class UnknownJob(_AsyncJob): - """A job whose type cannot be determined.""" - - @classmethod - def from_api_repr(cls, resource, client): - """Construct an UnknownJob from the JSON representation. - - Args: - resource (Dict): JSON representation of a job. - client (google.cloud.bigquery.client.Client): - Client connected to BigQuery API. - - Returns: - UnknownJob: Job corresponding to the resource. - """ - job_ref_properties = resource.get("jobReference", {"projectId": client.project}) - job_ref = _JobReference._from_api_repr(job_ref_properties) - job = cls(job_ref, client) - # Populate the job reference with the project, even if it has been - # redacted, because we know it should equal that of the request. - resource["jobReference"] = job_ref_properties - job._properties = resource - return job - - -class ScriptStackFrame(object): - """Stack frame showing the line/column/procedure name where the current - evaluation happened. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def procedure_id(self): - """Optional[str]: Name of the active procedure. - - Omitted if in a top-level script. 
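``TimelineEntry`` samples are exposed through a query job's ``timeline`` property. Continuing the sketch above (a completed ``QueryJob`` named ``job``)::

    for sample in job.timeline:
        print(
            "+%sms: %s active, %s pending, %s completed units, %s slot-ms"
            % (
                sample.elapsed_ms,
                sample.active_units,
                sample.pending_units,
                sample.completed_units,
                sample.slot_millis,
            )
        )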
- """ - return self._properties.get("procedureId") - - @property - def text(self): - """str: Text of the current statement/expression.""" - return self._properties.get("text") - - @property - def start_line(self): - """int: One-based start line.""" - return _helpers._int_or_none(self._properties.get("startLine")) - - @property - def start_column(self): - """int: One-based start column.""" - return _helpers._int_or_none(self._properties.get("startColumn")) - - @property - def end_line(self): - """int: One-based end line.""" - return _helpers._int_or_none(self._properties.get("endLine")) - - @property - def end_column(self): - """int: One-based end column.""" - return _helpers._int_or_none(self._properties.get("endColumn")) - - -class ScriptStatistics(object): - """Statistics for a child job of a script. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def stack_frames(self): - """List[ScriptStackFrame]: Stack trace where the current evaluation - happened. - - Shows line/column/procedure name of each frame on the stack at the - point where the current evaluation happened. - - The leaf frame is first, the primary script is last. - """ - return [ - ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) - ] - - @property - def evaluation_kind(self): - """str: Indicates the type of child job. - - Possible values include ``STATEMENT`` and ``EXPRESSION``. - """ - return self._properties.get("evaluationKind") diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py new file mode 100644 index 000000000..26ecf8d3c --- /dev/null +++ b/google/cloud/bigquery/job/__init__.py @@ -0,0 +1,77 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
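The ``ScriptStackFrame`` and ``ScriptStatistics`` classes removed here are re-added under ``job/base.py`` later in this patch; they are reached through the child jobs of a script. A hedged sketch, assuming a ``client`` and any multi-statement script::

    parent = client.query("DECLARE x INT64; SET x = 1; SELECT x;")
    parent.result()

    for child in client.list_jobs(parent_job=parent):
        stats = child.script_statistics
        if stats is None:
            continue
        print(child.job_id, stats.evaluation_kind)
        for frame in stats.stack_frames:
            print("  %s:%s %s" % (frame.start_line, frame.start_column, frame.text))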
+ +"""Define API Jobs.""" + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _error_result_to_exception +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ScriptStatistics +from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import UnknownJob +from google.cloud.bigquery.job.copy_ import CopyJob +from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.extract import ExtractJob +from google.cloud.bigquery.job.extract import ExtractJobConfig +from google.cloud.bigquery.job.load import LoadJob +from google.cloud.bigquery.job.load import LoadJobConfig +from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import QueryJob +from google.cloud.bigquery.job.query import QueryJobConfig +from google.cloud.bigquery.job.query import QueryPlanEntry +from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.enums import Compression +from google.cloud.bigquery.enums import CreateDisposition +from google.cloud.bigquery.enums import DestinationFormat +from google.cloud.bigquery.enums import Encoding +from google.cloud.bigquery.enums import QueryPriority +from google.cloud.bigquery.enums import SchemaUpdateOption +from google.cloud.bigquery.enums import SourceFormat +from google.cloud.bigquery.enums import WriteDisposition + + +# Include classes previously in job.py for backwards compatibility. +__all__ = [ + "_AsyncJob", + "_error_result_to_exception", + "_DONE_STATE", + "_JobConfig", + "_JobReference", + "ScriptStatistics", + "ScriptStackFrame", + "UnknownJob", + "CopyJob", + "CopyJobConfig", + "ExtractJob", + "ExtractJobConfig", + "LoadJob", + "LoadJobConfig", + "_contains_order_by", + "QueryJob", + "QueryJobConfig", + "QueryPlanEntry", + "QueryPlanEntryStep", + "TimelineEntry", + "Compression", + "CreateDisposition", + "DestinationFormat", + "Encoding", + "QueryPriority", + "SchemaUpdateOption", + "SourceFormat", + "WriteDisposition", +] diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py new file mode 100644 index 000000000..2f4ae1460 --- /dev/null +++ b/google/cloud/bigquery/job/base.py @@ -0,0 +1,912 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
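Because ``job/__init__.py`` re-exports everything that previously lived in the single ``job.py`` module (see the ``__all__`` list above), existing imports keep working unchanged. Both spellings below remain valid after the split::

    from google.cloud.bigquery.job import ExtractJobConfig, QueryJobConfig

    from google.cloud.bigquery import job
    config = job.QueryJobConfig(use_legacy_sql=False)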
+ +"""Base classes and helpers for job classes.""" + +import copy +import threading + +from google.api_core import exceptions +import google.api_core.future.polling +from six.moves import http_client + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.retry import DEFAULT_RETRY + + +_DONE_STATE = "DONE" +_STOPPED_REASON = "stopped" +_ERROR_REASON_TO_EXCEPTION = { + "accessDenied": http_client.FORBIDDEN, + "backendError": http_client.INTERNAL_SERVER_ERROR, + "billingNotEnabled": http_client.FORBIDDEN, + "billingTierLimitExceeded": http_client.BAD_REQUEST, + "blocked": http_client.FORBIDDEN, + "duplicate": http_client.CONFLICT, + "internalError": http_client.INTERNAL_SERVER_ERROR, + "invalid": http_client.BAD_REQUEST, + "invalidQuery": http_client.BAD_REQUEST, + "notFound": http_client.NOT_FOUND, + "notImplemented": http_client.NOT_IMPLEMENTED, + "quotaExceeded": http_client.FORBIDDEN, + "rateLimitExceeded": http_client.FORBIDDEN, + "resourceInUse": http_client.BAD_REQUEST, + "resourcesExceeded": http_client.BAD_REQUEST, + "responseTooLarge": http_client.FORBIDDEN, + "stopped": http_client.OK, + "tableUnavailable": http_client.BAD_REQUEST, +} + + +def _error_result_to_exception(error_result): + """Maps BigQuery error reasons to an exception. + + The reasons and their matching HTTP status codes are documented on + the `troubleshooting errors`_ page. + + .. _troubleshooting errors: https://cloud.google.com/bigquery\ + /troubleshooting-errors + + Args: + error_result (Mapping[str, str]): The error result from BigQuery. + + Returns: + google.cloud.exceptions.GoogleAPICallError: The mapped exception. + """ + reason = error_result.get("reason") + status_code = _ERROR_REASON_TO_EXCEPTION.get( + reason, http_client.INTERNAL_SERVER_ERROR + ) + return exceptions.from_http_status( + status_code, error_result.get("message", ""), errors=[error_result] + ) + + +class _JobReference(object): + """A reference to a job. + + Args: + job_id (str): ID of the job to run. + project (str): ID of the project where the job runs. + location (str): Location of where the job runs. + """ + + def __init__(self, job_id, project, location): + self._properties = {"jobId": job_id, "projectId": project} + # The location field must not be populated if it is None. + if location: + self._properties["location"] = location + + @property + def job_id(self): + """str: ID of the job.""" + return self._properties.get("jobId") + + @property + def project(self): + """str: ID of the project where the job runs.""" + return self._properties.get("projectId") + + @property + def location(self): + """str: Location where the job runs.""" + return self._properties.get("location") + + def _to_api_repr(self): + """Returns the API resource representation of the job reference.""" + return copy.deepcopy(self._properties) + + @classmethod + def _from_api_repr(cls, resource): + """Returns a job reference for an API resource representation.""" + job_id = resource.get("jobId") + project = resource.get("projectId") + location = resource.get("location") + job_ref = cls(job_id, project, location) + return job_ref + + +class _AsyncJob(google.api_core.future.polling.PollingFuture): + """Base class for asynchronous jobs. + + Args: + job_id (Union[str, _JobReference]): + Job's ID in the project associated with the client or a + fully-qualified job reference. + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project configuration. 
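``_error_result_to_exception`` and the reason-to-status table above turn a BigQuery ``errorResult`` into the matching ``google.api_core`` exception. A small illustration with an invented error payload (the helper is private; it is imported directly here only to show the mapping)::

    from google.api_core import exceptions
    from google.cloud.bigquery.job.base import _error_result_to_exception

    exc = _error_result_to_exception(
        {"reason": "notFound", "message": "Not found: Table my-project:my_dataset.my_table"}
    )
    assert isinstance(exc, exceptions.NotFound)             # notFound -> HTTP 404

    exc = _error_result_to_exception({"reason": "somethingElse", "message": "?"})
    assert isinstance(exc, exceptions.InternalServerError)  # unmapped reasons default to 500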
+ """ + + def __init__(self, job_id, client): + super(_AsyncJob, self).__init__() + + # The job reference can be either a plain job ID or the full resource. + # Populate the properties dictionary consistently depending on what has + # been passed in. + job_ref = job_id + if not isinstance(job_id, _JobReference): + job_ref = _JobReference(job_id, client.project, None) + self._properties = {"jobReference": job_ref._to_api_repr()} + + self._client = client + self._result_set = False + self._completion_lock = threading.Lock() + + @property + def job_id(self): + """str: ID of the job.""" + return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) + + @property + def parent_job_id(self): + """Return the ID of the parent job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id + + Returns: + Optional[str]: parent job id. + """ + return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) + + @property + def script_statistics(self): + resource = _helpers._get_sub_prop( + self._properties, ["statistics", "scriptStatistics"] + ) + if resource is None: + return None + return ScriptStatistics(resource) + + @property + def num_child_jobs(self): + """The number of child jobs executed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs + + Returns: + int + """ + count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) + return int(count) if count is not None else 0 + + @property + def project(self): + """Project bound to the job. + + Returns: + str: the project (derived from the client). + """ + return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) + + @property + def location(self): + """str: Location where the job runs.""" + return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) + + def _require_client(self, client): + """Check client or verify over-ride. + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + Returns: + google.cloud.bigquery.client.Client: + The client passed in or the currently bound client. + """ + if client is None: + client = self._client + return client + + @property + def job_type(self): + """Type of job. + + Returns: + str: one of 'load', 'copy', 'extract', 'query'. + """ + return self._JOB_TYPE + + @property + def path(self): + """URL path for the job's APIs. + + Returns: + str: the path based on project and job ID. + """ + return "/projects/%s/jobs/%s" % (self.project, self.job_id) + + @property + def labels(self): + """Dict[str, str]: Labels for the job.""" + return self._properties.setdefault("labels", {}) + + @property + def etag(self): + """ETag for the job resource. + + Returns: + Optional[str]: the ETag (None until set from the server). + """ + return self._properties.get("etag") + + @property + def self_link(self): + """URL for the job resource. + + Returns: + Optional[str]: the URL (None until set from the server). + """ + return self._properties.get("selfLink") + + @property + def user_email(self): + """E-mail address of user who submitted the job. + + Returns: + Optional[str]: the URL (None until set from the server). 
+ """ + return self._properties.get("user_email") + + @property + def created(self): + """Datetime at which the job was created. + + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). + """ + millis = _helpers._get_sub_prop( + self._properties, ["statistics", "creationTime"] + ) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + @property + def started(self): + """Datetime at which the job was started. + + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). + """ + millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + @property + def ended(self): + """Datetime at which the job finished. + + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). + """ + millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) + + def _job_statistics(self): + """Helper for job-type specific statistics-based properties.""" + statistics = self._properties.get("statistics", {}) + return statistics.get(self._JOB_TYPE, {}) + + @property + def error_result(self): + """Error information about the job as a whole. + + Returns: + Optional[Mapping]: the error information (None until set from the server). + """ + status = self._properties.get("status") + if status is not None: + return status.get("errorResult") + + @property + def errors(self): + """Information about individual errors generated by the job. + + Returns: + Optional[List[Mapping]]: + the error information (None until set from the server). + """ + status = self._properties.get("status") + if status is not None: + return status.get("errors") + + @property + def state(self): + """Status of the job. + + Returns: + Optional[str]: + the state (None until set from the server). + """ + status = self._properties.get("status", {}) + return status.get("state") + + def _set_properties(self, api_response): + """Update properties from resource in body of ``api_response`` + + Args: + api_response (Dict): response returned from an API call. + """ + cleaned = api_response.copy() + + statistics = cleaned.get("statistics", {}) + if "creationTime" in statistics: + statistics["creationTime"] = float(statistics["creationTime"]) + if "startTime" in statistics: + statistics["startTime"] = float(statistics["startTime"]) + if "endTime" in statistics: + statistics["endTime"] = float(statistics["endTime"]) + + # Save configuration to keep reference same in self._configuration. + cleaned_config = cleaned.pop("configuration", {}) + configuration = self._properties.pop("configuration", {}) + self._properties.clear() + self._properties.update(cleaned) + self._properties["configuration"] = configuration + self._properties["configuration"].update(cleaned_config) + + # For Future interface + self._set_future_result() + + @classmethod + def _check_resource_config(cls, resource): + """Helper for :meth:`from_api_repr` + + Args: + resource (Dict): resource for the job. + + Raises: + KeyError: + If the resource has no identifier, or + is missing the appropriate configuration. 
+ """ + if "jobReference" not in resource or "jobId" not in resource["jobReference"]: + raise KeyError( + "Resource lacks required identity information: " + '["jobReference"]["jobId"]' + ) + if ( + "configuration" not in resource + or cls._JOB_TYPE not in resource["configuration"] + ): + raise KeyError( + "Resource lacks required configuration: " + '["configuration"]["%s"]' % cls._JOB_TYPE + ) + + def to_api_repr(self): + """Generate a resource for the job.""" + return copy.deepcopy(self._properties) + + _build_resource = to_api_repr # backward-compatibility alias + + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType`` + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: + If the job has already begun. + """ + if self.state is not None: + raise ValueError("Job already begun.") + + client = self._require_client(client) + path = "/projects/%s/jobs" % (self.project,) + + # jobs.insert is idempotent because we ensure that every new + # job has an ID. + span_attributes = {"path": path} + api_response = client._call_api( + retry, + span_name="BigQuery.job.begin", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + data=self.to_api_repr(), + timeout=timeout, + ) + self._set_properties(api_response) + + def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: test for the existence of the job via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Returns: + bool: Boolean indicating existence of the job. + """ + client = self._require_client(client) + + extra_params = {"fields": "id"} + if self.location: + extra_params["location"] = self.location + + try: + span_attributes = {"path": self.path} + + client._call_api( + retry, + span_name="BigQuery.job.exists", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) + except exceptions.NotFound: + return False + else: + return True + + def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: refresh job properties via a GET request. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. 
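``exists`` is a lightweight probe (it only requests the ``id`` field and swallows ``NotFound``), while ``reload`` re-fetches the full resource. A manual polling sketch, assuming a ``client`` and an illustrative job ID::

    import time

    job = client.get_job("bquxjob_1234abcd_5678ef")  # illustrative job ID
    while job.state != "DONE":
        time.sleep(1)
        job.reload()  # refreshes state, statistics and errors via jobs.get
    print(job.job_id, job.state, job.error_result)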
+ """ + client = self._require_client(client) + + extra_params = {} + if self.location: + extra_params["location"] = self.location + span_attributes = {"path": self.path} + + api_response = client._call_api( + retry, + span_name="BigQuery.job.reload", + span_attributes=span_attributes, + job_ref=self, + method="GET", + path=self.path, + query_params=extra_params, + timeout=timeout, + ) + self._set_properties(api_response) + + def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: cancel job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry`` + + Returns: + bool: Boolean indicating that the cancel request was sent. + """ + client = self._require_client(client) + + extra_params = {} + if self.location: + extra_params["location"] = self.location + + path = "{}/cancel".format(self.path) + span_attributes = {"path": path} + + api_response = client._call_api( + retry, + span_name="BigQuery.job.cancel", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) + self._set_properties(api_response["job"]) + # The Future interface requires that we return True if the *attempt* + # to cancel was successful. + return True + + # The following methods implement the PollingFuture interface. Note that + # the methods above are from the pre-Future interface and are left for + # compatibility. The only "overloaded" method is :meth:`cancel`, which + # satisfies both interfaces. + + def _set_future_result(self): + """Set the result or exception from the job if it is complete.""" + # This must be done in a lock to prevent the polling thread + # and main thread from both executing the completion logic + # at the same time. + with self._completion_lock: + # If the operation isn't complete or if the result has already been + # set, do not call set_result/set_exception again. + # Note: self._result_set is set to True in set_result and + # set_exception, in case those methods are invoked directly. + if not self.done(reload=False) or self._result_set: + return + + if self.error_result is not None: + exception = _error_result_to_exception(self.error_result) + self.set_exception(exception) + else: + self.set_result(self) + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. + + Returns: + bool: True if the job is complete, False otherwise. + """ + # Do not refresh is the state is already done, as the job will not + # change once complete. + if self.state != _DONE_STATE and reload: + self.reload(retry=retry, timeout=timeout) + return self.state == _DONE_STATE + + def result(self, retry=DEFAULT_RETRY, timeout=None): + """Start the job and wait for it to complete and get the result. 
+ + Args: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + + Returns: + _AsyncJob: This instance. + + Raises: + google.cloud.exceptions.GoogleAPICallError: + if the job failed. + concurrent.futures.TimeoutError: + if the job did not complete in the given timeout. + """ + if self.state is None: + self._begin(retry=retry, timeout=timeout) + + kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} + return super(_AsyncJob, self).result(timeout=timeout, **kwargs) + + def cancelled(self): + """Check if the job has been cancelled. + + This always returns False. It's not possible to check if a job was + cancelled in the API. This method is here to satisfy the interface + for :class:`google.api_core.future.Future`. + + Returns: + bool: False + """ + return ( + self.error_result is not None + and self.error_result.get("reason") == _STOPPED_REASON + ) + + +class _JobConfig(object): + """Abstract base class for job configuration objects. + + Args: + job_type (str): The key to use for the job configuration. + """ + + def __init__(self, job_type, **kwargs): + self._job_type = job_type + self._properties = {job_type: {}} + for prop, val in kwargs.items(): + setattr(self, prop, val) + + @property + def labels(self): + """Dict[str, str]: Labels for the job. + + This method always returns a dict. To change a job's labels, + modify the dict, then call ``Client.update_job``. To delete a + label, set its value to :data:`None` before updating. + + Raises: + ValueError: If ``value`` type is invalid. + """ + return self._properties.setdefault("labels", {}) + + @labels.setter + def labels(self, value): + if not isinstance(value, dict): + raise ValueError("Pass a dict") + self._properties["labels"] = value + + def _get_sub_prop(self, key, default=None): + """Get a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access + those properties:: + + self._get_sub_prop('destinationTable') + + This is equivalent to using the ``_helpers._get_sub_prop`` function:: + + _helpers._get_sub_prop( + self._properties, ['query', 'destinationTable']) + + Args: + key (str): + Key for the value to get in the + ``self._properties[self._job_type]`` dictionary. + default (Optional[object]): + Default value to return if the key is not found. + Defaults to :data:`None`. + + Returns: + object: The value if present or the default. + """ + return _helpers._get_sub_prop( + self._properties, [self._job_type, key], default=default + ) + + def _set_sub_prop(self, key, value): + """Set a value in the ``self._properties[self._job_type]`` dictionary. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set + those properties:: + + self._set_sub_prop('useLegacySql', False) + + This is equivalent to using the ``_helper._set_sub_prop`` function:: + + _helper._set_sub_prop( + self._properties, ['query', 'useLegacySql'], False) + + Args: + key (str): + Key to set in the ``self._properties[self._job_type]`` + dictionary. + value (object): Value to set. 
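``result`` begins the job if necessary and then blocks on the ``PollingFuture`` machinery: a timeout surfaces as ``concurrent.futures.TimeoutError`` and a failed job re-raises the mapped API error. A hedged usage sketch, assuming an existing ``job`` instance::

    import concurrent.futures

    from google.api_core.exceptions import GoogleAPICallError

    try:
        job.result(timeout=60)   # up to 60 seconds per underlying request
    except concurrent.futures.TimeoutError:
        job.cancel()             # True only means the cancel request was sent
    except GoogleAPICallError:
        print("job failed:", job.error_result)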
+ """ + _helpers._set_sub_prop(self._properties, [self._job_type, key], value) + + def _del_sub_prop(self, key): + """Remove ``key`` from the ``self._properties[self._job_type]`` dict. + + Most job properties are inside the dictionary related to the job type + (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear + those properties:: + + self._del_sub_prop('useLegacySql') + + This is equivalent to using the ``_helper._del_sub_prop`` function:: + + _helper._del_sub_prop( + self._properties, ['query', 'useLegacySql']) + + Args: + key (str): + Key to remove in the ``self._properties[self._job_type]`` + dictionary. + """ + _helpers._del_sub_prop(self._properties, [self._job_type, key]) + + def to_api_repr(self): + """Build an API representation of the job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) + + def _fill_from_default(self, default_job_config): + """Merge this job config with a default job config. + + The keys in this object take precedence over the keys in the default + config. The merge is done at the top-level as well as for keys one + level below the job type. + + Args: + default_job_config (google.cloud.bigquery.job._JobConfig): + The default job config that will be used to fill in self. + + Returns: + google.cloud.bigquery.job._JobConfig: A new (merged) job config. + """ + if self._job_type != default_job_config._job_type: + raise TypeError( + "attempted to merge two incompatible job types: " + + repr(self._job_type) + + ", " + + repr(default_job_config._job_type) + ) + + new_job_config = self.__class__() + + default_job_properties = copy.deepcopy(default_job_config._properties) + for key in self._properties: + if key != self._job_type: + default_job_properties[key] = self._properties[key] + + default_job_properties[self._job_type].update(self._properties[self._job_type]) + new_job_config._properties = default_job_properties + + return new_job_config + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct a job configuration given its API representation + + Args: + resource (Dict): + A job configuration in the same representation as is returned + from the API. + + Returns: + google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. + """ + job_config = cls() + job_config._properties = resource + return job_config + + +class ScriptStackFrame(object): + """Stack frame showing the line/column/procedure name where the current + evaluation happened. + + Args: + resource (Map[str, Any]): JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def procedure_id(self): + """Optional[str]: Name of the active procedure. + + Omitted if in a top-level script. 
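``_fill_from_default`` is the merge step behind client-level defaults such as ``Client(default_query_job_config=...)``; keys set on the per-call config win. A sketch of the observable behaviour (the helper is private and is called directly here only for illustration)::

    from google.cloud import bigquery

    default = bigquery.QueryJobConfig(use_legacy_sql=False, maximum_bytes_billed=10**9)
    per_call = bigquery.QueryJobConfig(maximum_bytes_billed=10**8)

    merged = per_call._fill_from_default(default)
    assert merged.use_legacy_sql is False         # inherited from the default config
    assert merged.maximum_bytes_billed == 10**8   # per-call value wins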
+ """ + return self._properties.get("procedureId") + + @property + def text(self): + """str: Text of the current statement/expression.""" + return self._properties.get("text") + + @property + def start_line(self): + """int: One-based start line.""" + return _helpers._int_or_none(self._properties.get("startLine")) + + @property + def start_column(self): + """int: One-based start column.""" + return _helpers._int_or_none(self._properties.get("startColumn")) + + @property + def end_line(self): + """int: One-based end line.""" + return _helpers._int_or_none(self._properties.get("endLine")) + + @property + def end_column(self): + """int: One-based end column.""" + return _helpers._int_or_none(self._properties.get("endColumn")) + + +class ScriptStatistics(object): + """Statistics for a child job of a script. + + Args: + resource (Map[str, Any]): JSON representation of object. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def stack_frames(self): + """List[ScriptStackFrame]: Stack trace where the current evaluation + happened. + + Shows line/column/procedure name of each frame on the stack at the + point where the current evaluation happened. + + The leaf frame is first, the primary script is last. + """ + return [ + ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) + ] + + @property + def evaluation_kind(self): + """str: Indicates the type of child job. + + Possible values include ``STATEMENT`` and ``EXPRESSION``. + """ + return self._properties.get("evaluationKind") + + +class UnknownJob(_AsyncJob): + """A job whose type cannot be determined.""" + + @classmethod + def from_api_repr(cls, resource, client): + """Construct an UnknownJob from the JSON representation. + + Args: + resource (Dict): JSON representation of a job. + client (google.cloud.bigquery.client.Client): + Client connected to BigQuery API. + + Returns: + UnknownJob: Job corresponding to the resource. + """ + job_ref_properties = resource.get("jobReference", {"projectId": client.project}) + job_ref = _JobReference._from_api_repr(job_ref_properties) + job = cls(job_ref, client) + # Populate the job reference with the project, even if it has been + # redacted, because we know it should equal that of the request. + resource["jobReference"] = job_ref_properties + job._properties = resource + return job diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py new file mode 100644 index 000000000..95f4b613b --- /dev/null +++ b/google/cloud/bigquery/job/copy_.py @@ -0,0 +1,223 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Classes for copy jobs.""" + +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.table import TableReference + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class CopyJobConfig(_JobConfig): + """Configuration options for copy jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(CopyJobConfig, self).__init__("copy", **kwargs) + + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + + +class CopyJob(_AsyncJob): + """Asynchronous job: copy data into a table from other tables. + + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. + + destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): + Extra configuration options for the copy job. 
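In practice a ``CopyJobConfig`` is passed to ``Client.copy_table`` rather than used to build a ``CopyJob`` by hand. A minimal sketch with placeholder table IDs::

    from google.cloud import bigquery

    client = bigquery.Client()
    config = bigquery.CopyJobConfig(
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )
    job = client.copy_table(
        "my-project.my_dataset.source_table",
        "my-project.my_dataset.destination_table",
        job_config=config,
    )
    job.result()  # wait for the copy to finish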
+ """ + + _JOB_TYPE = "copy" + + def __init__(self, job_id, sources, destination, client, job_config=None): + super(CopyJob, self).__init__(job_id, client) + + if not job_config: + job_config = CopyJobConfig() + + self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if destination: + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "destinationTable"], + destination.to_api_repr(), + ) + + if sources: + source_resources = [source.to_api_repr() for source in sources] + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "sourceTables"], + source_resources, + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: Table into which data + is to be loaded. + """ + return TableReference.from_api_repr( + _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "destinationTable"] + ) + ) + + @property + def sources(self): + """List[google.cloud.bigquery.table.TableReference]): Table(s) from + which data is to be loaded. + """ + source_configs = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTables"] + ) + if source_configs is None: + single = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTable"] + ) + if single is None: + raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") + source_configs = [single] + + sources = [] + for source_config in source_configs: + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + return sources + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. 
+ """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) + job._set_properties(resource) + return job diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py new file mode 100644 index 000000000..a6e262a32 --- /dev/null +++ b/google/cloud/bigquery/job/extract.py @@ -0,0 +1,266 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for extract (export) jobs.""" + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class ExtractJobConfig(_JobConfig): + """Configuration options for extract jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(ExtractJobConfig, self).__init__("extract", **kwargs) + + @property + def compression(self): + """google.cloud.bigquery.job.Compression: Compression type to use for + exported files. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression + """ + return self._get_sub_prop("compression") + + @compression.setter + def compression(self, value): + self._set_sub_prop("compression", value) + + @property + def destination_format(self): + """google.cloud.bigquery.job.DestinationFormat: Exported file format. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format + """ + return self._get_sub_prop("destinationFormat") + + @destination_format.setter + def destination_format(self, value): + self._set_sub_prop("destinationFormat", value) + + @property + def field_delimiter(self): + """str: Delimiter to use between fields in the exported data. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter + """ + return self._get_sub_prop("fieldDelimiter") + + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop("fieldDelimiter", value) + + @property + def print_header(self): + """bool: Print a header row in the exported data. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header + """ + return self._get_sub_prop("printHeader") + + @print_header.setter + def print_header(self, value): + self._set_sub_prop("printHeader", value) + + @property + def use_avro_logical_types(self): + """bool: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + + +class ExtractJob(_AsyncJob): + """Asynchronous job: extract data from a table into Cloud Storage. + + Args: + job_id (str): the job's ID. + + source (Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]): + Table or Model from which data is to be loaded or extracted. + + destination_uris (List[str]): + URIs describing where the extracted data will be written in Cloud + Storage, using the format ``gs:///``. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration. + + job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): + Extra configuration options for the extract job. + """ + + _JOB_TYPE = "extract" + + def __init__(self, job_id, source, destination_uris, client, job_config=None): + super(ExtractJob, self).__init__(job_id, client) + + if job_config is None: + job_config = ExtractJobConfig() + + self._properties["configuration"] = job_config._properties + self._configuration = job_config + + if source: + source_ref = {"projectId": source.project, "datasetId": source.dataset_id} + + if isinstance(source, (Table, TableListItem, TableReference)): + source_ref["tableId"] = source.table_id + source_key = "sourceTable" + else: + source_ref["modelId"] = source.model_id + source_key = "sourceModel" + + _helpers._set_sub_prop( + self._properties, ["configuration", "extract", source_key], source_ref + ) + + if destination_uris: + _helpers._set_sub_prop( + self._properties, + ["configuration", "extract", "destinationUris"], + destination_uris, + ) + + @property + def source(self): + """Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]: Table or Model from which data is to be loaded or extracted. + """ + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceTable"] + ) + if source_config: + return TableReference.from_api_repr(source_config) + else: + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceModel"] + ) + return ModelReference.from_api_repr(source_config) + + @property + def destination_uris(self): + """List[str]: URIs describing where the extracted data will be + written in Cloud Storage, using the format + ``gs:///``. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "destinationUris"] + ) + + @property + def compression(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. + """ + return self._configuration.compression + + @property + def destination_format(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. 
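An ``ExtractJobConfig`` is normally handed to ``Client.extract_table`` together with a source table and one or more ``gs://`` URIs. A short sketch; the bucket and table names are placeholders::

    from google.cloud import bigquery

    client = bigquery.Client()
    config = bigquery.ExtractJobConfig(
        destination_format=bigquery.DestinationFormat.NEWLINE_DELIMITED_JSON,
        compression=bigquery.Compression.GZIP,
    )
    job = client.extract_table(
        "my-project.my_dataset.my_table",
        "gs://my-bucket/exports/my_table-*.json.gz",
        job_config=config,
    )
    job.result()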
+ """ + return self._configuration.destination_format + + @property + def field_delimiter(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def print_header(self): + """See + :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. + """ + return self._configuration.print_header + + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts + + Returns: + List[int]: + A list of integer counts, each representing the number of files + per destination URI or URI pattern specified in the extract + configuration. These values will be in the same order as the URIs + specified in the 'destinationUris' field. Returns None if job is + not yet complete. + """ + counts = self._job_statistics().get("destinationUriFileCounts") + if counts is not None: + return [int(count) for count in counts] + return None + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) + job._set_properties(resource) + return job diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py new file mode 100644 index 000000000..e784af0a6 --- /dev/null +++ b/google/cloud/bigquery/job/load.py @@ -0,0 +1,758 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Classes for load jobs.""" + +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.schema import SchemaField +from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TimePartitioning + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class LoadJobConfig(_JobConfig): + """Configuration options for load jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(LoadJobConfig, self).__init__("load", **kwargs) + + @property + def allow_jagged_rows(self): + """Optional[bool]: Allow missing trailing optional columns (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows + """ + return self._get_sub_prop("allowJaggedRows") + + @allow_jagged_rows.setter + def allow_jagged_rows(self, value): + self._set_sub_prop("allowJaggedRows", value) + + @property + def allow_quoted_newlines(self): + """Optional[bool]: Allow quoted data containing newline characters (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines + """ + return self._get_sub_prop("allowQuotedNewlines") + + @allow_quoted_newlines.setter + def allow_quoted_newlines(self, value): + self._set_sub_prop("allowQuotedNewlines", value) + + @property + def autodetect(self): + """Optional[bool]: Automatically infer the schema from a sample of the data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect + """ + return self._get_sub_prop("autodetect") + + @autodetect.setter + def autodetect(self, value): + self._set_sub_prop("autodetect", value) + + @property + def clustering_fields(self): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + BigQuery supports clustering for both partitioned and + non-partitioned tables. + """ + prop = self._get_sub_prop("clustering") + if prop is not None: + return list(prop.get("fields", ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop("clustering", {"fields": value}) + else: + self._del_sub_prop("clustering") + + @property + def create_disposition(self): + """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior + for creating tables. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def destination_encryption_configuration(self): + """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + else: + self._del_sub_prop("destinationEncryptionConfiguration") + + @property + def destination_table_description(self): + """Optional[str]: Name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description + """ + prop = self._get_sub_prop("destinationTableProperties") + if prop is not None: + return prop["description"] + + @destination_table_description.setter + def destination_table_description(self, value): + keys = [self._job_type, "destinationTableProperties", "description"] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + + @property + def destination_table_friendly_name(self): + """Optional[str]: Name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name + """ + prop = self._get_sub_prop("destinationTableProperties") + if prop is not None: + return prop["friendlyName"] + + @destination_table_friendly_name.setter + def destination_table_friendly_name(self, value): + keys = [self._job_type, "destinationTableProperties", "friendlyName"] + if value is not None: + _helpers._set_sub_prop(self._properties, keys, value) + else: + _helpers._del_sub_prop(self._properties, keys) + + @property + def encoding(self): + """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the + data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding + """ + return self._get_sub_prop("encoding") + + @encoding.setter + def encoding(self, value): + self._set_sub_prop("encoding", value) + + @property + def field_delimiter(self): + """Optional[str]: The separator for fields in a CSV file. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter + """ + return self._get_sub_prop("fieldDelimiter") + + @field_delimiter.setter + def field_delimiter(self, value): + self._set_sub_prop("fieldDelimiter", value) + + @property + def hive_partitioning(self): + """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + it configures hive partitioning support. + + .. note:: + **Experimental**. This feature is experimental and might change or + have limited support. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options + """ + prop = self._get_sub_prop("hivePartitioningOptions") + if prop is None: + return None + return HivePartitioningOptions.from_api_repr(prop) + + @hive_partitioning.setter + def hive_partitioning(self, value): + if value is not None: + if isinstance(value, HivePartitioningOptions): + value = value.to_api_repr() + else: + raise TypeError("Expected a HivePartitioningOptions instance or None.") + + self._set_sub_prop("hivePartitioningOptions", value) + + @property + def ignore_unknown_values(self): + """Optional[bool]: Ignore extra values not represented in the table schema. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values + """ + return self._get_sub_prop("ignoreUnknownValues") + + @ignore_unknown_values.setter + def ignore_unknown_values(self, value): + self._set_sub_prop("ignoreUnknownValues", value) + + @property + def max_bad_records(self): + """Optional[int]: Number of invalid rows to ignore. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records + """ + return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) + + @max_bad_records.setter + def max_bad_records(self, value): + self._set_sub_prop("maxBadRecords", value) + + @property + def null_marker(self): + """Optional[str]: Represents a null value (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker + """ + return self._get_sub_prop("nullMarker") + + @null_marker.setter + def null_marker(self, value): + self._set_sub_prop("nullMarker", value) + + @property + def quote_character(self): + """Optional[str]: Character used to quote data sections (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote + """ + return self._get_sub_prop("quote") + + @quote_character.setter + def quote_character(self, value): + self._set_sub_prop("quote", value) + + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. 
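The CSV-specific options above compose the same way. A sketch for a pipe-delimited file with a custom null marker; all values are illustrative:

    from google.cloud import bigquery

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        field_delimiter="|",
        null_marker=r"\N",
        quote_character='"',
        allow_quoted_newlines=True,
        max_bad_records=10,
    )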
+ + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. + """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + + @property + def schema(self): + """Optional[Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ]]]: Schema of the destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema + """ + schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) + if schema is None: + return + return [SchemaField.from_api_repr(field) for field in schema] + + @schema.setter + def schema(self, value): + if value is None: + self._del_sub_prop("schema") + return + + value = _to_schema_fields(value) + + _helpers._set_sub_prop( + self._properties, + ["load", "schema", "fields"], + [field.to_api_repr() for field in value], + ) + + @property + def schema_update_options(self): + """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies + updates to the destination table schema to allow as a side effect of + the load job. + """ + return self._get_sub_prop("schemaUpdateOptions") + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop("schemaUpdateOptions", values) + + @property + def skip_leading_rows(self): + """Optional[int]: Number of rows to skip when reading data (CSV only). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows + """ + return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) + + @skip_leading_rows.setter + def skip_leading_rows(self, value): + self._set_sub_prop("skipLeadingRows", str(value)) + + @property + def source_format(self): + """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format + """ + return self._get_sub_prop("sourceFormat") + + @source_format.setter + def source_format(self, value): + self._set_sub_prop("sourceFormat", value) + + @property + def time_partitioning(self): + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based + partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. 
+ """ + prop = self._get_sub_prop("timePartitioning") + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("timePartitioning", api_repr) + else: + self._del_sub_prop("timePartitioning") + + @property + def use_avro_logical_types(self): + """Optional[bool]: For loads of Avro data, governs whether Avro logical types are + converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than + raw types (e.g. INTEGER). + """ + return self._get_sub_prop("useAvroLogicalTypes") + + @use_avro_logical_types.setter + def use_avro_logical_types(self, value): + self._set_sub_prop("useAvroLogicalTypes", bool(value)) + + @property + def write_disposition(self): + """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if + the destination table already exists. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + +class LoadJob(_AsyncJob): + """Asynchronous job for loading data into a table. + + Can load from Google Cloud Storage URIs or from a file. + + Args: + job_id (str): the job's ID + + source_uris (Optional[Sequence[str]]): + URIs of one or more data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. Pass None for jobs that load from a file. + + destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + """ + + _JOB_TYPE = "load" + + def __init__(self, job_id, source_uris, destination, client, job_config=None): + super(LoadJob, self).__init__(job_id, client) + + if not job_config: + job_config = LoadJobConfig() + + self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if source_uris is not None: + _helpers._set_sub_prop( + self._properties, ["configuration", "load", "sourceUris"], source_uris + ) + + if destination is not None: + _helpers._set_sub_prop( + self._properties, + ["configuration", "load", "destinationTable"], + destination.to_api_repr(), + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where loaded rows are written + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table + """ + dest_config = _helpers._get_sub_prop( + self._properties, ["configuration", "load", "destinationTable"] + ) + return TableReference.from_api_repr(dest_config) + + @property + def source_uris(self): + """Optional[Sequence[str]]: URIs of data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. None for jobs that load from a file. 
+ """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "load", "sourceUris"] + ) + + @property + def allow_jagged_rows(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. + """ + return self._configuration.allow_jagged_rows + + @property + def allow_quoted_newlines(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. + """ + return self._configuration.allow_quoted_newlines + + @property + def autodetect(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. + """ + return self._configuration.autodetect + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def encoding(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. + """ + return self._configuration.encoding + + @property + def field_delimiter(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. + """ + return self._configuration.field_delimiter + + @property + def ignore_unknown_values(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. + """ + return self._configuration.ignore_unknown_values + + @property + def max_bad_records(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. + """ + return self._configuration.max_bad_records + + @property + def null_marker(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. + """ + return self._configuration.null_marker + + @property + def quote_character(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. + """ + return self._configuration.quote_character + + @property + def skip_leading_rows(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. + """ + return self._configuration.skip_leading_rows + + @property + def source_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. + """ + return self._configuration.source_format + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def schema(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. + """ + return self._configuration.schema + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) + or :data:`None` if using default encryption. + + See + :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + @property + def destination_table_description(self): + """Optional[str] name given to destination table. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description + """ + return self._configuration.destination_table_description + + @property + def destination_table_friendly_name(self): + """Optional[str] name given to destination table. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name + """ + return self._configuration.destination_table_friendly_name + + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + + @property + def use_avro_logical_types(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. + """ + return self._configuration.use_avro_logical_types + + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + + @property + def input_file_bytes(self): + """Count of bytes loaded from source files. + + Returns: + Optional[int]: the count (None until set from the server). + + Raises: + ValueError: for invalid value types. + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "inputFileBytes"] + ) + ) + + @property + def input_files(self): + """Count of source files. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "inputFiles"] + ) + ) + + @property + def output_bytes(self): + """Count of bytes saved to destination table. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "outputBytes"] + ) + ) + + @property + def output_rows(self): + """Count of rows saved to destination table. + + Returns: + Optional[int]: the count (None until set from the server). + """ + return _helpers._int_or_none( + _helpers._get_sub_prop( + self._properties, ["statistics", "load", "outputRows"] + ) + ) + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Exclude statistics, if set. + return { + "jobReference": self._properties["jobReference"], + "configuration": self._properties["configuration"], + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + .. note: + + This method assumes that the project found in the resource matches + the client's project. + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. 
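The LoadJob statistics above (input_files, input_file_bytes, output_rows, and so on) are only populated once the job finishes. A sketch of a file-based load, where source_uris stays None; the table ID and file name are placeholders:

    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "example-project.example_dataset.names"  # placeholder

    with open("names.csv", "rb") as source_file:
        load_job = client.load_table_from_file(
            source_file,
            table_id,
            job_config=bigquery.LoadJobConfig(
                source_format=bigquery.SourceFormat.CSV, autodetect=True
            ),
        )

    load_job.result()  # Wait for completion.
    print("rows loaded:", load_job.output_rows)
    print("source URIs:", load_job.source_uris)  # None for file-based loads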
+ """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client) + job._set_properties(resource) + return job diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py new file mode 100644 index 000000000..e25077360 --- /dev/null +++ b/google/cloud/bigquery/job/query.py @@ -0,0 +1,1644 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for query jobs.""" + +import concurrent.futures +import copy +import re + +from google.api_core import exceptions +import requests +import six + +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.table import _EmptyRowIterator +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import _table_arg_to_table_ref +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TimePartitioning + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) +_TIMEOUT_BUFFER_SECS = 0.1 + + +def _contains_order_by(query): + """Do we need to preserve the order of the query results? + + This function has known false positives, such as with ordered window + functions: + + .. code-block:: sql + + SELECT SUM(x) OVER ( + window_name + PARTITION BY... + ORDER BY... + window_frame_clause) + FROM ... + + This false positive failure case means the behavior will be correct, but + downloading results with the BigQuery Storage API may be slower than it + otherwise would. This is preferable to the false negative case, where + results are expected to be in order but are not (due to parallel reads). 
+ """ + return query and _CONTAINS_ORDER_BY.search(query) + + +def _from_api_repr_query_parameters(resource): + return [_query_param_from_api_repr(mapping) for mapping in resource] + + +def _to_api_repr_query_parameters(value): + return [query_parameter.to_api_repr() for query_parameter in value] + + +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources + + +def _to_api_repr_udf_resources(value): + return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] + + +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + +class QueryJobConfig(_JobConfig): + """Configuration options for query jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(QueryJobConfig, self).__init__("query", **kwargs) + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + + @property + def allow_large_results(self): + """bool: Allow large query results tables (legacy SQL, only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results + """ + return self._get_sub_prop("allowLargeResults") + + @allow_large_results.setter + def allow_large_results(self, value): + self._set_sub_prop("allowLargeResults", value) + + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def default_dataset(self): + """google.cloud.bigquery.dataset.DatasetReference: the default dataset + to use for unqualified table names in the query or :data:`None` if not + set. 
+ + The ``default_dataset`` setter accepts: + + - a :class:`~google.cloud.bigquery.dataset.Dataset`, or + - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or + - a :class:`str` of the fully-qualified dataset ID in standard SQL + format. The value must included a project ID and dataset ID + separated by ``.``. For example: ``your-project.your_dataset``. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset + """ + prop = self._get_sub_prop("defaultDataset") + if prop is not None: + prop = DatasetReference.from_api_repr(prop) + return prop + + @default_dataset.setter + def default_dataset(self, value): + if value is None: + self._set_sub_prop("defaultDataset", None) + return + + if isinstance(value, six.string_types): + value = DatasetReference.from_string(value) + + if isinstance(value, (Dataset, DatasetListItem)): + value = value.reference + + resource = value.to_api_repr() + self._set_sub_prop("defaultDataset", resource) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where results are + written or :data:`None` if not set. + + The ``destination`` setter accepts: + + - a :class:`~google.cloud.bigquery.table.Table`, or + - a :class:`~google.cloud.bigquery.table.TableReference`, or + - a :class:`str` of the fully-qualified table ID in standard SQL + format. The value must included a project ID, dataset ID, and table + ID, each separated by ``.``. For example: + ``your-project.your_dataset.your_table``. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table + """ + prop = self._get_sub_prop("destinationTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @destination.setter + def destination(self, value): + if value is None: + self._set_sub_prop("destinationTable", None) + return + + value = _table_arg_to_table_ref(value) + resource = value.to_api_repr() + self._set_sub_prop("destinationTable", resource) + + @property + def dry_run(self): + """bool: :data:`True` if this query should be a dry run to estimate + costs. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run + """ + return self._properties.get("dryRun") + + @dry_run.setter + def dry_run(self, value): + self._properties["dryRun"] = value + + @property + def flatten_results(self): + """bool: Flatten nested/repeated fields in results. (Legacy SQL only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results + """ + return self._get_sub_prop("flattenResults") + + @flatten_results.setter + def flatten_results(self, value): + self._set_sub_prop("flattenResults", value) + + @property + def maximum_billing_tier(self): + """int: Deprecated. Changes the billing tier to allow high-compute + queries. 
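default_dataset and dry_run combine naturally to estimate a query's cost without running it; the project and dataset IDs are placeholders:

    from google.cloud import bigquery

    client = bigquery.Client()

    job_config = bigquery.QueryJobConfig(
        default_dataset="example-project.example_dataset",  # placeholder
        dry_run=True,
        use_query_cache=False,
    )
    # "names" is resolved against the default dataset configured above.
    query_job = client.query("SELECT COUNT(*) FROM names", job_config=job_config)

    # Dry-run jobs finish immediately and report an estimate.
    print("Estimated bytes processed:", query_job.total_bytes_processed)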
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier + """ + return self._get_sub_prop("maximumBillingTier") + + @maximum_billing_tier.setter + def maximum_billing_tier(self, value): + self._set_sub_prop("maximumBillingTier", value) + + @property + def maximum_bytes_billed(self): + """int: Maximum bytes to be billed for this job or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed + """ + return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) + + @maximum_bytes_billed.setter + def maximum_bytes_billed(self, value): + self._set_sub_prop("maximumBytesBilled", str(value)) + + @property + def priority(self): + """google.cloud.bigquery.job.QueryPriority: Priority of the query. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority + """ + return self._get_sub_prop("priority") + + @priority.setter + def priority(self, value): + self._set_sub_prop("priority", value) + + @property + def query_parameters(self): + """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter]]: list of parameters + for parameterized query (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters + """ + prop = self._get_sub_prop("queryParameters", default=[]) + return _from_api_repr_query_parameters(prop) + + @query_parameters.setter + def query_parameters(self, values): + self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) + + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. 
+ """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + + @property + def udf_resources(self): + """List[google.cloud.bigquery.query.UDFResource]: user + defined function resources (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources + """ + prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) + return _from_api_repr_udf_resources(prop) + + @udf_resources.setter + def udf_resources(self, values): + self._set_sub_prop( + "userDefinedFunctionResources", _to_api_repr_udf_resources(values) + ) + + @property + def use_legacy_sql(self): + """bool: Use legacy SQL syntax. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql + """ + return self._get_sub_prop("useLegacySql") + + @use_legacy_sql.setter + def use_legacy_sql(self, value): + self._set_sub_prop("useLegacySql", value) + + @property + def use_query_cache(self): + """bool: Look for the query result in the cache. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache + """ + return self._get_sub_prop("useQueryCache") + + @use_query_cache.setter + def use_query_cache(self, value): + self._set_sub_prop("useQueryCache", value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + @property + def table_definitions(self): + """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: + Definitions for external tables or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions + """ + prop = self._get_sub_prop("tableDefinitions") + if prop is not None: + prop = _from_api_repr_table_defs(prop) + return prop + + @table_definitions.setter + def table_definitions(self, values): + self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) + + @property + def time_partitioning(self): + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies + time-based partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.TimePartitioning` or + :data:`None`. 
+ """ + prop = self._get_sub_prop("timePartitioning") + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("timePartitioning", api_repr) + + @property + def clustering_fields(self): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + BigQuery supports clustering for both partitioned and + non-partitioned tables. + """ + prop = self._get_sub_prop("clustering") + if prop is not None: + return list(prop.get("fields", ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop("clustering", {"fields": value}) + else: + self._del_sub_prop("clustering") + + @property + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the query job. + """ + return self._get_sub_prop("schemaUpdateOptions") + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop("schemaUpdateOptions", values) + + def to_api_repr(self): + """Build an API representation of the query job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + resource = copy.deepcopy(self._properties) + + # Query parameters have an addition property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource["query"].get("queryParameters") + if query_parameters: + if query_parameters[0].get("name") is None: + resource["query"]["parameterMode"] = "POSITIONAL" + else: + resource["query"]["parameterMode"] = "NAMED" + + return resource + + +class QueryJob(_AsyncJob): + """Asynchronous job: query tables. + + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + query (str): SQL query string. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the query job. + """ + + _JOB_TYPE = "query" + _UDF_KEY = "userDefinedFunctionResources" + + def __init__(self, job_id, query, client, job_config=None): + super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + self._properties["configuration"] = job_config._properties + self._configuration = job_config + + if query: + _helpers._set_sub_prop( + self._properties, ["configuration", "query", "query"], query + ) + + self._query_results = None + self._done_timeout = None + self._transport_timeout = None + + @property + def allow_large_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. + """ + return self._configuration.allow_large_results + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. 
+ """ + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. + """ + return self._configuration.destination + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + @property + def dry_run(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. + """ + return self._configuration.dry_run + + @property + def flatten_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. + """ + return self._configuration.flatten_results + + @property + def priority(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. + """ + return self._configuration.priority + + @property + def query(self): + """str: The query text used in this query job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "query", "query"] + ) + + @property + def query_parameters(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters + + @property + def udf_resources(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources + + @property + def use_legacy_sql(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @property + def use_query_cache(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def maximum_billing_tier(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier + + @property + def maximum_bytes_billed(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. + """ + return self._configuration.maximum_bytes_billed + + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + + @property + def table_definitions(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + """ + return self._configuration.table_definitions + + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. 
+ """ + return self._configuration.clustering_fields + + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Use to_api_repr to allow for some configuration properties to be set + # automatically. + configuration = self._configuration.to_api_repr() + return { + "jobReference": self._properties["jobReference"], + "configuration": configuration, + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, client=client) + job._set_properties(resource) + return job + + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan + + Returns: + List[google.cloud.bigquery.job.QueryPlanEntry]: + mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + plan_entries = self._job_statistics().get("queryPlan", ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + + @property + def timeline(self): + """List(TimelineEntry): Return the query execution timeline + from job statistics. + """ + raw = self._job_statistics().get("timeline", ()) + return [TimelineEntry.from_api_repr(entry) for entry in raw] + + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed + + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesProcessed") + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed + + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesBilled") + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier + + Returns: + Optional[int]: + Billing tier used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get("billingTier") + + @property + def cache_hit(self): + """Return whether or not query results were served from cache. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit + + Returns: + Optional[bool]: + whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get("cacheHit") + + @property + def ddl_operation_performed(self): + """Optional[str]: Return the DDL operation performed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed + + """ + return self._job_statistics().get("ddlOperationPerformed") + + @property + def ddl_target_routine(self): + """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present + for CREATE/DROP FUNCTION/PROCEDURE queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine + """ + prop = self._job_statistics().get("ddlTargetRoutine") + if prop is not None: + prop = RoutineReference.from_api_repr(prop) + return prop + + @property + def ddl_target_table(self): + """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present + for CREATE/DROP TABLE/VIEW queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table + """ + prop = self._job_statistics().get("ddlTargetTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @property + def num_dml_affected_rows(self): + """Return the number of DML rows affected by the job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows + + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("numDmlAffectedRows") + if result is not None: + result = int(result) + return result + + @property + def slot_millis(self): + """Union[int, None]: Slot-milliseconds used by this query job.""" + return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type + + Returns: + Optional[str]: + type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get("statementType") + + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables + + Returns: + List[Dict]: + mappings describing the query plan, or an empty list + if the query has not yet completed. 
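For DDL and DML statements, the statistics above surface the operation type and the number of affected rows. The table ID is a placeholder, and the commented values show what BigQuery typically reports:

    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "example-project.example_dataset.tmp_names"  # placeholder

    ddl_job = client.query(
        "CREATE TABLE `{}` (name STRING, word_count INT64)".format(table_id)
    )
    ddl_job.result()
    print(ddl_job.statement_type)           # e.g. "CREATE_TABLE"
    print(ddl_job.ddl_operation_performed)  # e.g. "CREATE"
    print(ddl_job.ddl_target_table)         # TableReference for the new table

    dml_job = client.query(
        "DELETE FROM `{}` WHERE word_count IS NULL".format(table_id)
    )
    dml_job.result()
    print(dml_job.num_dml_affected_rows)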
+ """ + tables = [] + datasets_by_project_name = {} + + for table in self._job_statistics().get("referencedTables", ()): + + t_project = table["projectId"] + + ds_id = table["datasetId"] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) + if t_dataset is None: + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset + + t_name = table["tableId"] + tables.append(t_dataset.table(t_name)) + + return tables + + @property + def undeclared_query_parameters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters + + Returns: + List[Union[ \ + google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter \ + ]]: + Undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) + + for parameter in undeclared: + p_type = parameter["parameterType"] + + if "arrayType" in p_type: + klass = ArrayQueryParameter + elif "structTypes" in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + + @property + def estimated_bytes_processed(self): + """Return the estimated number of bytes processed by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed + + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("estimatedBytesProcessed") + if result is not None: + result = int(result) + return result + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Refresh the job and checks if it is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. + + Returns: + bool: True if the job is complete, False otherwise. + """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. + self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. 
+ api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(api_timeout * 1000) + + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) + + return self.state == _DONE_STATE + + def _blocking_poll(self, timeout=None, **kwargs): + self._done_timeout = timeout + self._transport_timeout = timeout + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) + + @staticmethod + def _format_for_exception(query, job_id): + """Format a query for the output in exception message. + + Args: + query (str): The SQL query to format. + job_id (str): The ID of the job that ran the query. + + Returns: + str: A formatted query text. + """ + template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + + lines = query.splitlines() + max_line_len = max(len(line) for line in lines) + + header = "-----Query Job SQL Follows-----" + header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) + + # Print out a "ruler" above and below the SQL so we can judge columns. + # Left pad for the line numbers (4 digits plus ":"). + ruler = " |" + " . |" * (max_line_len // 10) + + # Put line numbers next to the SQL. + body = "\n".join( + "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) + ) + + return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: If the job has already begun. + """ + + try: + super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + + def result( + self, + page_size=None, + max_results=None, + retry=DEFAULT_RETRY, + timeout=None, + start_index=None, + ): + """Start the job and wait for it to complete and get the result. + + Args: + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. + max_results (Optional[int]): + The maximum total number of rows from this request. + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves rows. 
+ timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + start_index (Optional[int]): + The zero-based index of the starting row to read. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + google.cloud.exceptions.GoogleAPICallError: + If the job failed. + concurrent.futures.TimeoutError: + If the job did not complete in the given timeout. + """ + try: + super(QueryJob, self).result(retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + except requests.exceptions.Timeout as exc: + six.raise_from(concurrent.futures.TimeoutError, exc) + + # If the query job is complete but there are no query results, this was + # special job, such as a DDL query. Return an empty result set to + # indicate success and avoid calling tabledata.list on a table which + # can't be read (such as a view table). + if self._query_results.total_rows is None: + return _EmptyRowIterator() + + schema = self._query_results.schema + dest_table_ref = self.destination + dest_table = Table(dest_table_ref, schema=schema) + dest_table._properties["numRows"] = self._query_results.total_rows + rows = self._client.list_rows( + dest_table, + page_size=page_size, + max_results=max_results, + start_index=start_index, + retry=retry, + timeout=timeout, + ) + rows._preserve_order = _contains_order_by(self.query) + return rows + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_arrow() + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=True, + ): + """[Beta] Create a class:`pyarrow.Table` by loading all pages of a + table or query. + + Args: + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This API + is a billable API. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. 
The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + Returns: + pyarrow.Table + A :class:`pyarrow.Table` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + ..versionadded:: 1.17.0 + """ + return self.result().to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe() + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + ): + """Return a pandas DataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + ..versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + ..versionadded:: 1.26.0 + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + """ + return self.result().to_dataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + ) + + def __iter__(self): + return iter(self.result()) + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + Args: + kind (str): step type. + substeps (List): names of substeps. + """ + + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource (Dict): JSON representation of the entry. 
+ + Returns: + google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. + """ + return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """QueryPlanEntry represents a single stage of a query execution plan. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + ExplainQueryStage representation returned from API. + + Returns: + google.cloud.bigquery.job.QueryPlanEntry: + Query plan entry parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def name(self): + """Optional[str]: Human-readable name of the stage.""" + return self._properties.get("name") + + @property + def entry_id(self): + """Optional[str]: Unique ID for the stage within the plan.""" + return self._properties.get("id") + + @property + def start(self): + """Optional[Datetime]: Datetime when the stage started.""" + if self._properties.get("startMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("startMs")) * 1000.0 + ) + + @property + def end(self): + """Optional[Datetime]: Datetime when the stage ended.""" + if self._properties.get("endMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("endMs")) * 1000.0 + ) + + @property + def input_stages(self): + """List(int): Entry IDs for stages that were inputs for this stage.""" + if self._properties.get("inputStages") is None: + return [] + return [ + _helpers._int_or_none(entry) + for entry in self._properties.get("inputStages") + ] + + @property + def parallel_inputs(self): + """Optional[int]: Number of parallel input segments within + the stage. + """ + return _helpers._int_or_none(self._properties.get("parallelInputs")) + + @property + def completed_parallel_inputs(self): + """Optional[int]: Number of parallel input segments completed.""" + return _helpers._int_or_none(self._properties.get("completedParallelInputs")) + + @property + def wait_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsAvg")) + + @property + def wait_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsMax")) + + @property + def wait_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("waitRatioAvg") + + @property + def wait_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. 
+ """ + return self._properties.get("waitRatioMax") + + @property + def read_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsAvg")) + + @property + def read_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsMax")) + + @property + def read_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent reading + input, relative to the longest time spent by any worker in any stage + of the overall plan. + """ + return self._properties.get("readRatioAvg") + + @property + def read_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent reading + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("readRatioMax") + + @property + def compute_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsAvg")) + + @property + def compute_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsMax")) + + @property + def compute_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioAvg") + + @property + def compute_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioMax") + + @property + def write_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsAvg")) + + @property + def write_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsMax")) + + @property + def write_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get("writeRatioAvg") + + @property + def write_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get("writeRatioMax") + + @property + def records_read(self): + """Optional[int]: Number of records read by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsRead")) + + @property + def records_written(self): + """Optional[int]: Number of records written by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsWritten")) + + @property + def status(self): + """Optional[str]: status of this stage.""" + return self._properties.get("status") + + @property + def shuffle_output_bytes(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle. 
+ """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) + + @property + def shuffle_output_bytes_spilled(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle and spilled to disk. + """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) + + @property + def steps(self): + """List(QueryPlanEntryStep): List of step operations performed by + each worker in the stage. + """ + return [ + QueryPlanEntryStep.from_api_repr(step) + for step in self._properties.get("steps", []) + ] + + +class TimelineEntry(object): + """TimelineEntry represents progress of a query job at a particular + point in time. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + QueryTimelineSample representation returned from API. + + Returns: + google.cloud.bigquery.TimelineEntry: + Timeline sample parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def elapsed_ms(self): + """Optional[int]: Milliseconds elapsed since start of query + execution.""" + return _helpers._int_or_none(self._properties.get("elapsedMs")) + + @property + def active_units(self): + """Optional[int]: Current number of input units being processed + by workers, reported as largest value since the last sample.""" + return _helpers._int_or_none(self._properties.get("activeUnits")) + + @property + def pending_units(self): + """Optional[int]: Current number of input units remaining for + query stages active at this sample time.""" + return _helpers._int_or_none(self._properties.get("pendingUnits")) + + @property + def completed_units(self): + """Optional[int]: Current number of input units completed by + this query.""" + return _helpers._int_or_none(self._properties.get("completedUnits")) + + @property + def slot_millis(self): + """Optional[int]: Cumulative slot-milliseconds consumed by + this query.""" + return _helpers._int_or_none(self._properties.get("totalSlotMs")) diff --git a/tests/unit/job/__init__.py b/tests/unit/job/__init__.py new file mode 100644 index 000000000..c6334245a --- /dev/null +++ b/tests/unit/job/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py new file mode 100644 index 000000000..f928054f6 --- /dev/null +++ b/tests/unit/job/helpers.py @@ -0,0 +1,198 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock +from google.api_core import exceptions + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project="test-project", connection=None): + from google.cloud.bigquery.client import Client + + if connection is None: + connection = _make_connection() + + client = Client(project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + + +def _make_connection(*responses): + import google.cloud.bigquery._http + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + return mock_conn + + +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": "rateLimitExceeded"}] + ) + + +def _make_job_resource( + creation_time_ms=1437767599006, + started_time_ms=1437767600007, + ended_time_ms=1437767601008, + started=False, + ended=False, + etag="abc-def-hjk", + endpoint="https://bigquery.googleapis.com", + job_type="load", + job_id="a-random-id", + project_id="some-project", + user_email="bq-user@example.com", +): + resource = { + "status": {"state": "PENDING"}, + "configuration": {job_type: {}}, + "statistics": {"creationTime": creation_time_ms, job_type: {}}, + "etag": etag, + "id": "{}:{}".format(project_id, job_id), + "jobReference": {"projectId": project_id, "jobId": job_id}, + "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( + endpoint, project_id, job_id + ), + "user_email": user_email, + } + + if started or ended: + resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" + + if ended: + resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" + + if job_type == "query": + resource["configuration"]["query"]["destinationTable"] = { + "projectId": project_id, + "datasetId": "_temp_dataset", + "tableId": "_temp_table", + } + + return resource + + +class _Base(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + + ENDPOINT = "https://bigquery.googleapis.com" + PROJECT = "project" + SOURCE1 = "http://example.com/source1.csv" + DS_ID = "dataset_id" + DS_REF = DatasetReference(PROJECT, DS_ID) + TABLE_ID = "table_id" + TABLE_REF = TableReference(DS_REF, TABLE_ID) + JOB_ID = "JOB_ID" + JOB_TYPE = "unknown" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = 
"ETAG" + self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( + self.ENDPOINT, self.PROJECT, self.JOB_ID + ) + self.USER_EMAIL = "phred@example.com" + + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + + def _make_resource(self, started=False, ended=False): + self._setUpConstants() + return _make_job_resource( + creation_time_ms=int(self.WHEN_TS * 1000), + started_time_ms=int(self.WHEN_TS * 1000), + ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, + started=started, + ended=ended, + etag=self.ETAG, + endpoint=self.ENDPOINT, + job_type=self.JOB_TYPE, + job_id=self.JOB_ID, + project_id=self.PROJECT, + user_email=self.USER_EMAIL, + ) + + def _verifyInitialReadonlyProperties(self, job): + # root elements of resource + self.assertIsNone(job.etag) + self.assertIsNone(job.self_link) + self.assertIsNone(job.user_email) + + # derived from resource['statistics'] + self.assertIsNone(job.created) + self.assertIsNone(job.started) + self.assertIsNone(job.ended) + + # derived from resource['status'] + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + def _verifyReadonlyResourceProperties(self, job, resource): + from datetime import timedelta + + statistics = resource.get("statistics", {}) + + if "creationTime" in statistics: + self.assertEqual(job.created, self.WHEN) + else: + self.assertIsNone(job.created) + + if "startTime" in statistics: + self.assertEqual(job.started, self.WHEN) + else: + self.assertIsNone(job.started) + + if "endTime" in statistics: + self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) + else: + self.assertIsNone(job.ended) + + if "etag" in resource: + self.assertEqual(job.etag, self.ETAG) + else: + self.assertIsNone(job.etag) + + if "selfLink" in resource: + self.assertEqual(job.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(job.self_link) + + if "user_email" in resource: + self.assertEqual(job.user_email, self.USER_EMAIL) + else: + self.assertIsNone(job.user_email) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py new file mode 100644 index 000000000..90d4388b8 --- /dev/null +++ b/tests/unit/job/test_base.py @@ -0,0 +1,1105 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import unittest + +from google.api_core import exceptions +import google.api_core.retry +import mock +from six.moves import http_client + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_retriable_exception +from .helpers import _make_job_resource + + +class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._error_result_to_exception(*args, **kwargs) + + def test_simple(self): + error_result = {"reason": "invalid", "message": "bad request"} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertTrue(exception.message.startswith("bad request")) + self.assertIn(error_result, exception.errors) + + def test_missing_reason(self): + error_result = {} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + + +class Test_JobReference(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobReference + + def _make_one(self, job_id, project, location): + return self._get_target_class()(job_id, project, location) + + def test_ctor(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + def test__to_api_repr(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual( + job_ref._to_api_repr(), + { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + ) + + def test_from_api_repr(self): + api_repr = { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + } + + job_ref = self._get_target_class()._from_api_repr(api_repr) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + +class Test_AsyncJob(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._AsyncJob + + def _make_one(self, job_id, client): + return self._get_target_class()(job_id, client) + + def _make_derived_class(self): + class Derived(self._get_target_class()): + _JOB_TYPE = "derived" + + return Derived + + def _make_derived(self, job_id, client): + return self._make_derived_class()(job_id, client) + + @staticmethod + def _job_reference(job_id, project, location): + from google.cloud.bigquery import job + + return job._JobReference(job_id, project, location) + + def test_ctor_w_bare_job_id(self): + import threading + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertIsNone(job.location) + self.assertIs(job._client, client) + self.assertEqual( + job._properties, + {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, + ) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test_ctor_w_job_ref(self): + import threading + + other_project 
= "other-project-234" + client = _make_client(project=other_project) + job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) + job = self._make_one(job_ref, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertEqual(job.location, self.LOCATION) + self.assertIs(job._client, client) + self.assertEqual( + job._properties, + { + "jobReference": { + "projectId": self.PROJECT, + "location": self.LOCATION, + "jobId": self.JOB_ID, + } + }, + ) + self.assertFalse(job._result_set) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test__require_client_w_none(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(None), client) + + def test__require_client_w_other(self): + client = _make_client(project=self.PROJECT) + other = object() + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(other), other) + + def test_job_type(self): + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + + self.assertEqual(derived.job_type, "derived") + + def test_parent_job_id(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.parent_job_id) + job._properties["statistics"] = {"parentJobId": "parent-job-123"} + self.assertEqual(job.parent_job_id, "parent-job-123") + + def test_script_statistics(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.script_statistics) + job._properties["statistics"] = { + "scriptStatistics": { + "evaluationKind": "EXPRESSION", + "stackFrames": [ + { + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + } + ], + } + } + script_stats = job.script_statistics + self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") + stack_frames = script_stats.stack_frames + self.assertEqual(len(stack_frames), 1) + stack_frame = stack_frames[0] + self.assertIsNone(stack_frame.procedure_id) + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + + def test_num_child_jobs(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.num_child_jobs, 0) + job._properties["statistics"] = {"numChildJobs": "17"} + self.assertEqual(job.num_child_jobs, 17) + + def test_labels_miss(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertEqual(job.labels, {}) + + def test_labels_update_in_place(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + labels = job.labels + labels["foo"] = "bar" # update in place + self.assertEqual(job.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["labels"] = labels + self.assertEqual(job.labels, labels) + + def test_etag(self): + etag = "ETAG-123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.etag) + 
job._properties["etag"] = etag + self.assertEqual(job.etag, etag) + + def test_self_link(self): + self_link = "https://api.example.com/123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.self_link) + job._properties["selfLink"] = self_link + self.assertEqual(job.self_link, self_link) + + def test_user_email(self): + user_email = "user@example.com" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.user_email) + job._properties["user_email"] = user_email + self.assertEqual(job.user_email, user_email) + + @staticmethod + def _datetime_and_millis(): + import datetime + import pytz + from google.cloud._helpers import _millis + + now = datetime.datetime.utcnow().replace( + microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + ) + return now, _millis(now) + + def test_created(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.created) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.created) + stats["creationTime"] = millis + self.assertEqual(job.created, now) + + def test_started(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.started) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.started) + stats["startTime"] = millis + self.assertEqual(job.started, now) + + def test_ended(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.ended) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.ended) + stats["endTime"] = millis + self.assertEqual(job.ended, now) + + def test__job_statistics(self): + statistics = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + self.assertEqual(derived._job_statistics(), {}) + stats = derived._properties["statistics"] = {} + self.assertEqual(derived._job_statistics(), {}) + stats["derived"] = statistics + self.assertEqual(derived._job_statistics(), statistics) + + def test_error_result(self): + error_result = { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.error_result) + status = job._properties["status"] = {} + self.assertIsNone(job.error_result) + status["errorResult"] = error_result + self.assertEqual(job.error_result, error_result) + + def test_errors(self): + errors = [ + { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.errors) + status = job._properties["status"] = {} + self.assertIsNone(job.errors) + status["errors"] = errors + self.assertEqual(job.errors, errors) + + def test_state(self): + state = "STATE" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.state) + status = job._properties["status"] = {} + self.assertIsNone(job.state) + status["state"] = state + self.assertEqual(job.state, state) + + 
def _set_properties_job(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._set_future_result = mock.Mock() + job._properties = { + "jobReference": job._properties["jobReference"], + "foo": "bar", + } + return job + + def test__set_properties_no_stats(self): + config = {"test": True} + resource = {"configuration": config} + job = self._set_properties_job() + + job._set_properties(resource) + + self.assertEqual(job._properties, resource) + + def test__set_properties_w_creation_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"creationTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["creationTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_start_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"startTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["startTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_end_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"endTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["endTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__check_resource_config_missing_job_ref(self): + resource = {} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_job_id(self): + resource = {"jobReference": {}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_configuration(self): + resource = {"jobReference": {"jobId": self.JOB_ID}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_config_type(self): + resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_ok(self): + derived_config = {"foo": "bar"} + resource = { + "jobReference": {"jobId": self.JOB_ID}, + "configuration": {"derived": derived_config}, + } + klass = self._make_derived_class() + + # Should not throw. 
+ klass._check_resource_config(resource) + + def test__build_resource(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test_to_api_repr(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test__begin_already(self): + job = self._set_properties_job() + job._properties["status"] = {"state": "WHATEVER"} + + with self.assertRaises(ValueError): + job._begin() + + def test__begin_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test__begin_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin(client=client, retry=retry, timeout=7.5) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_exists_defaults_miss(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_RETRY + + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.side_effect = NotFound("testing") + self.assertFalse(job.exists()) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id", "location": self.LOCATION}, + timeout=None, + ) + + def test_exists_explicit_hit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + 
self.assertTrue(job.exists(client=client, retry=retry)) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id"}, + timeout=None, + ) + + def test_exists_w_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + job = self._set_properties_job() + call_api = job._client._call_api = mock.Mock() + job.exists(timeout=7.5) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={"path": PATH}, + job_ref=job, + method="GET", + path=PATH, + query_params={"fields": "id"}, + timeout=7.5, + ) + + def test_reload_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + job.reload() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_reload_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + job.reload(client=client, retry=retry, timeout=4.2) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=4.2, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_defaults(self): + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + connection = job._client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel()) + + final_attributes.assert_called() + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_explicit(self): + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + 
"projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + client = _make_client(project=other_project) + connection = client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel(client=client, timeout=7.5)) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_w_custom_retry(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + + api_request_patcher = mock.patch.object( + job._client._connection, "api_request", side_effect=[ValueError, response] + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + self.assertTrue(result) + self.assertEqual(job._properties, resource) + self.assertEqual( + fake_api_request.call_args_list, + [ + mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), + mock.call( + method="POST", path=api_path, query_params={}, timeout=7.5 + ), # was retried once + ], + ) + + def test__set_future_result_wo_done(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_result_set(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + job._result_set = True + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_w_error(self): + from google.cloud.exceptions import NotFound + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "notFound", "message": "testing"}, + } + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_called_once() + args, kw = set_exception.call_args + (exception,) = args + self.assertIsInstance(exception, NotFound) + self.assertEqual(exception.message, "testing") + self.assertEqual(kw, {}) + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_wo_error(self): + client = _make_client(project=self.PROJECT) 
+ job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_called_once_with(job) + + def test_done_defaults_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + + self.assertFalse(job.done()) + + reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) + + def test_done_explicit_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + self.assertFalse(job.done(retry=retry, timeout=7.5)) + + reload_.assert_called_once_with(retry=retry, timeout=7.5) + + def test_done_already(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + + self.assertTrue(job.done()) + + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + _make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job.result(), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. 
See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + self.assertIs(job.result(retry=custom_retry), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. + job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) + timeout = 1 + + self.assertIs(job.result(timeout=timeout), job) + + conn.api_request.assert_not_called() + + def test_cancelled_wo_error_result(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_not_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "other"}} + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_w_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "stopped"}} + + self.assertTrue(job.cancelled()) + + +class Test_JobConfig(unittest.TestCase): + JOB_TYPE = "testing" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobConfig + + def _make_one(self, job_type=JOB_TYPE): + return self._get_target_class()(job_type) + + def test_ctor(self): + job_config = self._make_one() + self.assertEqual(job_config._job_type, self.JOB_TYPE) + self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + + def test_fill_from_default(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + default_job_config = QueryJobConfig() + default_job_config.use_query_cache = True + default_job_config.maximum_bytes_billed = 2000 + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.dry_run) + self.assertTrue(final_job_config.use_query_cache) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + + def test_fill_from_default_conflict(self): + from google.cloud.bigquery import QueryJobConfig + + basic_job_config = QueryJobConfig() + conflicting_job_config = self._make_one("conflicting_job_type") + self.assertNotEqual( + basic_job_config._job_type, conflicting_job_config._job_type + ) + + with self.assertRaises(TypeError): + basic_job_config._fill_from_default(conflicting_job_config) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def 
test__get_sub_prop_wo_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=None + ) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def test__get_sub_prop_w_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + default = "default" + self.assertIs( + job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value + ) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=default + ) + + @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") + def test__set_sub_prop(self, _set_sub_prop): + job_config = self._make_one() + key = "key" + value = "value" + job_config._set_sub_prop(key, value) + _set_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], value + ) + + def test_to_api_repr(self): + job_config = self._make_one() + expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} + found = job_config.to_api_repr() + self.assertEqual(found, expected) + self.assertIsNot(found, expected) # copied + + # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes + # the ctor can be called w/o arguments + + def test_labels_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.labels, {}) + + def test_labels_update_in_place(self): + job_config = self._make_one() + labels = job_config.labels + labels["foo"] = "bar" # update in place + self.assertEqual(job_config.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config._properties["labels"] = labels + self.assertEqual(job_config.labels, labels) + + def test_labels_setter_invalid(self): + labels = object() + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.labels = labels + + def test_labels_setter(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config.labels = labels + self.assertEqual(job_config._properties["labels"], labels) diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py new file mode 100644 index 000000000..fb0c87391 --- /dev/null +++ b/tests/unit/job/test_copy.py @@ -0,0 +1,477 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
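
Editorial aside: the precedence exercised by test_fill_from_default above is the merge a client applies when it is constructed with a default job config: fields set explicitly on the per-call config win, and anything left unset falls back to the default. A minimal, self-contained sketch (variable names are illustrative, not from the patch):

    from google.cloud.bigquery import QueryJobConfig

    explicit = QueryJobConfig(dry_run=True, maximum_bytes_billed=1000)
    default = QueryJobConfig(use_query_cache=True, maximum_bytes_billed=2000)

    # Explicit settings take precedence; unset fields come from the default.
    merged = explicit._fill_from_default(default)
    assert merged.dry_run is True
    assert merged.use_query_cache is True
    assert merged.maximum_bytes_billed == 1000
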
+ +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestCopyJobConfig(_Base): + JOB_TYPE = "copy" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJobConfig + + return CopyJobConfig + + def test_ctor_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + create_disposition = CreateDisposition.CREATE_NEVER + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()( + create_disposition=create_disposition, write_disposition=write_disposition + ) + + self.assertEqual(config.create_disposition, create_disposition) + self.assertEqual(config.write_disposition, write_disposition) + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "copy": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"copy": {"destinationEncryptionConfiguration": None}} + ) + + +class TestCopyJob(_Base): + JOB_TYPE = "copy" + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJob + + return CopyJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestCopyJob, self)._make_resource(started, ended) + config = resource["configuration"]["copy"] + config["sourceTables"] = [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("copy") + + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + sources = config.get("sourceTables") + if sources is None: + sources = [config["sourceTable"]] + self.assertEqual(len(sources), len(job.sources)) + for table_ref, table in zip(sources, job.sources): + self.assertEqual(table.project, table_ref["projectId"]) + self.assertEqual(table.dataset_id, table_ref["datasetId"]) + self.assertEqual(table.table_id, table_ref["tableId"]) + + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + 
config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + self.assertEqual(job.destination, destination) + self.assertEqual(job.sources, [source]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_sourcetable(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + 
self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_wo_sources(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + with self.assertRaises(KeyError): + _ = job.sources + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + copy_config = RESOURCE["configuration"]["copy"] + copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CopyJobConfig + + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + COPY_CONFIGURATION = { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_NEVER, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = 
WriteDisposition.WRITE_TRUNCATE + job = self._make_one(self.JOB_ID, [source], destination, client1, config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"copy": COPY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py new file mode 100644 index 000000000..4c9411d0d --- /dev/null +++ b/tests/unit/job/test_extract.py @@ -0,0 +1,437 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestExtractJobConfig(_Base): + JOB_TYPE = "extract" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJobConfig + + return ExtractJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()(field_delimiter="\t", print_header=True) + + self.assertEqual(config.field_delimiter, "\t") + self.assertTrue(config.print_header) + + def test_to_api_repr(self): + from google.cloud.bigquery import job + + config = self._make_one() + config.compression = job.Compression.SNAPPY + config.destination_format = job.DestinationFormat.AVRO + config.field_delimiter = "ignored for avro" + config.print_header = False + config._properties["extract"]["someNewField"] = "some-value" + config.use_avro_logical_types = True + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "extract": { + "compression": "SNAPPY", + "destinationFormat": "AVRO", + "fieldDelimiter": "ignored for avro", + "printHeader": False, + "someNewField": "some-value", + "useAvroLogicalTypes": True, + } + }, + ) + + def test_from_api_repr(self): + cls = self._get_target_class() + config = cls.from_api_repr( + { + "extract": { + "compression": "NONE", + "destinationFormat": "CSV", + "fieldDelimiter": "\t", + "printHeader": True, + "someNewField": "some-value", + "useAvroLogicalTypes": False, + } + } + ) + self.assertEqual(config.compression, "NONE") + self.assertEqual(config.destination_format, "CSV") + self.assertEqual(config.field_delimiter, "\t") + self.assertEqual(config.print_header, True) + self.assertEqual(config._properties["extract"]["someNewField"], "some-value") + self.assertEqual(config.use_avro_logical_types, False) + + +class TestExtractJob(_Base): + JOB_TYPE = "extract" + SOURCE_TABLE = "source_table" + DESTINATION_URI = "gs://bucket_name/object_name" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJob + + return ExtractJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestExtractJob, self)._make_resource(started, ended) + config = resource["configuration"]["extract"] + config["sourceTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + 
"tableId": self.SOURCE_TABLE, + } + config["destinationUris"] = [self.DESTINATION_URI] + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("extract") + + self.assertEqual(job.destination_uris, config["destinationUris"]) + + if "sourceTable" in config: + table_ref = config["sourceTable"] + self.assertEqual(job.source.project, table_ref["projectId"]) + self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.source.table_id, table_ref["tableId"]) + else: + model_ref = config["sourceModel"] + self.assertEqual(job.source.project, model_ref["projectId"]) + self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) + self.assertEqual(job.source.model_id, model_ref["modelId"]) + + if "compression" in config: + self.assertEqual(job.compression, config["compression"]) + else: + self.assertIsNone(job.compression) + + if "destinationFormat" in config: + self.assertEqual(job.destination_format, config["destinationFormat"]) + else: + self.assertIsNone(job.destination_format) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + + if "printHeader" in config: + self.assertEqual(job.print_header, config["printHeader"]) + else: + self.assertIsNone(job.print_header) + + def test_ctor(self): + from google.cloud.bigquery.table import Table + + client = _make_client(project=self.PROJECT) + source = Table(self.TABLE_REF) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + self.assertEqual(job.source.project, self.PROJECT) + self.assertEqual(job.source.dataset_id, self.DS_ID) + self.assertEqual(job.source.table_id, self.TABLE_ID) + self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['extract'] + self.assertIsNone(job.compression) + self.assertIsNone(job.destination_format) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.print_header) + + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _make_client(project=self.PROJECT) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics["extract"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats["destinationUriFileCounts"] = [str(file_counts)] + self.assertEqual(job.destination_uri_file_counts, [file_counts]) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def 
test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_for_model(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": "model_id", + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import Compression + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + extract_config = RESOURCE["configuration"]["extract"] + extract_config["compression"] = Compression.GZIP + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import Compression + from google.cloud.bigquery.job import DestinationFormat + from google.cloud.bigquery.job import ExtractJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + EXTRACT_CONFIGURATION = { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + "compression": Compression.GZIP, + "destinationFormat": 
DestinationFormat.NEWLINE_DELIMITED_JSON, + "fieldDelimiter": "|", + "printHeader": False, + } + RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + config = ExtractJobConfig() + config.compression = Compression.GZIP + config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON + config.field_delimiter = "|" + config.print_header = False + job = self._make_one( + self.JOB_ID, source, [self.DESTINATION_URI], client1, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"extract": EXTRACT_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + 
self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py new file mode 100644 index 000000000..70e7860a7 --- /dev/null +++ b/tests/unit/job/test_load.py @@ -0,0 +1,838 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestLoadJob(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJob + + return LoadJob + + def _setUpConstants(self): + super(TestLoadJob, self)._setUpConstants() + self.INPUT_FILES = 2 + self.INPUT_BYTES = 12345 + self.OUTPUT_BYTES = 23456 + self.OUTPUT_ROWS = 345 + + def _make_resource(self, started=False, ended=False): + resource = super(TestLoadJob, self)._make_resource(started, ended) + config = resource["configuration"]["load"] + config["sourceUris"] = [self.SOURCE1] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + + if ended: + resource["status"] = {"state": "DONE"} + resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES + resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES + resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES + resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS + + return resource + + def _verifyBooleanConfigProperties(self, job, config): + if "allowJaggedRows" in config: + self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) + else: + self.assertIsNone(job.allow_jagged_rows) + if "allowQuotedNewlines" in config: + self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) + else: + self.assertIsNone(job.allow_quoted_newlines) + if "autodetect" in config: + self.assertEqual(job.autodetect, config["autodetect"]) + else: + self.assertIsNone(job.autodetect) + if "ignoreUnknownValues" in config: + self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) + else: + self.assertIsNone(job.ignore_unknown_values) + if "useAvroLogicalTypes" in config: + self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) + else: + self.assertIsNone(job.use_avro_logical_types) + + def _verifyEnumConfigProperties(self, job, config): + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "encoding" in config: + self.assertEqual(job.encoding, config["encoding"]) + else: + self.assertIsNone(job.encoding) + if "sourceFormat" in config: + self.assertEqual(job.source_format, config["sourceFormat"]) + else: + self.assertIsNone(job.source_format) + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if "schemaUpdateOptions" in config: + self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) + else: + self.assertIsNone(job.schema_update_options) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("load") + + self._verifyBooleanConfigProperties(job, config) + self._verifyEnumConfigProperties(job, config) + + self.assertEqual(job.source_uris, config["sourceUris"]) + + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + if 
"maxBadRecords" in config: + self.assertEqual(job.max_bad_records, config["maxBadRecords"]) + else: + self.assertIsNone(job.max_bad_records) + if "nullMarker" in config: + self.assertEqual(job.null_marker, config["nullMarker"]) + else: + self.assertIsNone(job.null_marker) + if "quote" in config: + self.assertEqual(job.quote_character, config["quote"]) + else: + self.assertIsNone(job.quote_character) + if "skipLeadingRows" in config: + self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) + else: + self.assertIsNone(job.skip_leading_rows) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(job.destination, self.TABLE_REF) + self.assertEqual(list(job.source_uris), [self.SOURCE1]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # derived from resource['statistics']['load'] + self.assertIsNone(job.input_file_bytes) + self.assertIsNone(job.input_files) + self.assertIsNone(job.output_bytes) + self.assertIsNone(job.output_rows) + + # set/read from resource['configuration']['load'] + self.assertIsNone(job.schema) + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.destination_table_description) + self.assertIsNone(job.destination_table_friendly_name) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.use_avro_logical_types) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_config(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.job import LoadJobConfig + + client = _make_client(project=self.PROJECT) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + self.assertEqual(job.schema, [full_name, age]) + config.destination_table_description = "Description" + expected = {"description": "Description"} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + friendly_name = "Friendly Name" + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_ctor_w_job_reference(self): + from 
google.cloud.bigquery import job + + client = _make_client(project=self.PROJECT) + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(load_job.project, "alternative-project") + self.assertEqual(load_job.location, "US") + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + self.assertTrue(job.done()) + + def test_result(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIs(result, job) + + def test_result_invokes_begin(self): + begun_resource = self._make_resource() + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, done_resource) + client = _make_client(self.PROJECT) + client._connection = connection + + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 2) + begin_request, reload_request = connection.api_request.call_args_list + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_schema_setter_non_list(self): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + with self.assertRaises(TypeError): + config.schema = object() + + def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + with self.assertRaises(ValueError): + config.schema = [full_name, object()] + + def test_schema_setter(self): + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) + + def test_props_set_by_server(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) + STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) + ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) + FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + EMAIL = "phred@example.com" + ERROR_RESULT = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["etag"] = "ETAG" + job._properties["id"] = FULL_JOB_ID + job._properties["selfLink"] = URL + job._properties["user_email"] = EMAIL + + statistics = job._properties["statistics"] = {} + statistics["creationTime"] = _millis(CREATED) + statistics["startTime"] = _millis(STARTED) + statistics["endTime"] = _millis(ENDED) + + self.assertEqual(job.etag, "ETAG") + self.assertEqual(job.self_link, URL) + 
self.assertEqual(job.user_email, EMAIL) + + self.assertEqual(job.created, CREATED) + self.assertEqual(job.started, STARTED) + self.assertEqual(job.ended, ENDED) + + # running jobs have no load stats not yet set. + self.assertIsNone(job.output_bytes) + + load_stats = statistics["load"] = {} + load_stats["inputFileBytes"] = 12345 + load_stats["inputFiles"] = 1 + load_stats["outputBytes"] = 23456 + load_stats["outputRows"] = 345 + + self.assertEqual(job.input_file_bytes, 12345) + self.assertEqual(job.input_files, 1) + self.assertEqual(job.output_bytes, 23456) + self.assertEqual(job.output_rows, 345) + + status = job._properties["status"] = {} + + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + status["errorResult"] = ERROR_RESULT + status["errors"] = [ERROR_RESULT] + status["state"] = "STATE" + + self.assertEqual(job.error_result, ERROR_RESULT) + self.assertEqual(job.errors, [ERROR_RESULT]) + self.assertEqual(job.state, "STATE") + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + load_config = RESOURCE["configuration"]["load"] + load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_already_running(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], 
self.TABLE_REF, client) + job._properties["status"] = {"state": "RUNNING"} + + with self.assertRaises(ValueError): + job._begin() + + def test_begin_w_bound_client(self): + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_autodetect(self): + from google.cloud.bigquery.job import LoadJobConfig + + path = "/projects/{}/jobs".format(self.PROJECT) + resource = self._make_resource() + resource["configuration"]["load"]["autodetect"] = True + # Ensure None for missing server-set props + del resource["statistics"]["creationTime"] + del resource["etag"] + del resource["selfLink"] + del resource["user_email"] + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + config = LoadJobConfig() + config.autodetect = True + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + sent = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "autodetect": True, + } + }, + } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=sent, timeout=None + ) + self._verifyResourceProperties(job, resource) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + from google.cloud.bigquery.schema import SchemaField + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + LOAD_CONFIGURATION = { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "allowJaggedRows": True, + "allowQuotedNewlines": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "encoding": "ISO-8559-1", + "fieldDelimiter": "|", + "ignoreUnknownValues": True, + "maxBadRecords": 100, + "nullMarker": r"\N", + "quote": "'", + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "useAvroLogicalTypes": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "schema": { + "fields": [ + { + "name": 
"full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config + ) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "ISO-8559-1" + config.field_delimiter = "|" + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = r"\N" + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = "CSV" + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + self.assertEqual(len(conn2.api_request.call_args_list), 1) + req = conn2.api_request.call_args_list[0] + self.assertEqual(req[1]["method"], "POST") + self.assertEqual(req[1]["path"], PATH) + SENT = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"load": LOAD_CONFIGURATION}, + } + self.maxDiff = None + self.assertEqual(req[1]["data"], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource() + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job._begin() + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs"}, client, load_job + ) + + conn.api_request.assert_called_once() + _, request = conn.api_request.call_args + self.assertEqual(request["method"], "POST") + self.assertEqual(request["path"], "/projects/alternative-project/jobs") + self.assertEqual( + request["data"]["jobReference"]["projectId"], "alternative-project" + ) + self.assertEqual(request["data"]["jobReference"]["location"], "US") + self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client2, + job, + ) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_miss_w_job_reference(self): + from google.cloud.bigquery import job + + job_ref = job._JobReference("my-job-id", "other-project", "US") + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(load_job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs/my-job-id", + query_params={"fields": "id", "location": "US"}, + timeout=None, + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_job_reference(self): + from 
google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.reload() + + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + client, + load_job, + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) + + def test_cancel_w_bound_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn = _make_connection(RESPONSE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESPONSE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection({"job": resource}) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.cancel() + + final_attributes.assert_called_with( + { + "path": "/projects/alternative-project/jobs/{}/cancel".format( + self.JOB_ID + ) + }, + client, + load_job, + ) + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) diff --git a/tests/unit/job/test_load_config.py 
b/tests/unit/job/test_load_config.py new file mode 100644 index 000000000..c18f51bff --- /dev/null +++ b/tests/unit/job/test_load_config.py @@ -0,0 +1,710 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +import pytest + +from .helpers import _Base + + +class TestLoadJobConfig(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJobConfig + + return LoadJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()( + allow_jagged_rows=True, allow_quoted_newlines=True + ) + + self.assertTrue(config.allow_jagged_rows) + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_jagged_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_jagged_rows) + + def test_allow_jagged_rows_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowJaggedRows"] = True + self.assertTrue(config.allow_jagged_rows) + + def test_allow_jagged_rows_setter(self): + config = self._get_target_class()() + config.allow_jagged_rows = True + self.assertTrue(config._properties["load"]["allowJaggedRows"]) + + def test_allow_quoted_newlines_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowQuotedNewlines"] = True + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_setter(self): + config = self._get_target_class()() + config.allow_quoted_newlines = True + self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) + + def test_autodetect_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.autodetect) + + def test_autodetect_hit(self): + config = self._get_target_class()() + config._properties["load"]["autodetect"] = True + self.assertTrue(config.autodetect) + + def test_autodetect_setter(self): + config = self._get_target_class()() + config.autodetect = True + self.assertTrue(config._properties["load"]["autodetect"]) + + def test_clustering_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.clustering_fields) + + def test_clustering_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + self.assertEqual(config.clustering_fields, fields) + + def test_clustering_fields_setter(self): + fields = ["email", "postal_code"] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) + + def test_clustering_fields_setter_w_none(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + config.clustering_fields = None + 
self.assertIsNone(config.clustering_fields) + self.assertNotIn("clustering", config._properties["load"]) + + def test_create_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.create_disposition) + + def test_create_disposition_hit(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config._properties["load"]["createDisposition"] = disposition + self.assertEqual(config.create_disposition, disposition) + + def test_create_disposition_setter(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config.create_disposition = disposition + self.assertEqual(config._properties["load"]["createDisposition"], disposition) + + def test_destination_encryption_configuration_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_encryption_configuration) + + def test_destination_encryption_configuration_hit(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + self.assertEqual( + config.destination_encryption_configuration, encryption_configuration + ) + + def test_destination_encryption_configuration_setter(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config.destination_encryption_configuration = encryption_configuration + expected = {"kmsKeyName": kms_key_name} + self.assertEqual( + config._properties["load"]["destinationEncryptionConfiguration"], expected + ) + + def test_destination_encryption_configuration_setter_w_none(self): + kms_key_name = "kms-key-name" + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + config.destination_encryption_configuration = None + self.assertIsNone(config.destination_encryption_configuration) + self.assertNotIn( + "destinationEncryptionConfiguration", config._properties["load"] + ) + + def test_destination_table_description_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_description) + + def test_destination_table_description_hit(self): + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description + } + self.assertEqual(config.destination_table_description, description) + + def test_destination_table_description_setter(self): + description = "Description" + config = self._get_target_class()() + config.destination_table_description = description + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_setter_w_fn_already(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + 
config.destination_table_description = description + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_w_none(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_description = None + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_friendly_name) + + def test_destination_table_friendly_name_hit(self): + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_destination_table_friendly_name_setter(self): + friendly_name = "Friendly Name" + config = self._get_target_class()() + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_setter_w_descr_already(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description + } + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_w_none(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_friendly_name = None + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_encoding_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.encoding) + + def test_encoding_hit(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config._properties["load"]["encoding"] = encoding + self.assertEqual(config.encoding, encoding) + + def test_encoding_setter(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config.encoding = encoding + self.assertEqual(config._properties["load"]["encoding"], encoding) + + def test_field_delimiter_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.field_delimiter) + + def test_field_delimiter_hit(self): + field_delimiter = "|" + config = self._get_target_class()() + config._properties["load"]["fieldDelimiter"] = field_delimiter + self.assertEqual(config.field_delimiter, field_delimiter) + + def test_field_delimiter_setter(self): + field_delimiter = "|" + config = self._get_target_class()() + config.field_delimiter = 
field_delimiter + self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) + + def test_hive_partitioning_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.hive_partitioning) + + def test_hive_partitioning_hit(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + config = self._get_target_class()() + config._properties["load"]["hivePartitioningOptions"] = { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + } + result = config.hive_partitioning + self.assertIsInstance(result, HivePartitioningOptions) + self.assertEqual(result.source_uri_prefix, "http://foo/bar") + self.assertEqual(result.mode, "STRINGS") + + def test_hive_partitioning_setter(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + hive_partitioning = HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "AUTO" + + config = self._get_target_class()() + config.hive_partitioning = hive_partitioning + self.assertEqual( + config._properties["load"]["hivePartitioningOptions"], + {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, + ) + + config.hive_partitioning = None + self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) + + def test_hive_partitioning_invalid_type(self): + config = self._get_target_class()() + + with self.assertRaises(TypeError): + config.hive_partitioning = {"mode": "AUTO"} + + def test_ignore_unknown_values_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.ignore_unknown_values) + + def test_ignore_unknown_values_hit(self): + config = self._get_target_class()() + config._properties["load"]["ignoreUnknownValues"] = True + self.assertTrue(config.ignore_unknown_values) + + def test_ignore_unknown_values_setter(self): + config = self._get_target_class()() + config.ignore_unknown_values = True + self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) + + def test_max_bad_records_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.max_bad_records) + + def test_max_bad_records_hit(self): + max_bad_records = 13 + config = self._get_target_class()() + config._properties["load"]["maxBadRecords"] = max_bad_records + self.assertEqual(config.max_bad_records, max_bad_records) + + def test_max_bad_records_setter(self): + max_bad_records = 13 + config = self._get_target_class()() + config.max_bad_records = max_bad_records + self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) + + def test_null_marker_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_marker) + + def test_null_marker_hit(self): + null_marker = "XXX" + config = self._get_target_class()() + config._properties["load"]["nullMarker"] = null_marker + self.assertEqual(config.null_marker, null_marker) + + def test_null_marker_setter(self): + null_marker = "XXX" + config = self._get_target_class()() + config.null_marker = null_marker + self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + + def test_quote_character_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.quote_character) + + def test_quote_character_hit(self): + quote_character = "'" + config = self._get_target_class()() + 
config._properties["load"]["quote"] = quote_character + self.assertEqual(config.quote_character, quote_character) + + def test_quote_character_setter(self): + quote_character = "'" + config = self._get_target_class()() + config.quote_character = quote_character + self.assertEqual(config._properties["load"]["quote"], quote_character) + + def test_schema_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema) + + def test_schema_hit(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + all_props_repr = { + "mode": "REQUIRED", + "name": "foo", + "type": "INTEGER", + "description": "Foo", + } + minimal_repr = {"name": "bar", "type": "STRING"} + config._properties["load"]["schema"] = { + "fields": [all_props_repr, minimal_repr] + } + all_props, minimal = config.schema + self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) + self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) + + def test_schema_setter_fields(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_valid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + config.schema = schema + + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_invalid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + ] + + with self.assertRaises(Exception): + config.schema = schema + + def test_schema_setter_unsetting_schema(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + config._properties["load"]["schema"] = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + config.schema = None + self.assertNotIn("schema", config._properties["load"]) + config.schema = None # no error, idempotent operation + + def test_schema_update_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema_update_options) + + def test_schema_update_options_hit(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config._properties["load"]["schemaUpdateOptions"] = options + self.assertEqual(config.schema_update_options, options) + + def test_schema_update_options_setter(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ 
+            SchemaUpdateOption.ALLOW_FIELD_ADDITION,
+            SchemaUpdateOption.ALLOW_FIELD_RELAXATION,
+        ]
+        config = self._get_target_class()()
+        config.schema_update_options = options
+        self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options)
+
+    def test_skip_leading_rows_missing(self):
+        config = self._get_target_class()()
+        self.assertIsNone(config.skip_leading_rows)
+
+    def test_skip_leading_rows_hit_w_str(self):
+        skip_leading_rows = 1
+        config = self._get_target_class()()
+        config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows)
+        self.assertEqual(config.skip_leading_rows, skip_leading_rows)
+
+    def test_skip_leading_rows_hit_w_integer(self):
+        skip_leading_rows = 1
+        config = self._get_target_class()()
+        config._properties["load"]["skipLeadingRows"] = skip_leading_rows
+        self.assertEqual(config.skip_leading_rows, skip_leading_rows)
+
+    def test_skip_leading_rows_setter(self):
+        skip_leading_rows = 1
+        config = self._get_target_class()()
+        config.skip_leading_rows = skip_leading_rows
+        self.assertEqual(
+            config._properties["load"]["skipLeadingRows"], str(skip_leading_rows)
+        )
+
+    def test_source_format_missing(self):
+        config = self._get_target_class()()
+        self.assertIsNone(config.source_format)
+
+    def test_source_format_hit(self):
+        from google.cloud.bigquery.job import SourceFormat
+
+        source_format = SourceFormat.CSV
+        config = self._get_target_class()()
+        config._properties["load"]["sourceFormat"] = source_format
+        self.assertEqual(config.source_format, source_format)
+
+    def test_source_format_setter(self):
+        from google.cloud.bigquery.job import SourceFormat
+
+        source_format = SourceFormat.CSV
+        config = self._get_target_class()()
+        config.source_format = source_format
+        self.assertEqual(config._properties["load"]["sourceFormat"], source_format)
+
+    def test_range_partitioning_w_none(self):
+        object_under_test = self._get_target_class()()
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_w_value(self):
+        object_under_test = self._get_target_class()()
+        object_under_test._properties["load"]["rangePartitioning"] = {
+            "field": "column_one",
+            "range": {"start": 1, "end": 1000, "interval": 10},
+        }
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter(self):
+        from google.cloud.bigquery.table import PartitionRange
+        from google.cloud.bigquery.table import RangePartitioning
+
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = RangePartitioning(
+            field="column_one", range_=PartitionRange(start=1, end=1000, interval=10)
+        )
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter_w_none(self):
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = None
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_setter_w_wrong_type(self):
+        object_under_test = self._get_target_class()()
+        with pytest.raises(ValueError, match="RangePartitioning"):
+            object_under_test.range_partitioning = object()
+
+    def test_time_partitioning_miss(self):
+        config = self._get_target_class()()
+
self.assertIsNone(config.time_partitioning) + + def test_time_partitioning_hit(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + self.assertEqual(config.time_partitioning, expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + + config = self._get_target_class()() + config.time_partitioning = time_partitioning + expected = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + self.assertEqual(config._properties["load"]["timePartitioning"], expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter_w_none(self): + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + self.assertNotIn("timePartitioning", config._properties["load"]) + + def test_use_avro_logical_types(self): + config = self._get_target_class()() + self.assertIsNone(config.use_avro_logical_types) + + def test_use_avro_logical_types_setter(self): + config = self._get_target_class()() + config.use_avro_logical_types = True + self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) + + def test_write_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.write_disposition) + + def test_write_disposition_hit(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config._properties["load"]["writeDisposition"] = write_disposition + self.assertEqual(config.write_disposition, write_disposition) + + def test_write_disposition_setter(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config.write_disposition = write_disposition + self.assertEqual( + config._properties["load"]["writeDisposition"], write_disposition + ) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py new file mode 100644 index 000000000..c0b90d8ea --- /dev/null +++ b/tests/unit/job/test_query.py @@ -0,0 +1,1811 
@@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import concurrent +import copy +import textwrap + +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import mock +import requests +from six.moves import http_client + +import google.cloud.bigquery.query +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestQueryJob(_Base): + JOB_TYPE = "query" + QUERY = "select count(*) from persons" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJob + + return QueryJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestQueryJob, self)._make_resource(started, ended) + config = resource["configuration"]["query"] + config["query"] = self.QUERY + return resource + + def _verifyBooleanResourceProperties(self, job, config): + + if "allowLargeResults" in config: + self.assertEqual(job.allow_large_results, config["allowLargeResults"]) + else: + self.assertIsNone(job.allow_large_results) + if "flattenResults" in config: + self.assertEqual(job.flatten_results, config["flattenResults"]) + else: + self.assertIsNone(job.flatten_results) + if "useQueryCache" in config: + self.assertEqual(job.use_query_cache, config["useQueryCache"]) + else: + self.assertIsNone(job.use_query_cache) + if "useLegacySql" in config: + self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) + else: + self.assertIsNone(job.use_legacy_sql) + + def _verifyIntegerResourceProperties(self, job, config): + if "maximumBillingTier" in config: + self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) + else: + self.assertIsNone(job.maximum_billing_tier) + if "maximumBytesBilled" in config: + self.assertEqual( + str(job.maximum_bytes_billed), config["maximumBytesBilled"] + ) + self.assertIsInstance(job.maximum_bytes_billed, int) + else: + self.assertIsNone(job.maximum_bytes_billed) + + def _verify_udf_resources(self, job, config): + udf_resources = config.get("userDefinedFunctionResources", ()) + self.assertEqual(len(job.udf_resources), len(udf_resources)) + for found, expected in zip(job.udf_resources, udf_resources): + if "resourceUri" in expected: + self.assertEqual(found.udf_type, "resourceUri") + self.assertEqual(found.value, expected["resourceUri"]) + else: + self.assertEqual(found.udf_type, "inlineCode") + self.assertEqual(found.value, expected["inlineCode"]) + + def _verifyQueryParameters(self, job, config): + query_parameters = config.get("queryParameters", ()) + self.assertEqual(len(job.query_parameters), len(query_parameters)) + for found, expected in zip(job.query_parameters, query_parameters): + self.assertEqual(found.to_api_repr(), expected) + + def _verify_table_definitions(self, job, config): + table_defs = config.get("tableDefinitions") + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + 
self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + + def _verify_configuration_properties(self, job, configuration): + if "dryRun" in configuration: + self.assertEqual(job.dry_run, configuration["dryRun"]) + else: + self.assertIsNone(job.dry_run) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + configuration = resource.get("configuration", {}) + self._verify_configuration_properties(job, configuration) + + query_config = resource.get("configuration", {}).get("query") + self._verifyBooleanResourceProperties(job, query_config) + self._verifyIntegerResourceProperties(job, query_config) + self._verify_udf_resources(job, query_config) + self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) + + self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: + self.assertEqual(job.create_disposition, query_config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: + ds_ref = job.default_dataset + ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} + self.assertEqual(ds_ref, query_config["defaultDataset"]) + else: + self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: + table = job.destination + tb_ref = { + "projectId": table.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, + } + self.assertEqual(tb_ref, query_config["destinationTable"]) + else: + self.assertIsNone(job.destination) + if "priority" in query_config: + self.assertEqual(job.priority, query_config["priority"]) + else: + self.assertIsNone(job.priority) + if "writeDisposition" in query_config: + self.assertEqual(job.write_disposition, query_config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + query_config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: + self.assertEqual( + job.schema_update_options, query_config["schemaUpdateOptions"] + ) + else: + self.assertIsNone(job.schema_update_options) + + def test_ctor_defaults(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query, self.QUERY) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] + self.assertIsNone(job.allow_large_results) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.default_dataset) + self.assertIsNone(job.destination) + self.assertIsNone(job.flatten_results) + self.assertIsNone(job.priority) + self.assertIsNone(job.use_query_cache) + self.assertIsNone(job.dry_run) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.maximum_billing_tier) + self.assertIsNone(job.maximum_bytes_billed) + 
self.assertIsNone(job.table_definitions) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_udf_resources(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.udf_resources, udf_resources) + + def test_ctor_w_query_parameters(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig(query_parameters=query_parameters) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.query_parameters, query_parameters) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + query_config = RESOURCE["configuration"]["query"] + query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE + query_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + query_config["schemaUpdateOptions"] = 
[SchemaUpdateOption.ALLOW_FIELD_ADDITION] + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancelled(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "stopped"}, + } + + self.assertTrue(job.cancelled()) + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) + self.assertTrue(job.done()) + + def test_done_w_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=42) + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + call_args = fake_reload.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + def test_done_w_timeout_and_longer_internal_api_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._done_timeout = 8.8 + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=5.5) + + # The expected timeout used is simply the given timeout, as the latter + # is shorter than the job's internal done timeout. 
+ expected_timeout = 5.5 + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + call_args = fake_reload.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + def test_query_plan(self): + from google.cloud._helpers import _RFC3339_MICROS + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [ + { + "name": "NAME", + "id": "1234", + "inputStages": ["88", "101"], + "startMs": "1522540800000", + "endMs": "1522540804000", + "parallelInputs": "1000", + "completedParallelInputs": "5", + "waitMsAvg": "33", + "waitMsMax": "400", + "waitRatioAvg": 2.71828, + "waitRatioMax": 3.14159, + "readMsAvg": "45", + "readMsMax": "90", + "readRatioAvg": 1.41421, + "readRatioMax": 1.73205, + "computeMsAvg": "55", + "computeMsMax": "99", + "computeRatioAvg": 0.69315, + "computeRatioMax": 1.09861, + "writeMsAvg": "203", + "writeMsMax": "340", + "writeRatioAvg": 3.32193, + "writeRatioMax": 2.30258, + "recordsRead": "100", + "recordsWritten": "1", + "status": "STATUS", + "shuffleOutputBytes": "1024", + "shuffleOutputBytesSpilled": "1", + "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.query_plan, []) + + query_stats["queryPlan"] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + self.assertEqual(found.name, expected["name"]) + self.assertEqual(found.entry_id, expected["id"]) + self.assertEqual(len(found.input_stages), len(expected["inputStages"])) + for f_id in found.input_stages: + self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) + self.assertEqual( + found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" + ) + self.assertEqual( + found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" + ) + self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) + self.assertEqual( + found.completed_parallel_inputs, + int(expected["completedParallelInputs"]), + ) + self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) + self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) + self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) + self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) + self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) + self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) + self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) + self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) + self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) + self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) + self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) + self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) + self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) + self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) + self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) + 
self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) + self.assertEqual(found.records_read, int(expected["recordsRead"])) + self.assertEqual(found.records_written, int(expected["recordsWritten"])) + self.assertEqual(found.status, expected["status"]) + self.assertEqual( + found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) + ) + self.assertEqual( + found.shuffle_output_bytes_spilled, + int(expected["shuffleOutputBytesSpilled"]), + ) + + self.assertEqual(len(found.steps), len(expected["steps"])) + for f_step, e_step in zip(found.steps, expected["steps"]): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step["kind"]) + self.assertEqual(f_step.substeps, e_step["substeps"]) + + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats["totalBytesProcessed"] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats["totalBytesBilled"] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.billing_tier) + + query_stats["billingTier"] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.cache_hit) + + query_stats["cacheHit"] = True + self.assertTrue(job.cache_hit) + + def test_ddl_operation_performed(self): + op = "SKIP" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_operation_performed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats["ddlOperationPerformed"] = op + self.assertEqual(job.ddl_operation_performed, op) + + def test_ddl_target_routine(self): + from google.cloud.bigquery.routine import RoutineReference + + ref_routine = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "routineId": "targetroutine", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_routine) + + statistics = job._properties["statistics"] = {} + 
self.assertIsNone(job.ddl_target_routine) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats["ddlTargetRoutine"] = ref_routine + self.assertIsInstance(job.ddl_target_routine, RoutineReference) + self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") + self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_routine.project, self.PROJECT) + + def test_ddl_target_table(self): + from google.cloud.bigquery.table import TableReference + + ref_table = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "tableId": "targettable", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_table) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats["ddlTargetTable"] = ref_table + self.assertIsInstance(job.ddl_target_table, TableReference) + self.assertEqual(job.ddl_target_table.table_id, "targettable") + self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_table.project, self.PROJECT) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats["numDmlAffectedRows"] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_slot_millis(self): + millis = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.slot_millis) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.slot_millis) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.slot_millis) + + query_stats["totalSlotMs"] = millis + self.assertEqual(job.slot_millis, millis) + + def test_statement_type(self): + statement_type = "SELECT" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.statement_type) + + query_stats["statementType"] = statement_type + self.assertEqual(job.statement_type, statement_type) + + def test_referenced_tables(self): + from google.cloud.bigquery.table import TableReference + + ref_tables_resource = [ + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "tableId": "other-table", + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats["referencedTables"] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + 
self.assertIsInstance(local1, TableReference) + self.assertEqual(local1.table_id, "local1") + self.assertEqual(local1.dataset_id, "dataset") + self.assertEqual(local1.project, self.PROJECT) + + self.assertIsInstance(local2, TableReference) + self.assertEqual(local2.table_id, "local2") + self.assertEqual(local2.dataset_id, "dataset") + self.assertEqual(local2.project, self.PROJECT) + + self.assertIsInstance(remote, TableReference) + self.assertEqual(remote.table_id, "other-table") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") + + def test_timeline(self): + timeline_resource = [ + { + "elapsedMs": 1, + "activeUnits": 22, + "pendingUnits": 33, + "completedUnits": 44, + "totalSlotMs": 101, + } + ] + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.timeline, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.timeline, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.timeline, []) + + query_stats["timeline"] = timeline_resource + + self.assertEqual(len(job.timeline), len(timeline_resource)) + self.assertEqual(job.timeline[0].elapsed_ms, 1) + self.assertEqual(job.timeline[0].active_units, 22) + self.assertEqual(job.timeline[0].pending_units, 33) + self.assertEqual(job.timeline[0].completed_units, 44) + self.assertEqual(job.timeline[0].slot_millis, 101) + + def test_undeclared_query_parameters(self): + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter + + undeclared = [ + { + "name": "my_scalar", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "value"}, + }, + { + "name": "my_array", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": { + "arrayValues": [{"value": "1066"}, {"value": "1745"}] + }, + }, + { + "name": "my_struct", + "parameterType": { + "type": "STRUCT", + "structTypes": [{"name": "count", "type": {"type": "INT64"}}], + }, + "parameterValue": {"structValues": {"count": {"value": "123"}}}, + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.undeclared_query_parameters, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats["undeclaredQueryParameters"] = undeclared + + scalar, array, struct = job.undeclared_query_parameters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, "my_scalar") + self.assertEqual(scalar.type_, "STRING") + self.assertEqual(scalar.value, "value") + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, "my_array") + self.assertEqual(array.array_type, "INT64") + self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, "my_struct") + self.assertEqual(struct.struct_types, {"count": "INT64"}) + self.assertEqual(struct.struct_values, {"count": 123}) + + def test_estimated_bytes_processed(self): + est_bytes = 123456 + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.estimated_bytes_processed) + + statistics = 
job._properties["statistics"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats["estimatedBytesProcessed"] = str(est_bytes) + self.assertEqual(job.estimated_bytes_processed, est_bytes) + + def test_result(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 2) + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + # Test that the total_rows property has changed during iteration, based + # on the response from tabledata.list. + self.assertEqual(result.total_rows, 1) + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + 
path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + + def test_result_with_max_results(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + max_results = 3 + + result = job.result(max_results=max_results) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 3) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], max_results + ) + + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.001, + predicate=custom_predicate, + ) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + + def test_result_w_empty_schema(self): + from google.cloud.bigquery.table import _EmptyRowIterator + + # Destination table may have no schema for some DDL and DML queries. 
+ query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": []}, + } + connection = _make_connection(query_resource, query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIsInstance(result, _EmptyRowIterator) + self.assertEqual(list(result), []) + + def test_result_invokes_begins(self): + begun_resource = self._make_resource() + incomplete_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + query_resource = copy.deepcopy(incomplete_resource) + query_resource["jobComplete"] = True + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, + incomplete_resource, + query_resource, + done_resource, + query_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 4) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[2] + reload_request = connection.api_request.call_args_list[3] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_timeout(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result(timeout=1.0) + + self.assertEqual(len(connection.api_request.call_args_list), 3) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[1] + reload_request = connection.api_request.call_args_list[2] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual( + query_request[1]["path"], + "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + ) + self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_page_size(self): + # Arrange + query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "4", + } + job_resource = self._make_resource(started=True, ended=True) + q_config = job_resource["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + tabledata_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + 
tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = _make_connection( + query_results_resource, tabledata_resource, tabledata_resource_page_2 + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + conn.api_request.assert_has_calls( + [ + mock.call( + method="GET", + path=tabledata_path, + query_params={"maxResults": 3}, + timeout=None, + ), + mock.call( + method="GET", + path=tabledata_path, + query_params={"pageToken": "some-page-token", "maxResults": 3}, + timeout=None, + ), + ] + ) + + def test_result_with_start_index(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + + result = job.result(start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 4) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["startIndex"], start_index + ) + + def test_result_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + error_result = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "invalid", + } + job._properties["status"] = { + "errorResult": error_result, + "errors": [error_result], + "state": "DONE", + } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) + job._set_future_result() + + with self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test_result_transport_timeout_error(self): + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch 
= mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=requests.exceptions.Timeout("Server response took too long."), + ) + + # Make sure that timeout errors get rebranded to concurrent futures timeout. + with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1) + + def test__begin_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test__begin_w_timeout(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(timeout=7.5) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False} + }, + }, + timeout=7.5, + ) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + DS_ID = "DATASET" + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertIsNone(job.default_dataset) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def 
test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.job import QueryPriority + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + TABLE = "TABLE" + DS_ID = "DATASET" + RESOURCE = self._make_resource(ended=True) + QUERY_CONFIGURATION = { + "query": self.QUERY, + "allowLargeResults": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": TABLE, + }, + "flattenResults": True, + "priority": QueryPriority.INTERACTIVE, + "useQueryCache": True, + "useLegacySql": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "maximumBillingTier": 4, + "maximumBytesBilled": "123456", + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], + } + RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION + RESOURCE["configuration"]["dryRun"] = True + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(TABLE) + + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = QueryPriority.INTERACTIVE + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.maximum_bytes_billed = 123456 + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] + job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_udf(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + udf_resources = [ + UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", 
INLINE_UDF_CODE), + ] + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.udf_resources, udf_resources) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "userDefinedFunctionResources": [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_named_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "NAMED" + config["queryParameters"] = [ + { + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + } + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "NAMED", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_positional_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter.positional("INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "POSITIONAL" + config["queryParameters"] = [ + {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + 
job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "POSITIONAL", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + + bt_config = ExternalConfig("BIGTABLE") + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = "cf" + col = BigtableColumn() + col.field_name = "fn" + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + "sourceFormat": "BIGTABLE", + "ignoreUnknownValues": True, + "bigtableOptions": { + "readRowkeyAsString": True, + "columnFamilies": [ + {"familyId": "cf", "columns": [{"fieldName": "fn"}]} + ], + }, + } + CSV_CONFIG_RESOURCE = { + "sourceFormat": "CSV", + "maxBadRecords": 8, + "csvOptions": {"allowJaggedRows": True}, + } + csv_config = ExternalConfig("CSV") + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = "bigtable-table" + csv_table = "csv-table" + RESOURCE["configuration"]["query"]["tableDefinitions"] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = {bt_table: bt_config, csv_table: csv_config} + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "tableDefinitions": { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, want_resource) + + def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["dryRun"] = True + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.dry_run = True + 
job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False}, + "dryRun": True, + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + q_config = RESOURCE["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": DEST_TABLE, + } + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = 
_make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_timeout(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(timeout=4.2) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=4.2 + ) + + def test_iter(self): + import types + + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "0", + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + self.assertIsInstance(iter(job), types.GeneratorType) diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py new file mode 100644 index 000000000..db03d6a3b --- /dev/null +++ b/tests/unit/job/test_query_config.py @@ -0,0 +1,255 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pytest
+
+from .helpers import _Base
+
+
+class TestQueryJobConfig(_Base):
+    @staticmethod
+    def _get_target_class():
+        from google.cloud.bigquery.job import QueryJobConfig
+
+        return QueryJobConfig
+
+    def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+    def test_ctor(self):
+        config = self._make_one()
+        self.assertEqual(config._properties, {"query": {}})
+
+    def test_ctor_w_none(self):
+        config = self._make_one()
+        config.default_dataset = None
+        config.destination = None
+        self.assertIsNone(config.default_dataset)
+        self.assertIsNone(config.destination)
+
+    def test_ctor_w_properties(self):
+        config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True)
+
+        self.assertFalse(config.use_query_cache)
+        self.assertTrue(config.use_legacy_sql)
+
+    def test_ctor_w_string_default_dataset(self):
+        from google.cloud.bigquery import dataset
+
+        default_dataset = "default-proj.default_dset"
+        config = self._get_target_class()(default_dataset=default_dataset)
+        expected = dataset.DatasetReference.from_string(default_dataset)
+        self.assertEqual(config.default_dataset, expected)
+
+    def test_ctor_w_string_destination(self):
+        from google.cloud.bigquery import table
+
+        destination = "dest-proj.dest_dset.dest_tbl"
+        config = self._get_target_class()(destination=destination)
+        expected = table.TableReference.from_string(destination)
+        self.assertEqual(config.destination, expected)
+
+    def test_default_dataset_w_string(self):
+        from google.cloud.bigquery import dataset
+
+        default_dataset = "default-proj.default_dset"
+        config = self._make_one()
+        config.default_dataset = default_dataset
+        expected = dataset.DatasetReference.from_string(default_dataset)
+        self.assertEqual(config.default_dataset, expected)
+
+    def test_default_dataset_w_dataset(self):
+        from google.cloud.bigquery import dataset
+
+        default_dataset = "default-proj.default_dset"
+        expected = dataset.DatasetReference.from_string(default_dataset)
+        config = self._make_one()
+        config.default_dataset = dataset.Dataset(expected)
+        self.assertEqual(config.default_dataset, expected)
+
+    def test_destination_w_string(self):
+        from google.cloud.bigquery import table
+
+        destination = "dest-proj.dest_dset.dest_tbl"
+        config = self._make_one()
+        config.destination = destination
+        expected = table.TableReference.from_string(destination)
+        self.assertEqual(config.destination, expected)
+
+    def test_range_partitioning_w_none(self):
+        object_under_test = self._get_target_class()()
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_w_value(self):
+        object_under_test = self._get_target_class()()
+        object_under_test._properties["query"]["rangePartitioning"] = {
+            "field": "column_one",
+            "range": {"start": 1, "end": 1000, "interval": 10},
+        }
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter(self):
+        from google.cloud.bigquery.table import PartitionRange
+        from google.cloud.bigquery.table import RangePartitioning
+
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = RangePartitioning(
+            field="column_one", range_=PartitionRange(start=1, end=1000, interval=10)
+        )
+        assert object_under_test.range_partitioning.field == "column_one"
+        assert object_under_test.range_partitioning.range_.start == 1
+        assert object_under_test.range_partitioning.range_.end == 1000
+        assert object_under_test.range_partitioning.range_.interval == 10
+
+    def test_range_partitioning_setter_w_none(self):
+        object_under_test = self._get_target_class()()
+        object_under_test.range_partitioning = None
+        assert object_under_test.range_partitioning is None
+
+    def test_range_partitioning_setter_w_wrong_type(self):
+        object_under_test = self._get_target_class()()
+        with pytest.raises(ValueError, match="RangePartitioning"):
+            object_under_test.range_partitioning = object()
+
+    def test_time_partitioning(self):
+        from google.cloud.bigquery import table
+
+        time_partitioning = table.TimePartitioning(
+            type_=table.TimePartitioningType.DAY, field="name"
+        )
+        config = self._make_one()
+        config.time_partitioning = time_partitioning
+        # TimePartitioning should be configurable after assigning
+        time_partitioning.expiration_ms = 10000
+
+        self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY)
+        self.assertEqual(config.time_partitioning.field, "name")
+        self.assertEqual(config.time_partitioning.expiration_ms, 10000)
+
+        config.time_partitioning = None
+        self.assertIsNone(config.time_partitioning)
+
+    def test_clustering_fields(self):
+        fields = ["email", "postal_code"]
+        config = self._get_target_class()()
+        config.clustering_fields = fields
+        self.assertEqual(config.clustering_fields, fields)
+
+        config.clustering_fields = None
+        self.assertIsNone(config.clustering_fields)
+
+    def test_from_api_repr_empty(self):
+        klass = self._get_target_class()
+        config = klass.from_api_repr({})
+        self.assertIsNone(config.dry_run)
+        self.assertIsNone(config.use_legacy_sql)
+        self.assertIsNone(config.default_dataset)
+        self.assertIsNone(config.destination)
+        self.assertIsNone(config.destination_encryption_configuration)
+
+    def test_from_api_repr_normal(self):
+        from google.cloud.bigquery.dataset import DatasetReference
+
+        resource = {
+            "query": {
+                "useLegacySql": True,
+                "query": "no property for me",
+                "defaultDataset": {
+                    "projectId": "someproject",
+                    "datasetId": "somedataset",
+                },
+                "someNewProperty": "I should be saved, too.",
+            },
+            "dryRun": True,
+        }
+        klass = self._get_target_class()
+
+        config = klass.from_api_repr(resource)
+
+        self.assertTrue(config.use_legacy_sql)
+        self.assertEqual(
+            config.default_dataset, DatasetReference("someproject", "somedataset")
+        )
+        self.assertTrue(config.dry_run)
+        # Make sure unknown properties propagate.
+        self.assertEqual(config._properties["query"]["query"], "no property for me")
+        self.assertEqual(
+            config._properties["query"]["someNewProperty"], "I should be saved, too."
+        )
+
+    def test_to_api_repr_normal(self):
+        from google.cloud.bigquery.dataset import DatasetReference
+
+        config = self._make_one()
+        config.use_legacy_sql = True
+        config.default_dataset = DatasetReference("someproject", "somedataset")
+        config.dry_run = False
+        config._properties["someNewProperty"] = "Woohoo, alpha stuff."
+
+        resource = config.to_api_repr()
+
+        self.assertFalse(resource["dryRun"])
+        self.assertTrue(resource["query"]["useLegacySql"])
+        self.assertEqual(
+            resource["query"]["defaultDataset"]["projectId"], "someproject"
+        )
+        self.assertEqual(
+            resource["query"]["defaultDataset"]["datasetId"], "somedataset"
+        )
+        # Make sure unknown properties propagate.
+ self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "query": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"query": {"destinationEncryptionConfiguration": None}} + ) + + def test_from_api_repr_with_encryption(self): + resource = { + "query": { + "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} + } + } + klass = self._get_target_class() + config = klass.from_api_repr(resource) + self.assertEqual( + config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME + ) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py new file mode 100644 index 000000000..37f4a6dec --- /dev/null +++ b/tests/unit/job/test_query_pandas.py @@ -0,0 +1,450 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import json + +import mock +import pytest + +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_job_resource + + +@pytest.mark.parametrize( + "query,expected", + ( + (None, False), + ("", False), + ("select name, age from table", False), + ("select name, age from table LIMIT 10;", False), + ("select name, age from table order by other_column;", True), + ("Select name, age From table Order By other_column", True), + ("SELECT name, age FROM table ORDER BY other_column;", True), + ("select name, age from table order\nby other_column", True), + ("Select name, age From table Order\nBy other_column;", True), + ("SELECT name, age FROM table ORDER\nBY other_column", True), + ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), + ), +) +def test__contains_order_by(query, expected): + from google.cloud.bigquery import job as mut + + if expected: + assert mut._contains_order_by(query) + else: + assert not mut._contains_order_by(query) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.parametrize( + "query", + ( + "select name, age from table order by other_column;", + "Select name, age From table Order By other_column;", + "SELECT name, age FROM table ORDER BY other_column;", + "select name, age from table order\nby other_column;", + "Select name, age From table Order\nBy other_column;", + "SELECT name, age FROM table ORDER\nBY other_column;", + "SelecT name, age froM table OrdeR \n\t BY other_column;", + ), +) +def test_to_dataframe_bqstorage_preserve_order(query): + from google.cloud.bigquery.job import QueryJob as target_class + + job_resource = _make_job_resource( + project_id="test-project", job_type="query", ended=True + ) + job_resource["configuration"]["query"]["query"] = query + job_resource["status"] = {"state": "DONE"} + get_query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": "test-project", "jobId": "test-job"}, + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "totalRows": "4", + } + connection = _make_connection(get_query_results_resource, job_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(job_resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + 
table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/test-project", + read_session=expected_session, + max_stream_count=1, # Use a single stream to preserve row order. + ) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + { + "name": "spouse_1", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + { + "name": "spouse_2", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + ] + }, + } + tabledata_resource = { + "rows": [ + { + "f": [ + {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, + {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, + ] + }, + { + "f": [ + {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, + {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, + ] + }, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + tbl = job.to_arrow(create_bqstorage_client=False) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + # Check the schema. + assert tbl.schema[0].name == "spouse_1" + assert tbl.schema[0].type[0].name == "name" + assert tbl.schema[0].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[0].type) + assert pyarrow.types.is_string(tbl.schema[0].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[0].type[1].type) + assert tbl.schema[1].name == "spouse_2" + assert tbl.schema[1].type[0].name == "name" + assert tbl.schema[1].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[1].type) + assert pyarrow.types.is_string(tbl.schema[1].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[1].type[1].type) + + # Check the data. 
+ tbl_data = tbl.to_pydict() + spouse_1 = tbl_data["spouse_1"] + assert spouse_1 == [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bhettye Rhubble", "age": 27}, + ] + spouse_2 = tbl_data["spouse_2"] + assert spouse_2 == [ + {"name": "Wylma Phlyntstone", "age": 29}, + {"name": "Bharney Rhubble", "age": 33}, + ] + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + tabledata_resource = { + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + df = job.to_dataframe(create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df) == ["name", "age"] # verify the column names + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_ddl_query(): + from google.cloud.bigquery.job import QueryJob as target_class + + # Destination table may have no schema for some DDL and DML queries. 
+ resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "schema": {"fields": []}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + + df = job.to_dataframe() + + assert len(df) == 0 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, # Use default number of streams for best performance. 
+ ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "start_timestamp", "type": "TIMESTAMP"}, + {"name": "seconds", "type": "INT64"}, + {"name": "miles", "type": "FLOAT64"}, + {"name": "km", "type": "FLOAT64"}, + {"name": "payment_type", "type": "STRING"}, + {"name": "complete", "type": "BOOL"}, + {"name": "date", "type": "DATE"}, + ] + }, + } + row_data = [ + [ + "1.4338368E9", + "420", + "1.1", + "1.77", + "Cto_dataframeash", + "true", + "1999-12-01", + ], + ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + assert df.seconds.dtype.name == "int64" + assert df.miles.dtype.name == "float64" + assert df.km.dtype.name == "float16" + assert df.payment_type.dtype.name == "object" + assert df.complete.dtype.name == "bool" + assert df.date.dtype.name == "object" + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_date_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "1", + "schema": {"fields": [{"name": "date", "type": "DATE"}]}, + } + row_data = [ + ["1999-12-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + assert df.date.dtype.name == "datetime64[ns]" + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +@mock.patch("tqdm.tqdm") +def test_to_dataframe_with_progress_bar(tqdm_mock): + from google.cloud.bigquery.job import QueryJob as target_class + + 
begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource, query_resource, + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) + tqdm_mock.assert_not_called() + + job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + tqdm_mock.assert_called() diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py new file mode 100644 index 000000000..09a0efc45 --- /dev/null +++ b/tests/unit/job/test_query_stats.py @@ -0,0 +1,356 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .helpers import _Base + + +class TestQueryPlanEntryStep(_Base): + KIND = "KIND" + SUBSTEPS = ("SUB1", "SUB2") + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep + + return QueryPlanEntryStep + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one("OTHER", self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + def test___eq___wrong_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertFalse(step == "hello") + + +class TestQueryPlanEntry(_Base): + NAME = "NAME" + ENTRY_ID = 1234 + START_MS = 1522540800000 + END_MS = 1522540804000 + INPUT_STAGES = (88, 101) + PARALLEL_INPUTS = 1000 + COMPLETED_PARALLEL_INPUTS = 5 + 
WAIT_MS_AVG = 33 + WAIT_MS_MAX = 400 + WAIT_RATIO_AVG = 2.71828 + WAIT_RATIO_MAX = 3.14159 + READ_MS_AVG = 45 + READ_MS_MAX = 90 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_MS_AVG = 55 + COMPUTE_MS_MAX = 99 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_MS_AVG = 203 + WRITE_MS_MAX = 340 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = "STATUS" + SHUFFLE_OUTPUT_BYTES = 1024 + SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + + START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" + END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertEqual(entry.input_stages, []) + self.assertIsNone(entry.start) + self.assertIsNone(entry.end) + self.assertIsNone(entry.parallel_inputs) + self.assertIsNone(entry.completed_parallel_inputs) + self.assertIsNone(entry.wait_ms_avg) + self.assertIsNone(entry.wait_ms_max) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ms_avg) + self.assertIsNone(entry.read_ms_max) + self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ms_avg) + self.assertIsNone(entry.compute_ms_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ms_avg) + self.assertIsNone(entry.write_ms_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertIsNone(entry.shuffle_output_bytes) + self.assertIsNone(entry.shuffle_output_bytes_spilled) + self.assertEqual(entry.steps, []) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [ + QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS, + ) + ] + resource = { + "name": self.NAME, + "id": self.ENTRY_ID, + "inputStages": self.INPUT_STAGES, + "startMs": self.START_MS, + "endMs": self.END_MS, + "waitMsAvg": self.WAIT_MS_AVG, + "waitMsMax": self.WAIT_MS_MAX, + "waitRatioAvg": self.WAIT_RATIO_AVG, + "waitRatioMax": self.WAIT_RATIO_MAX, + "readMsAvg": self.READ_MS_AVG, + "readMsMax": self.READ_MS_MAX, + "readRatioAvg": self.READ_RATIO_AVG, + "readRatioMax": self.READ_RATIO_MAX, + "computeMsAvg": self.COMPUTE_MS_AVG, + "computeMsMax": self.COMPUTE_MS_MAX, + "computeRatioAvg": self.COMPUTE_RATIO_AVG, + "computeRatioMax": self.COMPUTE_RATIO_MAX, + "writeMsAvg": self.WRITE_MS_AVG, + "writeMsMax": self.WRITE_MS_MAX, + "writeRatioAvg": self.WRITE_RATIO_AVG, + "writeRatioMax": self.WRITE_RATIO_MAX, + "recordsRead": self.RECORDS_READ, + "recordsWritten": self.RECORDS_WRITTEN, + "status": self.STATUS, + "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, + "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, + "steps": [ + { + "kind": TestQueryPlanEntryStep.KIND, + "substeps": TestQueryPlanEntryStep.SUBSTEPS, + } + ], + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + 
self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_start(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.start, None) + + entry._properties["startMs"] = self.START_MS + self.assertEqual( + entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS + ) + + def test_end(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.end, None) + + entry._properties["endMs"] = self.END_MS + self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) + + +class TestScriptStackFrame(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStackFrame + + return ScriptStackFrame(resource) + + def test_procedure_id(self): + frame = self._make_one({"procedureId": "some-procedure"}) + self.assertEqual(frame.procedure_id, "some-procedure") + del frame._properties["procedureId"] + self.assertIsNone(frame.procedure_id) + + def test_start_line(self): + frame = self._make_one({"startLine": 5}) + self.assertEqual(frame.start_line, 5) + frame._properties["startLine"] = "5" + self.assertEqual(frame.start_line, 5) + + def test_start_column(self): + frame = self._make_one({"startColumn": 29}) + self.assertEqual(frame.start_column, 29) + frame._properties["startColumn"] = "29" + self.assertEqual(frame.start_column, 29) + + def test_end_line(self): + frame = self._make_one({"endLine": 9}) + self.assertEqual(frame.end_line, 9) + frame._properties["endLine"] = "9" + self.assertEqual(frame.end_line, 9) + + def test_end_column(self): + frame = self._make_one({"endColumn": 14}) + self.assertEqual(frame.end_column, 14) + frame._properties["endColumn"] = "14" + self.assertEqual(frame.end_column, 14) + + def test_text(self): + frame = self._make_one({"text": "QUERY TEXT"}) + self.assertEqual(frame.text, "QUERY TEXT") + + +class TestScriptStatistics(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStatistics + + return ScriptStatistics(resource) + + def test_evalutation_kind(self): + stats = self._make_one({"evaluationKind": "EXPRESSION"}) + self.assertEqual(stats.evaluation_kind, "EXPRESSION") + self.assertEqual(stats.stack_frames, []) + + def test_stack_frames(self): + stats = self._make_one( + { + "stackFrames": [ + { + "procedureId": "some-procedure", + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + }, + {}, + ] + } + ) + stack_frames = stats.stack_frames + self.assertEqual(len(stack_frames), 2) + stack_frame = stack_frames[0] + self.assertEqual(stack_frame.procedure_id, "some-procedure") + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + 
self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + stack_frame = stack_frames[1] + self.assertIsNone(stack_frame.procedure_id) + self.assertIsNone(stack_frame.start_line) + self.assertIsNone(stack_frame.start_column) + self.assertIsNone(stack_frame.end_line) + self.assertIsNone(stack_frame.end_column) + self.assertIsNone(stack_frame.text) + + +class TestTimelineEntry(_Base): + ELAPSED_MS = 101 + ACTIVE_UNITS = 50 + PENDING_UNITS = 98 + COMPLETED_UNITS = 520 + SLOT_MILLIS = 12029 + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import TimelineEntry + + return TimelineEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + entry = klass.from_api_repr({}) + self.assertIsNone(entry.elapsed_ms) + self.assertIsNone(entry.active_units) + self.assertIsNone(entry.pending_units) + self.assertIsNone(entry.completed_units) + self.assertIsNone(entry.slot_millis) + + def test_from_api_repr_normal(self): + resource = { + "elapsedMs": self.ELAPSED_MS, + "activeUnits": self.ACTIVE_UNITS, + "pendingUnits": self.PENDING_UNITS, + "completedUnits": self.COMPLETED_UNITS, + "totalSlotMs": self.SLOT_MILLIS, + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) + self.assertEqual(entry.active_units, self.ACTIVE_UNITS) + self.assertEqual(entry.pending_units, self.PENDING_UNITS) + self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) + self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py deleted file mode 100644 index 8590e0576..000000000 --- a/tests/unit/test_job.py +++ /dev/null @@ -1,6448 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import concurrent -import copy -import json -import textwrap -import unittest -import warnings - -import freezegun -from google.api_core import exceptions -import google.api_core.retry -import mock -import pytest -import requests -from six.moves import http_client - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -try: - from tqdm import tqdm -except (ImportError, AttributeError): # pragma: NO COVER - tqdm = None - -import google.cloud.bigquery.query - - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) - - -def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - - if connection is None: - connection = _make_connection() - - client = Client(project=project, credentials=_make_credentials(), _http=object()) - client._connection = connection - return client - - -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - -def _make_retriable_exception(): - return exceptions.TooManyRequests( - "retriable exception", errors=[{"reason": "rateLimitExceeded"}] - ) - - -def _make_job_resource( - creation_time_ms=1437767599006, - started_time_ms=1437767600007, - ended_time_ms=1437767601008, - started=False, - ended=False, - etag="abc-def-hjk", - endpoint="https://bigquery.googleapis.com", - job_type="load", - job_id="a-random-id", - project_id="some-project", - user_email="bq-user@example.com", -): - resource = { - "status": {"state": "PENDING"}, - "configuration": {job_type: {}}, - "statistics": {"creationTime": creation_time_ms, job_type: {}}, - "etag": etag, - "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, - "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( - endpoint, project_id, job_id - ), - "user_email": user_email, - } - - if started or ended: - resource["statistics"]["startTime"] = started_time_ms - resource["status"]["state"] = "RUNNING" - - if ended: - resource["statistics"]["endTime"] = ended_time_ms - resource["status"]["state"] = "DONE" - - if job_type == "query": - resource["configuration"]["query"]["destinationTable"] = { - "projectId": project_id, - "datasetId": "_temp_dataset", - "tableId": "_temp_table", - } - - return resource - - -class Test__error_result_to_exception(unittest.TestCase): - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._error_result_to_exception(*args, **kwargs) - - def test_simple(self): - error_result = {"reason": "invalid", "message": "bad request"} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.BAD_REQUEST) - self.assertTrue(exception.message.startswith("bad request")) - self.assertIn(error_result, exception.errors) - - def test_missing_reason(self): - error_result = {} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) - - -class Test_JobReference(unittest.TestCase): - JOB_ID = 
"job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobReference - - def _make_one(self, job_id, project, location): - return self._get_target_class()(job_id, project, location) - - def test_ctor(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) - self.assertEqual(job_ref.location, self.LOCATION) - - def test__to_api_repr(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual( - job_ref._to_api_repr(), - { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - }, - ) - - def test_from_api_repr(self): - api_repr = { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - } - - job_ref = self._get_target_class()._from_api_repr(api_repr) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) - self.assertEqual(job_ref.location, self.LOCATION) - - -class Test_AsyncJob(unittest.TestCase): - JOB_ID = "job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._AsyncJob - - def _make_one(self, job_id, client): - return self._get_target_class()(job_id, client) - - def _make_derived_class(self): - class Derived(self._get_target_class()): - _JOB_TYPE = "derived" - - return Derived - - def _make_derived(self, job_id, client): - return self._make_derived_class()(job_id, client) - - @staticmethod - def _job_reference(job_id, project, location): - from google.cloud.bigquery import job - - return job._JobReference(job_id, project, location) - - def test_ctor_w_bare_job_id(self): - import threading - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertIsNone(job.location) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, - ) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test_ctor_w_job_ref(self): - import threading - - other_project = "other-project-234" - client = _make_client(project=other_project) - job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) - job = self._make_one(job_ref, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertEqual(job.location, self.LOCATION) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - { - "jobReference": { - "projectId": self.PROJECT, - "location": self.LOCATION, - "jobId": self.JOB_ID, - } - }, - ) - self.assertFalse(job._result_set) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test__require_client_w_none(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job._require_client(None), client) - - def test__require_client_w_other(self): - client = _make_client(project=self.PROJECT) - other = object() - job = self._make_one(self.JOB_ID, 
client) - - self.assertIs(job._require_client(other), other) - - def test_job_type(self): - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - - self.assertEqual(derived.job_type, "derived") - - def test_parent_job_id(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.parent_job_id) - job._properties["statistics"] = {"parentJobId": "parent-job-123"} - self.assertEqual(job.parent_job_id, "parent-job-123") - - def test_script_statistics(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.script_statistics) - job._properties["statistics"] = { - "scriptStatistics": { - "evaluationKind": "EXPRESSION", - "stackFrames": [ - { - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - } - ], - } - } - script_stats = job.script_statistics - self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") - stack_frames = script_stats.stack_frames - self.assertEqual(len(stack_frames), 1) - stack_frame = stack_frames[0] - self.assertIsNone(stack_frame.procedure_id) - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - - def test_num_child_jobs(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.num_child_jobs, 0) - job._properties["statistics"] = {"numChildJobs": "17"} - self.assertEqual(job.num_child_jobs, 17) - - def test_labels_miss(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertEqual(job.labels, {}) - - def test_labels_update_in_place(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - labels = job.labels - labels["foo"] = "bar" # update in place - self.assertEqual(job.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["labels"] = labels - self.assertEqual(job.labels, labels) - - def test_etag(self): - etag = "ETAG-123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.etag) - job._properties["etag"] = etag - self.assertEqual(job.etag, etag) - - def test_self_link(self): - self_link = "https://api.example.com/123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.self_link) - job._properties["selfLink"] = self_link - self.assertEqual(job.self_link, self_link) - - def test_user_email(self): - user_email = "user@example.com" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.user_email) - job._properties["user_email"] = user_email - self.assertEqual(job.user_email, user_email) - - @staticmethod - def _datetime_and_millis(): - import datetime - import pytz - from google.cloud._helpers import _millis - - now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision - ) - return now, _millis(now) - - def test_created(self): - now, millis = self._datetime_and_millis() - client = 
_make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.created) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.created) - stats["creationTime"] = millis - self.assertEqual(job.created, now) - - def test_started(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.started) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.started) - stats["startTime"] = millis - self.assertEqual(job.started, now) - - def test_ended(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.ended) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.ended) - stats["endTime"] = millis - self.assertEqual(job.ended, now) - - def test__job_statistics(self): - statistics = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - self.assertEqual(derived._job_statistics(), {}) - stats = derived._properties["statistics"] = {} - self.assertEqual(derived._job_statistics(), {}) - stats["derived"] = statistics - self.assertEqual(derived._job_statistics(), statistics) - - def test_error_result(self): - error_result = { - "debugInfo": "DEBUG INFO", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.error_result) - status = job._properties["status"] = {} - self.assertIsNone(job.error_result) - status["errorResult"] = error_result - self.assertEqual(job.error_result, error_result) - - def test_errors(self): - errors = [ - { - "debugInfo": "DEBUG INFO", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.errors) - status = job._properties["status"] = {} - self.assertIsNone(job.errors) - status["errors"] = errors - self.assertEqual(job.errors, errors) - - def test_state(self): - state = "STATE" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.state) - status = job._properties["status"] = {} - self.assertIsNone(job.state) - status["state"] = state - self.assertEqual(job.state, state) - - def _set_properties_job(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._set_future_result = mock.Mock() - job._properties = { - "jobReference": job._properties["jobReference"], - "foo": "bar", - } - return job - - def test__set_properties_no_stats(self): - config = {"test": True} - resource = {"configuration": config} - job = self._set_properties_job() - - job._set_properties(resource) - - self.assertEqual(job._properties, resource) - - def test__set_properties_w_creation_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"creationTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["creationTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__set_properties_w_start_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - 
stats = {"startTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["startTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__set_properties_w_end_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"endTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["endTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__check_resource_config_missing_job_ref(self): - resource = {} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_job_id(self): - resource = {"jobReference": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_configuration(self): - resource = {"jobReference": {"jobId": self.JOB_ID}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_config_type(self): - resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_ok(self): - derived_config = {"foo": "bar"} - resource = { - "jobReference": {"jobId": self.JOB_ID}, - "configuration": {"derived": derived_config}, - } - klass = self._make_derived_class() - - # Should not throw. 
- klass._check_resource_config(resource) - - def test__build_resource(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job._build_resource() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test_to_api_repr(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job.to_api_repr() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test__begin_already(self): - job = self._set_properties_job() - job._properties["status"] = {"state": "WHATEVER"} - - with self.assertRaises(ValueError): - job._begin() - - def test__begin_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - path = "/projects/{}/jobs".format(self.PROJECT) - job._begin() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test__begin_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - path = "/projects/{}/jobs".format(self.PROJECT) - job._begin(client=client, retry=retry, timeout=7.5) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_exists_defaults_miss(self): - from google.cloud.exceptions import NotFound - from google.cloud.bigquery.retry import DEFAULT_RETRY - - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.side_effect = NotFound("testing") - self.assertFalse(job.exists()) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id", "location": self.LOCATION}, - timeout=None, - ) - - def test_exists_explicit_hit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - 
self.assertTrue(job.exists(client=client, retry=retry)) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id"}, - timeout=None, - ) - - def test_exists_w_timeout(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - job = self._set_properties_job() - call_api = job._client._call_api = mock.Mock() - job.exists(timeout=7.5) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={"path": PATH}, - job_ref=job, - method="GET", - path=PATH, - query_params={"fields": "id"}, - timeout=7.5, - ) - - def test_reload_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - job.reload() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_reload_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - job.reload(client=client, retry=retry, timeout=4.2) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=4.2, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_defaults(self): - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel()) - - final_attributes.assert_called() - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_explicit(self): - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - 
"projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel(client=client, timeout=7.5)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, resource) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - - def test__set_future_result_wo_done(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_result_set(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - job._result_set = True - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_w_error(self): - from google.cloud.exceptions import NotFound - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "notFound", "message": "testing"}, - } - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_called_once() - args, kw = set_exception.call_args - (exception,) = args - self.assertIsInstance(exception, NotFound) - self.assertEqual(exception.message, "testing") - self.assertEqual(kw, {}) - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_wo_error(self): - client = _make_client(project=self.PROJECT) 
- job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_called_once_with(job) - - def test_done_defaults_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - - self.assertFalse(job.done()) - - reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - - def test_done_explicit_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - retry = DEFAULT_RETRY.with_deadline(1) - - self.assertFalse(job.done(retry=retry, timeout=7.5)) - - reload_.assert_called_once_with(retry=retry, timeout=7.5) - - def test_done_already(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - - self.assertTrue(job.done()) - - def test_result_default_wo_state(self): - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - _make_retriable_exception(), - begun_job_resource, - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job.result(), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_w_retry_wo_state(self): - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. 
See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_explicit_w_state(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - # Use _set_properties() instead of directly modifying _properties so - # that the result state is set properly. - job_resource = job._properties - job_resource["status"] = {"state": "DONE"} - job._set_properties(job_resource) - timeout = 1 - - self.assertIs(job.result(timeout=timeout), job) - - conn.api_request.assert_not_called() - - def test_cancelled_wo_error_result(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_not_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "other"}} - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_w_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "stopped"}} - - self.assertTrue(job.cancelled()) - - -class Test_JobConfig(unittest.TestCase): - JOB_TYPE = "testing" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobConfig - - def _make_one(self, job_type=JOB_TYPE): - return self._get_target_class()(job_type) - - def test_ctor(self): - job_config = self._make_one() - self.assertEqual(job_config._job_type, self.JOB_TYPE) - self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) - - def test_fill_from_default(self): - from google.cloud.bigquery import QueryJobConfig - - job_config = QueryJobConfig() - job_config.dry_run = True - job_config.maximum_bytes_billed = 1000 - - default_job_config = QueryJobConfig() - default_job_config.use_query_cache = True - default_job_config.maximum_bytes_billed = 2000 - - final_job_config = job_config._fill_from_default(default_job_config) - self.assertTrue(final_job_config.dry_run) - self.assertTrue(final_job_config.use_query_cache) - self.assertEqual(final_job_config.maximum_bytes_billed, 1000) - - def test_fill_from_default_conflict(self): - from google.cloud.bigquery import QueryJobConfig - - basic_job_config = QueryJobConfig() - conflicting_job_config = self._make_one("conflicting_job_type") - self.assertNotEqual( - basic_job_config._job_type, conflicting_job_config._job_type - ) - - with self.assertRaises(TypeError): - basic_job_config._fill_from_default(conflicting_job_config) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def 
test__get_sub_prop_wo_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=None - ) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def test__get_sub_prop_w_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - default = "default" - self.assertIs( - job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value - ) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=default - ) - - @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") - def test__set_sub_prop(self, _set_sub_prop): - job_config = self._make_one() - key = "key" - value = "value" - job_config._set_sub_prop(key, value) - _set_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], value - ) - - def test_to_api_repr(self): - job_config = self._make_one() - expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} - found = job_config.to_api_repr() - self.assertEqual(found, expected) - self.assertIsNot(found, expected) # copied - - # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes - # the ctor can be called w/o arguments - - def test_labels_miss(self): - job_config = self._make_one() - self.assertEqual(job_config.labels, {}) - - def test_labels_update_in_place(self): - job_config = self._make_one() - labels = job_config.labels - labels["foo"] = "bar" # update in place - self.assertEqual(job_config.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config._properties["labels"] = labels - self.assertEqual(job_config.labels, labels) - - def test_labels_setter_invalid(self): - labels = object() - job_config = self._make_one() - with self.assertRaises(ValueError): - job_config.labels = labels - - def test_labels_setter(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config.labels = labels - self.assertEqual(job_config._properties["labels"], labels) - - -class _Base(object): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.table import TableReference - - ENDPOINT = "https://bigquery.googleapis.com" - PROJECT = "project" - SOURCE1 = "http://example.com/source1.csv" - DS_ID = "dataset_id" - DS_REF = DatasetReference(PROJECT, DS_ID) - TABLE_ID = "table_id" - TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_ID = "JOB_ID" - KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _setUpConstants(self): - import datetime - from google.cloud._helpers import UTC - - self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) - self.ETAG = "ETAG" - self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( - self.ENDPOINT, self.PROJECT, self.JOB_ID - ) - self.USER_EMAIL = "phred@example.com" - - def _table_ref(self, table_id): - from google.cloud.bigquery.table import TableReference - - return TableReference(self.DS_REF, table_id) - - def _make_resource(self, started=False, ended=False): - self._setUpConstants() - return _make_job_resource( - 
creation_time_ms=int(self.WHEN_TS * 1000), - started_time_ms=int(self.WHEN_TS * 1000), - ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, - started=started, - ended=ended, - etag=self.ETAG, - endpoint=self.ENDPOINT, - job_type=self.JOB_TYPE, - job_id=self.JOB_ID, - project_id=self.PROJECT, - user_email=self.USER_EMAIL, - ) - - def _verifyInitialReadonlyProperties(self, job): - # root elements of resource - self.assertIsNone(job.etag) - self.assertIsNone(job.self_link) - self.assertIsNone(job.user_email) - - # derived from resource['statistics'] - self.assertIsNone(job.created) - self.assertIsNone(job.started) - self.assertIsNone(job.ended) - - # derived from resource['status'] - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - def _verifyReadonlyResourceProperties(self, job, resource): - from datetime import timedelta - - statistics = resource.get("statistics", {}) - - if "creationTime" in statistics: - self.assertEqual(job.created, self.WHEN) - else: - self.assertIsNone(job.created) - - if "startTime" in statistics: - self.assertEqual(job.started, self.WHEN) - else: - self.assertIsNone(job.started) - - if "endTime" in statistics: - self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) - else: - self.assertIsNone(job.ended) - - if "etag" in resource: - self.assertEqual(job.etag, self.ETAG) - else: - self.assertIsNone(job.etag) - - if "selfLink" in resource: - self.assertEqual(job.self_link, self.RESOURCE_URL) - else: - self.assertIsNone(job.self_link) - - if "user_email" in resource: - self.assertEqual(job.user_email, self.USER_EMAIL) - else: - self.assertIsNone(job.user_email) - - -class TestLoadJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJobConfig - - return LoadJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()( - allow_jagged_rows=True, allow_quoted_newlines=True - ) - - self.assertTrue(config.allow_jagged_rows) - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_jagged_rows_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.allow_jagged_rows) - - def test_allow_jagged_rows_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowJaggedRows"] = True - self.assertTrue(config.allow_jagged_rows) - - def test_allow_jagged_rows_setter(self): - config = self._get_target_class()() - config.allow_jagged_rows = True - self.assertTrue(config._properties["load"]["allowJaggedRows"]) - - def test_allow_quoted_newlines_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowQuotedNewlines"] = True - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_setter(self): - config = self._get_target_class()() - config.allow_quoted_newlines = True - self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) - - def test_autodetect_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.autodetect) - - def test_autodetect_hit(self): - config = self._get_target_class()() - config._properties["load"]["autodetect"] = True - self.assertTrue(config.autodetect) - - def test_autodetect_setter(self): - config = self._get_target_class()() - config.autodetect = True - self.assertTrue(config._properties["load"]["autodetect"]) - - 
def test_clustering_fields_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.clustering_fields) - - def test_clustering_fields_hit(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - self.assertEqual(config.clustering_fields, fields) - - def test_clustering_fields_setter(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) - - def test_clustering_fields_setter_w_none(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - self.assertNotIn("clustering", config._properties["load"]) - - def test_create_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.create_disposition) - - def test_create_disposition_hit(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config._properties["load"]["createDisposition"] = disposition - self.assertEqual(config.create_disposition, disposition) - - def test_create_disposition_setter(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config.create_disposition = disposition - self.assertEqual(config._properties["load"]["createDisposition"], disposition) - - def test_destination_encryption_configuration_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_encryption_configuration) - - def test_destination_encryption_configuration_hit(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - self.assertEqual( - config.destination_encryption_configuration, encryption_configuration - ) - - def test_destination_encryption_configuration_setter(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config.destination_encryption_configuration = encryption_configuration - expected = {"kmsKeyName": kms_key_name} - self.assertEqual( - config._properties["load"]["destinationEncryptionConfiguration"], expected - ) - - def test_destination_encryption_configuration_setter_w_none(self): - kms_key_name = "kms-key-name" - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - config.destination_encryption_configuration = None - self.assertIsNone(config.destination_encryption_configuration) - self.assertNotIn( - "destinationEncryptionConfiguration", config._properties["load"] - ) - - def test_destination_table_description_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_description) - - def test_destination_table_description_hit(self): - description = "Description" 
- config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - self.assertEqual(config.destination_table_description, description) - - def test_destination_table_description_setter(self): - description = "Description" - config = self._get_target_class()() - config.destination_table_description = description - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_description_setter_w_fn_already(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - config.destination_table_description = description - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_description_w_none(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, - "friendlyName": friendly_name, - } - config.destination_table_description = None - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_friendly_name) - - def test_destination_table_friendly_name_hit(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_destination_table_friendly_name_setter(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_setter_w_descr_already(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_w_none(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, - "friendlyName": friendly_name, - } - config.destination_table_friendly_name = None - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_encoding_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.encoding) - - def test_encoding_hit(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config._properties["load"]["encoding"] = encoding - 
self.assertEqual(config.encoding, encoding) - - def test_encoding_setter(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config.encoding = encoding - self.assertEqual(config._properties["load"]["encoding"], encoding) - - def test_field_delimiter_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.field_delimiter) - - def test_field_delimiter_hit(self): - field_delimiter = "|" - config = self._get_target_class()() - config._properties["load"]["fieldDelimiter"] = field_delimiter - self.assertEqual(config.field_delimiter, field_delimiter) - - def test_field_delimiter_setter(self): - field_delimiter = "|" - config = self._get_target_class()() - config.field_delimiter = field_delimiter - self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) - - def test_hive_partitioning_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.hive_partitioning) - - def test_hive_partitioning_hit(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - config = self._get_target_class()() - config._properties["load"]["hivePartitioningOptions"] = { - "sourceUriPrefix": "http://foo/bar", - "mode": "STRINGS", - } - result = config.hive_partitioning - self.assertIsInstance(result, HivePartitioningOptions) - self.assertEqual(result.source_uri_prefix, "http://foo/bar") - self.assertEqual(result.mode, "STRINGS") - - def test_hive_partitioning_setter(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - hive_partitioning = HivePartitioningOptions() - hive_partitioning.source_uri_prefix = "http://foo/bar" - hive_partitioning.mode = "AUTO" - - config = self._get_target_class()() - config.hive_partitioning = hive_partitioning - self.assertEqual( - config._properties["load"]["hivePartitioningOptions"], - {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, - ) - - config.hive_partitioning = None - self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) - - def test_hive_partitioning_invalid_type(self): - config = self._get_target_class()() - - with self.assertRaises(TypeError): - config.hive_partitioning = {"mode": "AUTO"} - - def test_ignore_unknown_values_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.ignore_unknown_values) - - def test_ignore_unknown_values_hit(self): - config = self._get_target_class()() - config._properties["load"]["ignoreUnknownValues"] = True - self.assertTrue(config.ignore_unknown_values) - - def test_ignore_unknown_values_setter(self): - config = self._get_target_class()() - config.ignore_unknown_values = True - self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) - - def test_max_bad_records_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.max_bad_records) - - def test_max_bad_records_hit(self): - max_bad_records = 13 - config = self._get_target_class()() - config._properties["load"]["maxBadRecords"] = max_bad_records - self.assertEqual(config.max_bad_records, max_bad_records) - - def test_max_bad_records_setter(self): - max_bad_records = 13 - config = self._get_target_class()() - config.max_bad_records = max_bad_records - self.assertEqual(config._properties["load"]["maxBadRecords"], 
max_bad_records) - - def test_null_marker_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.null_marker) - - def test_null_marker_hit(self): - null_marker = "XXX" - config = self._get_target_class()() - config._properties["load"]["nullMarker"] = null_marker - self.assertEqual(config.null_marker, null_marker) - - def test_null_marker_setter(self): - null_marker = "XXX" - config = self._get_target_class()() - config.null_marker = null_marker - self.assertEqual(config._properties["load"]["nullMarker"], null_marker) - - def test_quote_character_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.quote_character) - - def test_quote_character_hit(self): - quote_character = "'" - config = self._get_target_class()() - config._properties["load"]["quote"] = quote_character - self.assertEqual(config.quote_character, quote_character) - - def test_quote_character_setter(self): - quote_character = "'" - config = self._get_target_class()() - config.quote_character = quote_character - self.assertEqual(config._properties["load"]["quote"], quote_character) - - def test_schema_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema) - - def test_schema_hit(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - all_props_repr = { - "mode": "REQUIRED", - "name": "foo", - "type": "INTEGER", - "description": "Foo", - } - minimal_repr = {"name": "bar", "type": "STRING"} - config._properties["load"]["schema"] = { - "fields": [all_props_repr, minimal_repr] - } - all_props, minimal = config.schema - self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) - self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) - - def test_schema_setter_fields(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - full_name_repr = { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_valid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - - full_name_repr = { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_invalid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, - ] - - with self.assertRaises(Exception): - config.schema = schema - - def test_schema_setter_unsetting_schema(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - config._properties["load"]["schema"] = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", 
mode="REQUIRED"), - ] - - config.schema = None - self.assertNotIn("schema", config._properties["load"]) - config.schema = None # no error, idempotent operation - - def test_schema_update_options_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema_update_options) - - def test_schema_update_options_hit(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - config._properties["load"]["schemaUpdateOptions"] = options - self.assertEqual(config.schema_update_options, options) - - def test_schema_update_options_setter(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - config.schema_update_options = options - self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) - - def test_skip_leading_rows_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.skip_leading_rows) - - def test_skip_leading_rows_hit_w_str(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_hit_w_integer(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = skip_leading_rows - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_setter(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config.skip_leading_rows = skip_leading_rows - self.assertEqual( - config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) - ) - - def test_source_format_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.source_format) - - def test_source_format_hit(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config._properties["load"]["sourceFormat"] = source_format - self.assertEqual(config.source_format, source_format) - - def test_source_format_setter(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config.source_format = source_format - self.assertEqual(config._properties["load"]["sourceFormat"], source_format) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["load"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", 
range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): - object_under_test.range_partitioning = object() - - def test_time_partitioning_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.time_partitioning) - - def test_time_partitioning_hit(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - with warnings.catch_warnings(record=True) as warned: - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - self.assertEqual(config.time_partitioning, expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - - with warnings.catch_warnings(record=True) as warned: - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - - config = self._get_target_class()() - config.time_partitioning = time_partitioning - expected = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - self.assertEqual(config._properties["load"]["timePartitioning"], expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter_w_none(self): - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - self.assertNotIn("timePartitioning", config._properties["load"]) - - def test_use_avro_logical_types(self): - config = self._get_target_class()() - self.assertIsNone(config.use_avro_logical_types) - - def test_use_avro_logical_types_setter(self): - config = self._get_target_class()() - config.use_avro_logical_types = True - self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) - - def test_write_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.write_disposition) - - def test_write_disposition_hit(self): - 
from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config._properties["load"]["writeDisposition"] = write_disposition - self.assertEqual(config.write_disposition, write_disposition) - - def test_write_disposition_setter(self): - from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config.write_disposition = write_disposition - self.assertEqual( - config._properties["load"]["writeDisposition"], write_disposition - ) - - -class TestLoadJob(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJob - - return LoadJob - - def _setUpConstants(self): - super(TestLoadJob, self)._setUpConstants() - self.INPUT_FILES = 2 - self.INPUT_BYTES = 12345 - self.OUTPUT_BYTES = 23456 - self.OUTPUT_ROWS = 345 - - def _make_resource(self, started=False, ended=False): - resource = super(TestLoadJob, self)._make_resource(started, ended) - config = resource["configuration"]["load"] - config["sourceUris"] = [self.SOURCE1] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - - if ended: - resource["status"] = {"state": "DONE"} - resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES - resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES - resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES - resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS - - return resource - - def _verifyBooleanConfigProperties(self, job, config): - if "allowJaggedRows" in config: - self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) - else: - self.assertIsNone(job.allow_jagged_rows) - if "allowQuotedNewlines" in config: - self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) - else: - self.assertIsNone(job.allow_quoted_newlines) - if "autodetect" in config: - self.assertEqual(job.autodetect, config["autodetect"]) - else: - self.assertIsNone(job.autodetect) - if "ignoreUnknownValues" in config: - self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) - else: - self.assertIsNone(job.ignore_unknown_values) - if "useAvroLogicalTypes" in config: - self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) - else: - self.assertIsNone(job.use_avro_logical_types) - - def _verifyEnumConfigProperties(self, job, config): - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "encoding" in config: - self.assertEqual(job.encoding, config["encoding"]) - else: - self.assertIsNone(job.encoding) - if "sourceFormat" in config: - self.assertEqual(job.source_format, config["sourceFormat"]) - else: - self.assertIsNone(job.source_format) - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "schemaUpdateOptions" in config: - self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) - else: - self.assertIsNone(job.schema_update_options) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("load") - - self._verifyBooleanConfigProperties(job, config) - 
self._verifyEnumConfigProperties(job, config) - - self.assertEqual(job.source_uris, config["sourceUris"]) - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - if "maxBadRecords" in config: - self.assertEqual(job.max_bad_records, config["maxBadRecords"]) - else: - self.assertIsNone(job.max_bad_records) - if "nullMarker" in config: - self.assertEqual(job.null_marker, config["nullMarker"]) - else: - self.assertIsNone(job.null_marker) - if "quote" in config: - self.assertEqual(job.quote_character, config["quote"]) - else: - self.assertIsNone(job.quote_character) - if "skipLeadingRows" in config: - self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) - else: - self.assertIsNone(job.skip_leading_rows) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(job.destination, self.TABLE_REF) - self.assertEqual(list(job.source_uris), [self.SOURCE1]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # derived from resource['statistics']['load'] - self.assertIsNone(job.input_file_bytes) - self.assertIsNone(job.input_files) - self.assertIsNone(job.output_bytes) - self.assertIsNone(job.output_rows) - - # set/read from resource['configuration']['load'] - self.assertIsNone(job.schema) - self.assertIsNone(job.allow_jagged_rows) - self.assertIsNone(job.allow_quoted_newlines) - self.assertIsNone(job.autodetect) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.encoding) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.ignore_unknown_values) - self.assertIsNone(job.max_bad_records) - self.assertIsNone(job.null_marker) - self.assertIsNone(job.quote_character) - self.assertIsNone(job.skip_leading_rows) - self.assertIsNone(job.source_format) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.destination_table_description) - self.assertIsNone(job.destination_table_friendly_name) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.use_avro_logical_types) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_config(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.job import LoadJobConfig - - client = _make_client(project=self.PROJECT) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, 
config - ) - self.assertEqual(job.schema, [full_name, age]) - config.destination_table_description = "Description" - expected = {"description": "Description"} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - friendly_name = "Friendly Name" - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_ctor_w_job_reference(self): - from google.cloud.bigquery import job - - client = _make_client(project=self.PROJECT) - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(load_job.project, "alternative-project") - self.assertEqual(load_job.location, "US") - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - self.assertTrue(job.done()) - - def test_result(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIs(result, job) - - def test_result_invokes_begin(self): - begun_resource = self._make_resource() - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) - client = _make_client(self.PROJECT) - client._connection = connection - - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 2) - begin_request, reload_request = connection.api_request.call_args_list - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_schema_setter_non_list(self): - from google.cloud.bigquery.job import LoadJobConfig - - config = LoadJobConfig() - with self.assertRaises(TypeError): - config.schema = object() - - def test_schema_setter_invalid_field(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): - config.schema = [full_name, object()] - - def test_schema_setter(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - self.assertEqual(config.schema, [full_name, age]) - - def test_props_set_by_server(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) - STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) - ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - EMAIL = "phred@example.com" - ERROR_RESULT = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - - client = 
_make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["etag"] = "ETAG" - job._properties["id"] = FULL_JOB_ID - job._properties["selfLink"] = URL - job._properties["user_email"] = EMAIL - - statistics = job._properties["statistics"] = {} - statistics["creationTime"] = _millis(CREATED) - statistics["startTime"] = _millis(STARTED) - statistics["endTime"] = _millis(ENDED) - - self.assertEqual(job.etag, "ETAG") - self.assertEqual(job.self_link, URL) - self.assertEqual(job.user_email, EMAIL) - - self.assertEqual(job.created, CREATED) - self.assertEqual(job.started, STARTED) - self.assertEqual(job.ended, ENDED) - - # running jobs have no load stats not yet set. - self.assertIsNone(job.output_bytes) - - load_stats = statistics["load"] = {} - load_stats["inputFileBytes"] = 12345 - load_stats["inputFiles"] = 1 - load_stats["outputBytes"] = 23456 - load_stats["outputRows"] = 345 - - self.assertEqual(job.input_file_bytes, 12345) - self.assertEqual(job.input_files, 1) - self.assertEqual(job.output_bytes, 23456) - self.assertEqual(job.output_rows, 345) - - status = job._properties["status"] = {} - - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - status["errorResult"] = ERROR_RESULT - status["errors"] = [ERROR_RESULT] - status["state"] = "STATE" - - self.assertEqual(job.error_result, ERROR_RESULT) - self.assertEqual(job.errors, [ERROR_RESULT]) - self.assertEqual(job.state, "STATE") - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = 
_make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - load_config = RESOURCE["configuration"]["load"] - load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_already_running(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["status"] = {"state": "RUNNING"} - - with self.assertRaises(ValueError): - job._begin() - - def test_begin_w_bound_client(self): - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - path = "/projects/{}/jobs".format(self.PROJECT) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_autodetect(self): - from google.cloud.bigquery.job import LoadJobConfig - - path = "/projects/{}/jobs".format(self.PROJECT) - resource = self._make_resource() - resource["configuration"]["load"]["autodetect"] = True - # Ensure None for missing server-set props - del resource["statistics"]["creationTime"] - del resource["etag"] - del resource["selfLink"] - del resource["user_email"] - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - config = LoadJobConfig() - config.autodetect = True - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - sent = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "autodetect": True, - } - }, - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=sent, timeout=None - ) - self._verifyResourceProperties(job, resource) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - from google.cloud.bigquery.schema import SchemaField - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - LOAD_CONFIGURATION = { - "sourceUris": [self.SOURCE1], - 
"destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "allowJaggedRows": True, - "allowQuotedNewlines": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "encoding": "ISO-8559-1", - "fieldDelimiter": "|", - "ignoreUnknownValues": True, - "maxBadRecords": 100, - "nullMarker": r"\N", - "quote": "'", - "skipLeadingRows": "1", - "sourceFormat": "CSV", - "useAvroLogicalTypes": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ] - }, - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], - } - RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config - ) - config.allow_jagged_rows = True - config.allow_quoted_newlines = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.encoding = "ISO-8559-1" - config.field_delimiter = "|" - config.ignore_unknown_values = True - config.max_bad_records = 100 - config.null_marker = r"\N" - config.quote_character = "'" - config.skip_leading_rows = 1 - config.source_format = "CSV" - config.use_avro_logical_types = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - self.assertEqual(len(conn2.api_request.call_args_list), 1) - req = conn2.api_request.call_args_list[0] - self.assertEqual(req[1]["method"], "POST") - self.assertEqual(req[1]["path"], PATH) - SENT = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"load": LOAD_CONFIGURATION}, - } - self.maxDiff = None - self.assertEqual(req[1]["data"], SENT) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource() - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job._begin() - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs"}, client, load_job - ) - - conn.api_request.assert_called_once() - _, request = conn.api_request.call_args - self.assertEqual(request["method"], "POST") - self.assertEqual(request["path"], "/projects/alternative-project/jobs") - self.assertEqual( - 
request["data"]["jobReference"]["projectId"], "alternative-project" - ) - self.assertEqual(request["data"]["jobReference"]["location"], "US") - self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client2, - job, - ) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_miss_w_job_reference(self): - from google.cloud.bigquery import job - - job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(load_job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs/my-job-id", - query_params={"fields": "id", "location": "US"}, - timeout=None, - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = 
self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.reload() - - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, - client, - load_job, - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - def test_cancel_w_bound_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.cancel() - - final_attributes.assert_called_with( - { - "path": "/projects/alternative-project/jobs/{}/cancel".format( - self.JOB_ID - ) - }, - client, - load_job, - ) - conn.api_request.assert_called_once_with( - method="POST", - path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - -class TestCopyJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "copy" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJobConfig - - return CopyJobConfig - - def test_ctor_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - create_disposition = CreateDisposition.CREATE_NEVER - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()( - create_disposition=create_disposition, write_disposition=write_disposition - ) - - self.assertEqual(config.create_disposition, create_disposition) - self.assertEqual(config.write_disposition, write_disposition) - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "copy": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"copy": {"destinationEncryptionConfiguration": None}} - ) - - -class TestCopyJob(unittest.TestCase, _Base): - JOB_TYPE = "copy" - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJob - - return CopyJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestCopyJob, self)._make_resource(started, ended) - config = resource["configuration"]["copy"] - config["sourceTables"] = [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("copy") - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - sources = config.get("sourceTables") - if sources is None: - sources = [config["sourceTable"]] - self.assertEqual(len(sources), len(job.sources)) - for table_ref, table in zip(sources, job.sources): - self.assertEqual(table.project, table_ref["projectId"]) - self.assertEqual(table.dataset_id, table_ref["datasetId"]) - self.assertEqual(table.table_id, table_ref["tableId"]) - - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - 
self.assertIsNone(job.create_disposition) - - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - self.assertEqual(job.destination, destination) - self.assertEqual(job.sources, [source]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['copy'] - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_sourcetable(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, 
"jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_wo_sources(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - with self.assertRaises(KeyError): - _ = job.sources - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - copy_config = RESOURCE["configuration"]["copy"] - copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CopyJobConfig - - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - COPY_CONFIGURATION = { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_NEVER, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() - client1 = 
_make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - config = CopyJobConfig() - config.create_disposition = CreateDisposition.CREATE_NEVER - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job = self._make_one(self.JOB_ID, [source], destination, client1, config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"copy": COPY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = 
_make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - -class TestExtractJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "extract" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import ExtractJobConfig - - return ExtractJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()(field_delimiter="\t", print_header=True) - - self.assertEqual(config.field_delimiter, "\t") - self.assertTrue(config.print_header) - - def test_to_api_repr(self): - from google.cloud.bigquery import job - - config = self._make_one() - config.compression = job.Compression.SNAPPY - config.destination_format = job.DestinationFormat.AVRO - config.field_delimiter = "ignored for avro" - config.print_header = False - config._properties["extract"]["someNewField"] = "some-value" - config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "extract": { - "compression": "SNAPPY", - "destinationFormat": "AVRO", - "fieldDelimiter": "ignored for avro", - "printHeader": False, - "someNewField": "some-value", - "useAvroLogicalTypes": True, - } - }, - ) - - def test_from_api_repr(self): - cls = self._get_target_class() - config = cls.from_api_repr( - { - "extract": { - "compression": "NONE", - "destinationFormat": "CSV", - "fieldDelimiter": "\t", - "printHeader": True, - "someNewField": "some-value", - "useAvroLogicalTypes": False, - } - } - ) - self.assertEqual(config.compression, "NONE") - self.assertEqual(config.destination_format, "CSV") - self.assertEqual(config.field_delimiter, "\t") - self.assertEqual(config.print_header, True) - self.assertEqual(config._properties["extract"]["someNewField"], "some-value") - self.assertEqual(config.use_avro_logical_types, False) - - -class TestExtractJob(unittest.TestCase, _Base): - JOB_TYPE = "extract" - SOURCE_TABLE = "source_table" - DESTINATION_URI = "gs://bucket_name/object_name" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import ExtractJob - - return ExtractJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestExtractJob, self)._make_resource(started, ended) - config = resource["configuration"]["extract"] - config["sourceTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - config["destinationUris"] = [self.DESTINATION_URI] - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("extract") - - self.assertEqual(job.destination_uris, config["destinationUris"]) - - if "sourceTable" in config: - table_ref = config["sourceTable"] - self.assertEqual(job.source.project, table_ref["projectId"]) - self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) - 
self.assertEqual(job.source.table_id, table_ref["tableId"]) - else: - model_ref = config["sourceModel"] - self.assertEqual(job.source.project, model_ref["projectId"]) - self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) - self.assertEqual(job.source.model_id, model_ref["modelId"]) - - if "compression" in config: - self.assertEqual(job.compression, config["compression"]) - else: - self.assertIsNone(job.compression) - - if "destinationFormat" in config: - self.assertEqual(job.destination_format, config["destinationFormat"]) - else: - self.assertIsNone(job.destination_format) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - - if "printHeader" in config: - self.assertEqual(job.print_header, config["printHeader"]) - else: - self.assertIsNone(job.print_header) - - def test_ctor(self): - from google.cloud.bigquery.table import Table - - client = _make_client(project=self.PROJECT) - source = Table(self.TABLE_REF) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - self.assertEqual(job.source.project, self.PROJECT) - self.assertEqual(job.source.dataset_id, self.DS_ID) - self.assertEqual(job.source.table_id, self.TABLE_ID) - self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['extract'] - self.assertIsNone(job.compression) - self.assertIsNone(job.destination_format) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.print_header) - - def test_destination_uri_file_counts(self): - file_counts = 23 - client = _make_client(project=self.PROJECT) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - self.assertIsNone(job.destination_uri_file_counts) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats = statistics["extract"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats["destinationUriFileCounts"] = [str(file_counts)] - self.assertEqual(job.destination_uri_file_counts, [file_counts]) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - 
self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_for_model(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceModel": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": "model_id", - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import Compression - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - extract_config = RESOURCE["configuration"]["extract"] - extract_config["compression"] = Compression.GZIP - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import Compression - from google.cloud.bigquery.job import DestinationFormat - from google.cloud.bigquery.job import ExtractJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - EXTRACT_CONFIGURATION = { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - "compression": Compression.GZIP, - "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, - "fieldDelimiter": "|", - "printHeader": False, - } - RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - config = ExtractJobConfig() - config.compression = Compression.GZIP - config.destination_format = 
DestinationFormat.NEWLINE_DELIMITED_JSON - config.field_delimiter = "|" - config.print_header = False - job = self._make_one( - self.JOB_ID, source, [self.DESTINATION_URI], client1, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"extract": EXTRACT_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, 
source, [self.DESTINATION_URI], client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - -class TestQueryJobConfig(unittest.TestCase, _Base): - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJobConfig - - return QueryJobConfig - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - config = self._make_one() - self.assertEqual(config._properties, {"query": {}}) - - def test_ctor_w_none(self): - config = self._make_one() - config.default_dataset = None - config.destination = None - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - - def test_ctor_w_properties(self): - config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) - - self.assertFalse(config.use_query_cache) - self.assertTrue(config.use_legacy_sql) - - def test_ctor_w_string_default_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._get_target_class()(default_dataset=default_dataset) - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_ctor_w_string_destinaton(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._get_target_class()(destination=destination) - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_default_dataset_w_string(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._make_one() - config.default_dataset = default_dataset - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_default_dataset_w_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - expected = dataset.DatasetReference.from_string(default_dataset) - config = self._make_one() - config.default_dataset = dataset.Dataset(expected) - self.assertEqual(config.default_dataset, expected) - - def test_destinaton_w_string(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._make_one() - config.destination = destination - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["query"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from 
google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): - object_under_test.range_partitioning = object() - - def test_time_partitioning(self): - from google.cloud.bigquery import table - - time_partitioning = table.TimePartitioning( - type_=table.TimePartitioningType.DAY, field="name" - ) - config = self._make_one() - config.time_partitioning = time_partitioning - # TimePartitioning should be configurable after assigning - time_partitioning.expiration_ms = 10000 - - self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) - self.assertEqual(config.time_partitioning.field, "name") - self.assertEqual(config.time_partitioning.expiration_ms, 10000) - - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - - def test_clustering_fields(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config.clustering_fields, fields) - - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - config = klass.from_api_repr({}) - self.assertIsNone(config.dry_run) - self.assertIsNone(config.use_legacy_sql) - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - self.assertIsNone(config.destination_encryption_configuration) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - resource = { - "query": { - "useLegacySql": True, - "query": "no property for me", - "defaultDataset": { - "projectId": "someproject", - "datasetId": "somedataset", - }, - "someNewProperty": "I should be saved, too.", - }, - "dryRun": True, - } - klass = self._get_target_class() - - config = klass.from_api_repr(resource) - - self.assertTrue(config.use_legacy_sql) - self.assertEqual( - config.default_dataset, DatasetReference("someproject", "somedataset") - ) - self.assertTrue(config.dry_run) - # Make sure unknown properties propagate. - self.assertEqual(config._properties["query"]["query"], "no property for me") - self.assertEqual( - config._properties["query"]["someNewProperty"], "I should be saved, too." - ) - - def test_to_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - config = self._make_one() - config.use_legacy_sql = True - config.default_dataset = DatasetReference("someproject", "somedataset") - config.dry_run = False - config._properties["someNewProperty"] = "Woohoo, alpha stuff." 
- - resource = config.to_api_repr() - - self.assertFalse(resource["dryRun"]) - self.assertTrue(resource["query"]["useLegacySql"]) - self.assertEqual( - resource["query"]["defaultDataset"]["projectId"], "someproject" - ) - self.assertEqual( - resource["query"]["defaultDataset"]["datasetId"], "somedataset" - ) - # Make sure unknown properties propagate. - self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "query": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"query": {"destinationEncryptionConfiguration": None}} - ) - - def test_from_api_repr_with_encryption(self): - resource = { - "query": { - "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} - } - } - klass = self._get_target_class() - config = klass.from_api_repr(resource) - self.assertEqual( - config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME - ) - - -class TestQueryJob(unittest.TestCase, _Base): - JOB_TYPE = "query" - QUERY = "select count(*) from persons" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJob - - return QueryJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) - config = resource["configuration"]["query"] - config["query"] = self.QUERY - return resource - - def _verifyBooleanResourceProperties(self, job, config): - - if "allowLargeResults" in config: - self.assertEqual(job.allow_large_results, config["allowLargeResults"]) - else: - self.assertIsNone(job.allow_large_results) - if "flattenResults" in config: - self.assertEqual(job.flatten_results, config["flattenResults"]) - else: - self.assertIsNone(job.flatten_results) - if "useQueryCache" in config: - self.assertEqual(job.use_query_cache, config["useQueryCache"]) - else: - self.assertIsNone(job.use_query_cache) - if "useLegacySql" in config: - self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) - else: - self.assertIsNone(job.use_legacy_sql) - - def _verifyIntegerResourceProperties(self, job, config): - if "maximumBillingTier" in config: - self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) - else: - self.assertIsNone(job.maximum_billing_tier) - if "maximumBytesBilled" in config: - self.assertEqual( - str(job.maximum_bytes_billed), config["maximumBytesBilled"] - ) - self.assertIsInstance(job.maximum_bytes_billed, int) - else: - self.assertIsNone(job.maximum_bytes_billed) - - def _verify_udf_resources(self, job, config): - udf_resources = config.get("userDefinedFunctionResources", ()) - self.assertEqual(len(job.udf_resources), len(udf_resources)) - for found, expected in zip(job.udf_resources, udf_resources): - if "resourceUri" in expected: - self.assertEqual(found.udf_type, "resourceUri") - self.assertEqual(found.value, expected["resourceUri"]) - else: - self.assertEqual(found.udf_type, "inlineCode") - 
self.assertEqual(found.value, expected["inlineCode"]) - - def _verifyQueryParameters(self, job, config): - query_parameters = config.get("queryParameters", ()) - self.assertEqual(len(job.query_parameters), len(query_parameters)) - for found, expected in zip(job.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verify_table_definitions(self, job, config): - table_defs = config.get("tableDefinitions") - if job.table_definitions is None: - self.assertIsNone(table_defs) - else: - self.assertEqual(len(job.table_definitions), len(table_defs)) - for found_key, found_ec in job.table_definitions.items(): - expected_ec = table_defs.get(found_key) - self.assertIsNotNone(expected_ec) - self.assertEqual(found_ec.to_api_repr(), expected_ec) - - def _verify_configuration_properties(self, job, configuration): - if "dryRun" in configuration: - self.assertEqual(job.dry_run, configuration["dryRun"]) - else: - self.assertIsNone(job.dry_run) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - configuration = resource.get("configuration", {}) - self._verify_configuration_properties(job, configuration) - - query_config = resource.get("configuration", {}).get("query") - self._verifyBooleanResourceProperties(job, query_config) - self._verifyIntegerResourceProperties(job, query_config) - self._verify_udf_resources(job, query_config) - self._verifyQueryParameters(job, query_config) - self._verify_table_definitions(job, query_config) - - self.assertEqual(job.query, query_config["query"]) - if "createDisposition" in query_config: - self.assertEqual(job.create_disposition, query_config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "defaultDataset" in query_config: - ds_ref = job.default_dataset - ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} - self.assertEqual(ds_ref, query_config["defaultDataset"]) - else: - self.assertIsNone(job.default_dataset) - if "destinationTable" in query_config: - table = job.destination - tb_ref = { - "projectId": table.project, - "datasetId": table.dataset_id, - "tableId": table.table_id, - } - self.assertEqual(tb_ref, query_config["destinationTable"]) - else: - self.assertIsNone(job.destination) - if "priority" in query_config: - self.assertEqual(job.priority, query_config["priority"]) - else: - self.assertIsNone(job.priority) - if "writeDisposition" in query_config: - self.assertEqual(job.write_disposition, query_config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "destinationEncryptionConfiguration" in query_config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - query_config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - if "schemaUpdateOptions" in query_config: - self.assertEqual( - job.schema_update_options, query_config["schemaUpdateOptions"] - ) - else: - self.assertIsNone(job.schema_update_options) - - def test_ctor_defaults(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query, self.QUERY) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - 
self.assertFalse(job.use_legacy_sql) - - # set/read from resource['configuration']['query'] - self.assertIsNone(job.allow_large_results) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.default_dataset) - self.assertIsNone(job.destination) - self.assertIsNone(job.flatten_results) - self.assertIsNone(job.priority) - self.assertIsNone(job.use_query_cache) - self.assertIsNone(job.dry_run) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.maximum_billing_tier) - self.assertIsNone(job.maximum_bytes_billed) - self.assertIsNone(job.table_definitions) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig() - config.udf_resources = udf_resources - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig(query_parameters=query_parameters) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.query_parameters, query_parameters) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"query": {"query": self.QUERY}}, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from 
google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - query_config = RESOURCE["configuration"]["query"] - query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE - query_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancelled(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "stopped"}, - } - - self.assertTrue(job.cancelled()) - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - call_args = fake_reload.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - def test_done_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. 
- expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - def test_query_plan(self): - from google.cloud._helpers import _RFC3339_MICROS - from google.cloud.bigquery.job import QueryPlanEntry - from google.cloud.bigquery.job import QueryPlanEntryStep - - plan_entries = [ - { - "name": "NAME", - "id": "1234", - "inputStages": ["88", "101"], - "startMs": "1522540800000", - "endMs": "1522540804000", - "parallelInputs": "1000", - "completedParallelInputs": "5", - "waitMsAvg": "33", - "waitMsMax": "400", - "waitRatioAvg": 2.71828, - "waitRatioMax": 3.14159, - "readMsAvg": "45", - "readMsMax": "90", - "readRatioAvg": 1.41421, - "readRatioMax": 1.73205, - "computeMsAvg": "55", - "computeMsMax": "99", - "computeRatioAvg": 0.69315, - "computeRatioMax": 1.09861, - "writeMsAvg": "203", - "writeMsMax": "340", - "writeRatioAvg": 3.32193, - "writeRatioMax": 2.30258, - "recordsRead": "100", - "recordsWritten": "1", - "status": "STATUS", - "shuffleOutputBytes": "1024", - "shuffleOutputBytesSpilled": "1", - "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query_plan, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.query_plan, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.query_plan, []) - - query_stats["queryPlan"] = plan_entries - - self.assertEqual(len(job.query_plan), len(plan_entries)) - for found, expected in zip(job.query_plan, plan_entries): - self.assertIsInstance(found, QueryPlanEntry) - self.assertEqual(found.name, expected["name"]) - self.assertEqual(found.entry_id, expected["id"]) - self.assertEqual(len(found.input_stages), len(expected["inputStages"])) - for f_id in found.input_stages: - self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) - self.assertEqual( - found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" - ) - self.assertEqual( - found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" - ) - self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) - self.assertEqual( - found.completed_parallel_inputs, - int(expected["completedParallelInputs"]), - ) - self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) - self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) - self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) - self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) - self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) - self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) - self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) - self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) - self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) - self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) - self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) - self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) - self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) - self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) - self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) - 
self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) - self.assertEqual(found.records_read, int(expected["recordsRead"])) - self.assertEqual(found.records_written, int(expected["recordsWritten"])) - self.assertEqual(found.status, expected["status"]) - self.assertEqual( - found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) - ) - self.assertEqual( - found.shuffle_output_bytes_spilled, - int(expected["shuffleOutputBytesSpilled"]), - ) - - self.assertEqual(len(found.steps), len(expected["steps"])) - for f_step, e_step in zip(found.steps, expected["steps"]): - self.assertIsInstance(f_step, QueryPlanEntryStep) - self.assertEqual(f_step.kind, e_step["kind"]) - self.assertEqual(f_step.substeps, e_step["substeps"]) - - def test_total_bytes_processed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_processed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats["totalBytesProcessed"] = str(total_bytes) - self.assertEqual(job.total_bytes_processed, total_bytes) - - def test_total_bytes_billed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_billed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats["totalBytesBilled"] = str(total_bytes) - self.assertEqual(job.total_bytes_billed, total_bytes) - - def test_billing_tier(self): - billing_tier = 1 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.billing_tier) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.billing_tier) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.billing_tier) - - query_stats["billingTier"] = billing_tier - self.assertEqual(job.billing_tier, billing_tier) - - def test_cache_hit(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.cache_hit) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.cache_hit) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.cache_hit) - - query_stats["cacheHit"] = True - self.assertTrue(job.cache_hit) - - def test_ddl_operation_performed(self): - op = "SKIP" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_operation_performed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats["ddlOperationPerformed"] = op - self.assertEqual(job.ddl_operation_performed, op) - - def test_ddl_target_routine(self): - from google.cloud.bigquery.routine import RoutineReference - - ref_routine = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "routineId": "targetroutine", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_routine) - - statistics = job._properties["statistics"] = {} - 
self.assertIsNone(job.ddl_target_routine) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_target_routine) - - query_stats["ddlTargetRoutine"] = ref_routine - self.assertIsInstance(job.ddl_target_routine, RoutineReference) - self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") - self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_routine.project, self.PROJECT) - - def test_ddl_target_table(self): - from google.cloud.bigquery.table import TableReference - - ref_table = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "tableId": "targettable", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_table) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats["ddlTargetTable"] = ref_table - self.assertIsInstance(job.ddl_target_table, TableReference) - self.assertEqual(job.ddl_target_table.table_id, "targettable") - self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_table.project, self.PROJECT) - - def test_num_dml_affected_rows(self): - num_rows = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.num_dml_affected_rows) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats["numDmlAffectedRows"] = str(num_rows) - self.assertEqual(job.num_dml_affected_rows, num_rows) - - def test_slot_millis(self): - millis = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.slot_millis) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.slot_millis) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.slot_millis) - - query_stats["totalSlotMs"] = millis - self.assertEqual(job.slot_millis, millis) - - def test_statement_type(self): - statement_type = "SELECT" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.statement_type) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.statement_type) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.statement_type) - - query_stats["statementType"] = statement_type - self.assertEqual(job.statement_type, statement_type) - - def test_referenced_tables(self): - from google.cloud.bigquery.table import TableReference - - ref_tables_resource = [ - {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, - {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, - { - "projectId": "other-project-123", - "datasetId": "other-dataset", - "tableId": "other-table", - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.referenced_tables, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats["referencedTables"] = ref_tables_resource - - local1, local2, remote = job.referenced_tables - - 
self.assertIsInstance(local1, TableReference) - self.assertEqual(local1.table_id, "local1") - self.assertEqual(local1.dataset_id, "dataset") - self.assertEqual(local1.project, self.PROJECT) - - self.assertIsInstance(local2, TableReference) - self.assertEqual(local2.table_id, "local2") - self.assertEqual(local2.dataset_id, "dataset") - self.assertEqual(local2.project, self.PROJECT) - - self.assertIsInstance(remote, TableReference) - self.assertEqual(remote.table_id, "other-table") - self.assertEqual(remote.dataset_id, "other-dataset") - self.assertEqual(remote.project, "other-project-123") - - def test_timeline(self): - timeline_resource = [ - { - "elapsedMs": 1, - "activeUnits": 22, - "pendingUnits": 33, - "completedUnits": 44, - "totalSlotMs": 101, - } - ] - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.timeline, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.timeline, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.timeline, []) - - query_stats["timeline"] = timeline_resource - - self.assertEqual(len(job.timeline), len(timeline_resource)) - self.assertEqual(job.timeline[0].elapsed_ms, 1) - self.assertEqual(job.timeline[0].active_units, 22) - self.assertEqual(job.timeline[0].pending_units, 33) - self.assertEqual(job.timeline[0].completed_units, 44) - self.assertEqual(job.timeline[0].slot_millis, 101) - - def test_undeclared_query_parameters(self): - from google.cloud.bigquery.query import ArrayQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameter - from google.cloud.bigquery.query import StructQueryParameter - - undeclared = [ - { - "name": "my_scalar", - "parameterType": {"type": "STRING"}, - "parameterValue": {"value": "value"}, - }, - { - "name": "my_array", - "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, - "parameterValue": { - "arrayValues": [{"value": "1066"}, {"value": "1745"}] - }, - }, - { - "name": "my_struct", - "parameterType": { - "type": "STRUCT", - "structTypes": [{"name": "count", "type": {"type": "INT64"}}], - }, - "parameterValue": {"structValues": {"count": {"value": "123"}}}, - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.undeclared_query_parameters, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats["undeclaredQueryParameters"] = undeclared - - scalar, array, struct = job.undeclared_query_parameters - - self.assertIsInstance(scalar, ScalarQueryParameter) - self.assertEqual(scalar.name, "my_scalar") - self.assertEqual(scalar.type_, "STRING") - self.assertEqual(scalar.value, "value") - - self.assertIsInstance(array, ArrayQueryParameter) - self.assertEqual(array.name, "my_array") - self.assertEqual(array.array_type, "INT64") - self.assertEqual(array.values, [1066, 1745]) - - self.assertIsInstance(struct, StructQueryParameter) - self.assertEqual(struct.name, "my_struct") - self.assertEqual(struct.struct_types, {"count": "INT64"}) - self.assertEqual(struct.struct_values, {"count": 123}) - - def test_estimated_bytes_processed(self): - est_bytes = 123456 - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.estimated_bytes_processed) - - statistics = 
job._properties["statistics"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats["estimatedBytesProcessed"] = str(est_bytes) - self.assertEqual(job.estimated_bytes_processed, est_bytes) - - def test_result(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - # Explicitly set totalRows to be different from the initial - # response to test update during iteration. - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 2) - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - # Test that the total_rows property has changed during iteration, based - # on the response from tabledata.list. - self.assertEqual(result.total_rows, 1) - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] - ) - - def test_result_with_done_job_calls_get_query_results(self): - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "1", - } - job_resource = self._make_resource(started=True, ended=True) - job_resource["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection(query_resource_done, tabledata_resource) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - 
path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) - - def test_result_with_max_results(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - max_results = 3 - - result = job.result(max_results=max_results) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 3) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results - ) - - def test_result_w_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = _make_connection( - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - query_resource_done, - exceptions.NotFound("not normally retriable"), - job_resource_done, - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - - connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] - ) - - def test_result_w_empty_schema(self): - from google.cloud.bigquery.table import _EmptyRowIterator - - # Destination table may have no schema for some DDL and DML queries. 
- query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource, query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIsInstance(result, _EmptyRowIterator) - self.assertEqual(list(result), []) - - def test_result_invokes_begins(self): - begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, - incomplete_resource, - query_resource, - done_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_timeout(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), - ) - self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_page_size(self): - # Arrange - query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "4", - } - job_resource = self._make_resource(started=True, ended=True) - q_config = job_resource["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - tabledata_resource = { - "totalRows": 4, - "pageToken": "some-page-token", - "rows": [ - {"f": [{"v": "row1"}]}, - {"f": [{"v": "row2"}]}, - {"f": [{"v": "row3"}]}, - ], - } - 
tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - # Act - result = job.result(page_size=3) - - # Assert - actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) - - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] - ) - - def test_result_with_start_index(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - {"f": [{"v": "jkl"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - start_index = 1 - - result = job.result(start_index=start_index) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 4) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["startIndex"], start_index - ) - - def test_result_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - error_result = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "invalid", - } - job._properties["status"] = { - "errorResult": error_result, - "errors": [error_result], - "state": "DONE", - } - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": job._properties["jobReference"]} - ) - job._set_future_result() - - with self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test_result_transport_timeout_error(self): - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch 
= mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=requests.exceptions.Timeout("Server response took too long."), - ) - - # Make sure that timeout errors get rebranded to concurrent futures timeout. - with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1) - - def test__begin_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch = mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=exceptions.BadRequest("Syntax error in SQL query"), - ) - - with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test__begin_w_timeout(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(timeout=7.5) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False} - }, - }, - timeout=7.5, - ) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - DS_ID = "DATASET" - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - - config = QueryJobConfig() - config.default_dataset = DatasetReference(self.PROJECT, DS_ID) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertIsNone(job.default_dataset) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "defaultDataset": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def 
test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.job import QueryPriority - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - TABLE = "TABLE" - DS_ID = "DATASET" - RESOURCE = self._make_resource(ended=True) - QUERY_CONFIGURATION = { - "query": self.QUERY, - "allowLargeResults": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": TABLE, - }, - "flattenResults": True, - "priority": QueryPriority.INTERACTIVE, - "useQueryCache": True, - "useLegacySql": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "maximumBillingTier": 4, - "maximumBytesBilled": "123456", - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], - } - RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION - RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(TABLE) - - config = QueryJobConfig() - config.allow_large_results = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.default_dataset = dataset_ref - config.destination = table_ref - config.dry_run = True - config.flatten_results = True - config.maximum_billing_tier = 4 - config.priority = QueryPriority.INTERACTIVE - config.use_legacy_sql = True - config.use_query_cache = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.maximum_bytes_billed = 123456 - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] - job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_udf(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}, - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - udf_resources = [ - UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", 
INLINE_UDF_CODE), - ] - config = QueryJobConfig() - config.udf_resources = udf_resources - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.udf_resources, udf_resources) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "userDefinedFunctionResources": [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}, - ], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "NAMED" - config["queryParameters"] = [ - { - "name": "foo", - "parameterType": {"type": "INT64"}, - "parameterValue": {"value": "123"}, - } - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "NAMED", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter.positional("INT64", 123)] - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "POSITIONAL" - config["queryParameters"] = [ - {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - 
job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "POSITIONAL", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_table_defs(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.external_config import BigtableColumn - from google.cloud.bigquery.external_config import BigtableColumnFamily - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - - bt_config = ExternalConfig("BIGTABLE") - bt_config.ignore_unknown_values = True - bt_config.options.read_rowkey_as_string = True - cf = BigtableColumnFamily() - cf.family_id = "cf" - col = BigtableColumn() - col.field_name = "fn" - cf.columns = [col] - bt_config.options.column_families = [cf] - BT_CONFIG_RESOURCE = { - "sourceFormat": "BIGTABLE", - "ignoreUnknownValues": True, - "bigtableOptions": { - "readRowkeyAsString": True, - "columnFamilies": [ - {"familyId": "cf", "columns": [{"fieldName": "fn"}]} - ], - }, - } - CSV_CONFIG_RESOURCE = { - "sourceFormat": "CSV", - "maxBadRecords": 8, - "csvOptions": {"allowJaggedRows": True}, - } - csv_config = ExternalConfig("CSV") - csv_config.max_bad_records = 8 - csv_config.options.allow_jagged_rows = True - bt_table = "bigtable-table" - csv_table = "csv-table" - RESOURCE["configuration"]["query"]["tableDefinitions"] = { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - } - want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.table_definitions = {bt_table: bt_config, csv_table: csv_config} - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "tableDefinitions": { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, want_resource) - - def test_dry_run_query(self): - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.dry_run = True - 
job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False}, - "dryRun": True, - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, self.QUERY, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - q_config = RESOURCE["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": DEST_TABLE, - } - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = 
_make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, self.QUERY, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_timeout(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(timeout=4.2) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=4.2 - ) - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - { - "name": "spouse_1", - "type": "RECORD", - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - { - "name": "spouse_2", - "type": "RECORD", - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - ] - }, - } - tabledata_resource = { - "rows": [ - { - "f": [ - {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, - {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, - ] - }, - { - "f": [ - {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, - {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, - ] - }, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - tbl = job.to_arrow(create_bqstorage_client=False) - - self.assertIsInstance(tbl, pyarrow.Table) - self.assertEqual(tbl.num_rows, 2) - - # Check the schema. 
- self.assertEqual(tbl.schema[0].name, "spouse_1") - self.assertEqual(tbl.schema[0].type[0].name, "name") - self.assertEqual(tbl.schema[0].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[0].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[0].type[1].type)) - self.assertEqual(tbl.schema[1].name, "spouse_2") - self.assertEqual(tbl.schema[1].type[0].name, "name") - self.assertEqual(tbl.schema[1].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[1].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[1].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type[1].type)) - - # Check the data. - tbl_data = tbl.to_pydict() - spouse_1 = tbl_data["spouse_1"] - self.assertEqual( - spouse_1, - [ - {"name": "Phred Phlyntstone", "age": 32}, - {"name": "Bhettye Rhubble", "age": 27}, - ], - ) - spouse_2 = tbl_data["spouse_2"] - self.assertEqual( - spouse_2, - [ - {"name": "Wylma Phlyntstone", "age": 29}, - {"name": "Bharney Rhubble", "age": 33}, - ], - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_ddl_query(self): - # Destination table may have no schema for some DDL and DML queries. 
- query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - df = job.to_dataframe() - - self.assertEqual(len(df), 0) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_bqstorage(self): - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, - data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/{}".format(self.PROJECT), - read_session=expected_session, - max_stream_count=0, # Use default number of streams for best performance. 
- ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "start_timestamp", "type": "TIMESTAMP"}, - {"name": "seconds", "type": "INT64"}, - {"name": "miles", "type": "FLOAT64"}, - {"name": "km", "type": "FLOAT64"}, - {"name": "payment_type", "type": "STRING"}, - {"name": "complete", "type": "BOOL"}, - {"name": "date", "type": "DATE"}, - ] - }, - } - row_data = [ - [ - "1.4338368E9", - "420", - "1.1", - "1.77", - "Cto_dataframeash", - "true", - "1999-12-01", - ], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 3) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") - self.assertEqual(df.miles.dtype.name, "float64") - self.assertEqual(df.km.dtype.name, "float16") - self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") - self.assertEqual(df.date.dtype.name, "object") - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_date_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "1", - "schema": {"fields": [{"name": "date", "type": "DATE"}]}, - } - row_data = [ - ["1999-12-01"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.date.dtype.name, "datetime64[ns]") - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_with_progress_bar(self, tqdm_mock): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": 
True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}] - }, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, - query_resource, - done_resource, - query_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() - - job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() - - def test_iter(self): - import types - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "0", - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - self.assertIsInstance(iter(job), types.GeneratorType) - - -class TestQueryPlanEntryStep(unittest.TestCase, _Base): - KIND = "KIND" - SUBSTEPS = ("SUB1", "SUB2") - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntryStep - - return QueryPlanEntryStep - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, list(self.SUBSTEPS)) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - step = klass.from_api_repr({}) - self.assertIsNone(step.kind) - self.assertEqual(step.substeps, []) - - def test_from_api_repr_normal(self): - resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} - klass = self._get_target_class() - step = klass.from_api_repr(resource) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, list(self.SUBSTEPS)) - - def test___eq___mismatched_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertNotEqual(step, object()) - - def test___eq___mismatch_kind(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one("OTHER", self.SUBSTEPS) - self.assertNotEqual(step, other) - - def test___eq___mismatch_substeps(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, ()) - self.assertNotEqual(step, other) - - def test___eq___hit(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step, other) - - def test___eq___wrong_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertFalse(step == "hello") - - -class TestQueryPlanEntry(unittest.TestCase, _Base): - NAME = "NAME" - ENTRY_ID = 1234 - START_MS = 1522540800000 - END_MS = 1522540804000 - INPUT_STAGES = (88, 101) - PARALLEL_INPUTS = 1000 - COMPLETED_PARALLEL_INPUTS = 5 - WAIT_MS_AVG = 33 - WAIT_MS_MAX = 400 - WAIT_RATIO_AVG = 2.71828 - WAIT_RATIO_MAX = 3.14159 - READ_MS_AVG = 45 - READ_MS_MAX = 90 - READ_RATIO_AVG = 1.41421 - READ_RATIO_MAX = 1.73205 - COMPUTE_MS_AVG = 55 - COMPUTE_MS_MAX = 99 - 
COMPUTE_RATIO_AVG = 0.69315 - COMPUTE_RATIO_MAX = 1.09861 - WRITE_MS_AVG = 203 - WRITE_MS_MAX = 340 - WRITE_RATIO_AVG = 3.32193 - WRITE_RATIO_MAX = 2.30258 - RECORDS_READ = 100 - RECORDS_WRITTEN = 1 - STATUS = "STATUS" - SHUFFLE_OUTPUT_BYTES = 1024 - SHUFFLE_OUTPUT_BYTES_SPILLED = 1 - - START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" - END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntry - - return QueryPlanEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - - self.assertIsNone(entry.name) - self.assertIsNone(entry.entry_id) - self.assertEqual(entry.input_stages, []) - self.assertIsNone(entry.start) - self.assertIsNone(entry.end) - self.assertIsNone(entry.parallel_inputs) - self.assertIsNone(entry.completed_parallel_inputs) - self.assertIsNone(entry.wait_ms_avg) - self.assertIsNone(entry.wait_ms_max) - self.assertIsNone(entry.wait_ratio_avg) - self.assertIsNone(entry.wait_ratio_max) - self.assertIsNone(entry.read_ms_avg) - self.assertIsNone(entry.read_ms_max) - self.assertIsNone(entry.read_ratio_avg) - self.assertIsNone(entry.read_ratio_max) - self.assertIsNone(entry.compute_ms_avg) - self.assertIsNone(entry.compute_ms_max) - self.assertIsNone(entry.compute_ratio_avg) - self.assertIsNone(entry.compute_ratio_max) - self.assertIsNone(entry.write_ms_avg) - self.assertIsNone(entry.write_ms_max) - self.assertIsNone(entry.write_ratio_avg) - self.assertIsNone(entry.write_ratio_max) - self.assertIsNone(entry.records_read) - self.assertIsNone(entry.records_written) - self.assertIsNone(entry.status) - self.assertIsNone(entry.shuffle_output_bytes) - self.assertIsNone(entry.shuffle_output_bytes_spilled) - self.assertEqual(entry.steps, []) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.job import QueryPlanEntryStep - - steps = [ - QueryPlanEntryStep( - kind=TestQueryPlanEntryStep.KIND, - substeps=TestQueryPlanEntryStep.SUBSTEPS, - ) - ] - resource = { - "name": self.NAME, - "id": self.ENTRY_ID, - "inputStages": self.INPUT_STAGES, - "startMs": self.START_MS, - "endMs": self.END_MS, - "waitMsAvg": self.WAIT_MS_AVG, - "waitMsMax": self.WAIT_MS_MAX, - "waitRatioAvg": self.WAIT_RATIO_AVG, - "waitRatioMax": self.WAIT_RATIO_MAX, - "readMsAvg": self.READ_MS_AVG, - "readMsMax": self.READ_MS_MAX, - "readRatioAvg": self.READ_RATIO_AVG, - "readRatioMax": self.READ_RATIO_MAX, - "computeMsAvg": self.COMPUTE_MS_AVG, - "computeMsMax": self.COMPUTE_MS_MAX, - "computeRatioAvg": self.COMPUTE_RATIO_AVG, - "computeRatioMax": self.COMPUTE_RATIO_MAX, - "writeMsAvg": self.WRITE_MS_AVG, - "writeMsMax": self.WRITE_MS_MAX, - "writeRatioAvg": self.WRITE_RATIO_AVG, - "writeRatioMax": self.WRITE_RATIO_MAX, - "recordsRead": self.RECORDS_READ, - "recordsWritten": self.RECORDS_WRITTEN, - "status": self.STATUS, - "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, - "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, - "steps": [ - { - "kind": TestQueryPlanEntryStep.KIND, - "substeps": TestQueryPlanEntryStep.SUBSTEPS, - } - ], - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.name, self.NAME) - self.assertEqual(entry.entry_id, self.ENTRY_ID) - self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) - self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) - self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) - self.assertEqual(entry.read_ratio_max, 
self.READ_RATIO_MAX) - self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) - self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) - self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) - self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) - self.assertEqual(entry.records_read, self.RECORDS_READ) - self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) - self.assertEqual(entry.status, self.STATUS) - self.assertEqual(entry.steps, steps) - - def test_start(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.start, None) - - entry._properties["startMs"] = self.START_MS - self.assertEqual( - entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS - ) - - def test_end(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.end, None) - - entry._properties["endMs"] = self.END_MS - self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) - - -class TestScriptStackFrame(unittest.TestCase, _Base): - def _make_one(self, resource): - from google.cloud.bigquery.job import ScriptStackFrame - - return ScriptStackFrame(resource) - - def test_procedure_id(self): - frame = self._make_one({"procedureId": "some-procedure"}) - self.assertEqual(frame.procedure_id, "some-procedure") - del frame._properties["procedureId"] - self.assertIsNone(frame.procedure_id) - - def test_start_line(self): - frame = self._make_one({"startLine": 5}) - self.assertEqual(frame.start_line, 5) - frame._properties["startLine"] = "5" - self.assertEqual(frame.start_line, 5) - - def test_start_column(self): - frame = self._make_one({"startColumn": 29}) - self.assertEqual(frame.start_column, 29) - frame._properties["startColumn"] = "29" - self.assertEqual(frame.start_column, 29) - - def test_end_line(self): - frame = self._make_one({"endLine": 9}) - self.assertEqual(frame.end_line, 9) - frame._properties["endLine"] = "9" - self.assertEqual(frame.end_line, 9) - - def test_end_column(self): - frame = self._make_one({"endColumn": 14}) - self.assertEqual(frame.end_column, 14) - frame._properties["endColumn"] = "14" - self.assertEqual(frame.end_column, 14) - - def test_text(self): - frame = self._make_one({"text": "QUERY TEXT"}) - self.assertEqual(frame.text, "QUERY TEXT") - - -class TestScriptStatistics(unittest.TestCase, _Base): - def _make_one(self, resource): - from google.cloud.bigquery.job import ScriptStatistics - - return ScriptStatistics(resource) - - def test_evalutation_kind(self): - stats = self._make_one({"evaluationKind": "EXPRESSION"}) - self.assertEqual(stats.evaluation_kind, "EXPRESSION") - self.assertEqual(stats.stack_frames, []) - - def test_stack_frames(self): - stats = self._make_one( - { - "stackFrames": [ - { - "procedureId": "some-procedure", - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - }, - {}, - ] - } - ) - stack_frames = stats.stack_frames - self.assertEqual(len(stack_frames), 2) - stack_frame = stack_frames[0] - self.assertEqual(stack_frame.procedure_id, "some-procedure") - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - stack_frame = stack_frames[1] - 
self.assertIsNone(stack_frame.procedure_id) - self.assertIsNone(stack_frame.start_line) - self.assertIsNone(stack_frame.start_column) - self.assertIsNone(stack_frame.end_line) - self.assertIsNone(stack_frame.end_column) - self.assertIsNone(stack_frame.text) - - -class TestTimelineEntry(unittest.TestCase, _Base): - ELAPSED_MS = 101 - ACTIVE_UNITS = 50 - PENDING_UNITS = 98 - COMPLETED_UNITS = 520 - SLOT_MILLIS = 12029 - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import TimelineEntry - - return TimelineEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - entry = klass.from_api_repr({}) - self.assertIsNone(entry.elapsed_ms) - self.assertIsNone(entry.active_units) - self.assertIsNone(entry.pending_units) - self.assertIsNone(entry.completed_units) - self.assertIsNone(entry.slot_millis) - - def test_from_api_repr_normal(self): - resource = { - "elapsedMs": self.ELAPSED_MS, - "activeUnits": self.ACTIVE_UNITS, - "pendingUnits": self.PENDING_UNITS, - "completedUnits": self.COMPLETED_UNITS, - "totalSlotMs": self.SLOT_MILLIS, - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) - self.assertEqual(entry.active_units, self.ACTIVE_UNITS) - self.assertEqual(entry.pending_units, self.PENDING_UNITS) - self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) - self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) - - -@pytest.mark.parametrize( - "query,expected", - ( - (None, False), - ("", False), - ("select name, age from table", False), - ("select name, age from table LIMIT 10;", False), - ("select name, age from table order by other_column;", True), - ("Select name, age From table Order By other_column", True), - ("SELECT name, age FROM table ORDER BY other_column;", True), - ("select name, age from table order\nby other_column", True), - ("Select name, age From table Order\nBy other_column;", True), - ("SELECT name, age FROM table ORDER\nBY other_column", True), - ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), - ), -) -def test__contains_order_by(query, expected): - from google.cloud.bigquery import job as mut - - if expected: - assert mut._contains_order_by(query) - else: - assert not mut._contains_order_by(query) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -@pytest.mark.parametrize( - "query", - ( - "select name, age from table order by other_column;", - "Select name, age From table Order By other_column;", - "SELECT name, age FROM table ORDER BY other_column;", - "select name, age from table order\nby other_column;", - "Select name, age From table Order\nBy other_column;", - "SELECT name, age FROM table ORDER\nBY other_column;", - "SelecT name, age froM table OrdeR \n\t BY other_column;", - ), -) -def test_to_dataframe_bqstorage_preserve_order(query): - from google.cloud.bigquery.job import QueryJob as target_class - - job_resource = _make_job_resource( - project_id="test-project", job_type="query", ended=True - ) - job_resource["configuration"]["query"]["query"] = query - job_resource["status"] = {"state": "DONE"} - get_query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": "test-project", "jobId": "test-job"}, - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - "totalRows": "4", 
- } - connection = _make_connection(get_query_results_resource, job_resource) - client = _make_client(connection=connection) - job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **job_resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/test-project", - read_session=expected_session, - max_stream_count=1, # Use a single stream to preserve row order. - ) From 0c3476d56380d70115f6fd765bf5c5261967052f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 13:42:01 -0600 Subject: [PATCH 15/17] perf: use `jobs.getQueryResults` to download result sets (#363) * refactor: break job into multiple modules Original paths are retained for backwards compatibility. * perf: use `jobs.getQueryResults` to download result sets Replaces `tabledata.list` when `RowIterator` is used for query results. This likely also fixes a few edge cases around BigQuery scripting jobs. * revert unnecessary changes to _get_query_results * simplify RowIterator. no need to hack Table object * fix tests for bqstorage warning * populate location --- google/cloud/bigquery/_pandas_helpers.py | 16 +-- google/cloud/bigquery/client.py | 104 +++++++++++++++--- google/cloud/bigquery/job/query.py | 14 +-- google/cloud/bigquery/table.py | 17 +-- tests/unit/job/helpers.py | 10 +- tests/unit/job/test_base.py | 42 ++++--- tests/unit/job/test_query.py | 133 +++++++++++++---------- tests/unit/test__pandas_helpers.py | 18 +-- tests/unit/test_client.py | 12 +- tests/unit/test_magics.py | 10 +- tests/unit/test_table.py | 11 +- 11 files changed, 256 insertions(+), 131 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 57c8f95f6..7774ce26b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -474,7 +474,7 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) -def _tabledata_list_page_to_arrow(page, column_names, arrow_types): +def _row_iterator_page_to_arrow(page, column_names, arrow_types): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -490,8 +490,8 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types): return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) -def download_arrow_tabledata_list(pages, bq_schema): - """Use tabledata.list to construct an iterable of RecordBatches. +def download_arrow_row_iterator(pages, bq_schema): + """Use HTTP JSON RowIterator to construct an iterable of RecordBatches. 
Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -510,10 +510,10 @@ def download_arrow_tabledata_list(pages, bq_schema): arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) + yield _row_iterator_page_to_arrow(page, column_names, arrow_types) -def _tabledata_list_page_to_dataframe(page, column_names, dtypes): +def _row_iterator_page_to_dataframe(page, column_names, dtypes): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -528,8 +528,8 @@ def _tabledata_list_page_to_dataframe(page, column_names, dtypes): return pandas.DataFrame(columns, columns=column_names) -def download_dataframe_tabledata_list(pages, bq_schema, dtypes): - """Use (slower, but free) tabledata.list to construct a DataFrame. +def download_dataframe_row_iterator(pages, bq_schema, dtypes): + """Use HTTP JSON RowIterator to construct a DataFrame. Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -549,7 +549,7 @@ def download_dataframe_tabledata_list(pages, bq_schema, dtypes): bq_schema = schema._to_schema_fields(bq_schema) column_names = [field.name for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) + yield _row_iterator_page_to_dataframe(page, column_names, dtypes) def _bqstorage_page_to_arrow(page): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 57df9455e..cd1474336 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -80,18 +80,19 @@ _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = ( - u"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - u"{project}/jobs?uploadType=" + "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" + "{project}/jobs?uploadType=" ) -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" -_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"resumable" -_GENERIC_CONTENT_TYPE = u"*/*" +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" +_GENERIC_CONTENT_TYPE = "*/*" _READ_LESS_THAN_SIZE = ( "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." 
) _NEED_TABLE_ARGUMENT = ( "The table argument should be a table ID string, Table, or TableReference" ) +_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows" class Project(object): @@ -293,7 +294,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -371,7 +372,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -1129,7 +1130,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1207,7 +1208,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1284,7 +1285,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1510,7 +1511,7 @@ def delete_table( raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None + self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, ): """Get the query results object for a query job. @@ -1890,7 +1891,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -2374,7 +2375,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = u"\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( @@ -3169,6 +3170,83 @@ def list_rows( # Pass in selected_fields separately from schema so that full # tables can be fetched without a column filter. selected_fields=selected_fields, + total_rows=getattr(table, "num_rows", None), + ) + return row_iterator + + def _list_rows_from_query_results( + self, + job_id, + location, + project, + schema, + total_rows=None, + destination=None, + max_results=None, + start_index=None, + page_size=None, + retry=DEFAULT_RETRY, + timeout=None, + ): + """List the rows of a completed query. + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults + Args: + job_id (str): + ID of a query job. + location (str): Location of the query job. + project (str): + ID of the project where the query job was run. + schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + The fields expected in these query results. Used to convert + from JSON to expected Python types. + total_rows (Optional[int]): + Total number of rows in the query results. + destination (Optional[Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableListItem, \ + google.cloud.bigquery.table.TableReference, \ + str, \ + ]]): + Destination table reference. Used to fetch the query results + with the BigQuery Storage API. + max_results (Optional[int]): + Maximum number of rows to return across the whole iterator. + start_index (Optional[int]): + The zero-based index of the starting row to read. + page_size (Optional[int]): + The maximum number of rows in each page of results from this request. + Non-positive values are ignored. 
Defaults to a sensible value set by the API. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. + """ + params = { + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": location, + } + + if start_index is not None: + params["startIndex"] = start_index + + row_iterator = RowIterator( + client=self, + api_request=functools.partial(self._call_api, retry, timeout=timeout), + path=f"/projects/{project}/queries/{job_id}", + schema=schema, + max_results=max_results, + page_size=page_size, + table=destination, + extra_params=params, + total_rows=total_rows, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e25077360..1e2002eab 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -38,7 +38,6 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning @@ -1159,12 +1158,13 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, + rows = self._client._list_rows_from_query_results( + self._query_results.job_id, + self.location, + self._query_results.project, + self._query_results.schema, + total_rows=self._query_results.total_rows, + destination=self.destination, page_size=page_size, max_results=max_results, start_index=start_index, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d6d966eee..e46b7e3cd 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1306,6 +1306,8 @@ class RowIterator(HTTPIterator): call the BigQuery Storage API to fetch rows. selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]): A subset of columns to select from this table. + total_rows (Optional[int]): + Total number of rows in the table. """ @@ -1321,6 +1323,7 @@ def __init__( extra_params=None, table=None, selected_fields=None, + total_rows=None, ): super(RowIterator, self).__init__( client, @@ -1342,7 +1345,7 @@ def __init__( self._schema = schema self._selected_fields = selected_fields self._table = table - self._total_rows = getattr(table, "num_rows", None) + self._total_rows = total_rows def _get_next_page_response(self): """Requests the next page from the path provided. 
@@ -1419,7 +1422,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_arrow_tabledata_list, iter(self.pages), self.schema + _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema ) return self._to_page_iterable( bqstorage_download, @@ -1496,7 +1499,7 @@ def to_arrow( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -1582,7 +1585,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_dataframe_tabledata_list, + _pandas_helpers.download_dataframe_row_iterator, iter(self.pages), self.schema, dtypes, @@ -1680,7 +1683,7 @@ def to_dataframe( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -2167,7 +2170,7 @@ def _item_to_row(iterator, resource): ) -def _tabledata_list_page_columns(schema, response): +def _row_iterator_page_columns(schema, response): """Make a generator of all the columns in a page from tabledata.list. This enables creating a :class:`pandas.DataFrame` and other @@ -2197,7 +2200,7 @@ def _rows_page_start(iterator, page, response): """ # Make a (lazy) copy of the page in column-oriented format for use in data # science packages. 
- page._columns = _tabledata_list_page_columns(iterator._schema, response) + page._columns = _row_iterator_page_columns(iterator._schema, response) total_rows = response.get("totalRows") if total_rows is not None: diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index f928054f6..ea071c5ac 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -60,6 +60,7 @@ def _make_job_resource( endpoint="https://bigquery.googleapis.com", job_type="load", job_id="a-random-id", + location="US", project_id="some-project", user_email="bq-user@example.com", ): @@ -69,7 +70,11 @@ def _make_job_resource( "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, + "jobReference": { + "projectId": project_id, + "jobId": job_id, + "location": location, + }, "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( endpoint, project_id, job_id ), @@ -130,7 +135,7 @@ def _table_ref(self, table_id): return TableReference(self.DS_REF, table_id) - def _make_resource(self, started=False, ended=False): + def _make_resource(self, started=False, ended=False, location="US"): self._setUpConstants() return _make_job_resource( creation_time_ms=int(self.WHEN_TS * 1000), @@ -144,6 +149,7 @@ def _make_resource(self, started=False, ended=False): job_id=self.JOB_ID, project_id=self.PROJECT, user_email=self.USER_EMAIL, + location=location, ) def _verifyInitialReadonlyProperties(self, job): diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 90d4388b8..12e2d4b8b 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -882,10 +882,14 @@ def test_done_already(self): def test_result_default_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True ) done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="US", + started=True, + ended=True, ) conn = _make_connection( _make_retriable_exception(), @@ -907,7 +911,7 @@ def test_result_default_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "US"}, timeout=None, ) conn.api_request.assert_has_calls( @@ -916,38 +920,48 @@ def test_result_default_wo_state(self): def test_result_w_retry_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True ) done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="EU", + started=True, + ended=True, ) conn = _make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. 
See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), + exceptions.NotFound("not normally retriable"), done_job_resource, ) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) + job = self._make_one( + self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client + ) custom_predicate = mock.Mock() custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + ) self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( method="POST", path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + data={ + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": "EU", + } + }, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) conn.api_request.assert_has_calls( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index c0b90d8ea..daaf2e557 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -23,6 +23,7 @@ import requests from six.moves import http_client +from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query from .helpers import _Base from .helpers import _make_client @@ -40,8 +41,10 @@ def _get_target_class(): return QueryJob - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) + def _make_resource(self, started=False, ended=False, location="US"): + resource = super(TestQueryJob, self)._make_resource( + started, ended, location=location + ) config = resource["configuration"]["query"] config["query"] = self.QUERY return resource @@ -770,22 +773,30 @@ def test_result(self): query_resource = { "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, } query_resource_done = { "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="EU") + job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. 
"totalRows": "1", @@ -793,7 +804,7 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource + query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -809,26 +820,30 @@ def test_result(self): # on the response from tabledata.list. self.assertEqual(result.total_rows, 1) + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] + [query_results_call, query_results_call, reload_call, query_page_call] ) def test_result_with_done_job_calls_get_query_results(self): @@ -838,18 +853,18 @@ def test_result_with_done_job_calls_get_query_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "1", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + results_page_resource = { "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, tabledata_resource) + conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -859,19 +874,23 @@ def test_result_with_done_job_calls_get_query_results(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_results_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + conn.api_request.assert_has_calls([query_results_call, query_results_page_call]) def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator @@ -882,7 +901,7 @@ def test_result_with_max_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", } - 
tabledata_resource = { + query_page_resource = { "totalRows": "5", "pageToken": None, "rows": [ @@ -891,7 +910,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = _make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -907,9 +926,9 @@ def test_result_with_max_results(self): self.assertEqual(len(rows), 3) self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] + query_page_request = connection.api_request.call_args_list[1] self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results + query_page_request[1]["query_params"]["maxResults"], max_results ) def test_result_w_retry(self): @@ -925,8 +944,10 @@ def test_result_w_retry(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="asia-northeast1") + job_resource_done = self._make_resource( + started=True, ended=True, location="asia-northeast1" + ) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -958,13 +979,13 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "asia-northeast1"}, timeout=None, ) @@ -1059,14 +1080,14 @@ def test_result_w_page_size(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] q_config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, "tableId": self.TABLE_ID, } - tabledata_resource = { + query_page_resource = { "totalRows": 4, "pageToken": "some-page-token", "rows": [ @@ -1075,9 +1096,9 @@ def test_result_w_page_size(self): {"f": [{"v": "row3"}]}, ], } - tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 + query_results_resource, query_page_resource, query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1089,27 +1110,29 @@ def test_result_w_page_size(self): actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + query_page_1_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + 
timeout=None, ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] + query_page_2_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "pageToken": "some-page-token", + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + timeout=None, ) + conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) def test_result_with_start_index(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index bdb1c56ea..ef0c40e1a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1202,7 +1202,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): +def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1216,7 +1216,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): schema.SchemaField("alien_field", "ALIEN_FLOAT_TYPE"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1238,7 +1238,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_known_field_type(module_under_test): +def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1252,7 +1252,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): schema.SchemaField("non_alien_field", "STRING"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1273,7 +1273,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1287,7 +1287,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, dict_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, dict_schema) result = next(results_gen) assert len(result.columns) == 2 @@ -1301,7 +1301,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") 
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1315,7 +1315,7 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_dataframe_tabledata_list( + results_gen = module_under_test.download_dataframe_row_iterator( pages, dict_schema, dtypes={} ) result = next(results_gen) @@ -1335,5 +1335,5 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): - dataframe = module_under_test._tabledata_list_page_to_dataframe([], [], {}) + dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e507834f6..ca2f7ea66 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6786,12 +6786,17 @@ def _bigquery_timestamp_float_repr(ts_float): age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + table._properties["numRows"] = 7 iterator = client.list_rows(table, timeout=7.5) + + # Check that initial total_rows is populated from the table. + self.assertEqual(iterator.total_rows, 7) page = six.next(iterator.pages) rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token + + # Check that total_rows is updated based on API response. 
+ self.assertEqual(iterator.total_rows, ROWS) f2i = {"full_name": 0, "age": 1, "joined": 2} self.assertEqual(len(rows), 4) @@ -6799,8 +6804,7 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(rows[1], Row(("Bharney Rhubble", 33, WHEN_1), f2i)) self.assertEqual(rows[2], Row(("Wylma Phlyntstone", 29, WHEN_2), f2i)) self.assertEqual(rows[3], Row(("Bhettye Rhubble", None, None), f2i)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) + self.assertEqual(iterator.next_page_token, TOKEN) conn.api_request.assert_called_once_with( method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index b2877845a..a7cf92919 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -170,7 +170,7 @@ def test_context_with_default_connection(): default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -235,7 +235,7 @@ def test_context_with_custom_connection(): default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -1078,7 +1078,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1117,7 +1117,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1156,7 +1156,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e21453b9f..e232f32e6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1572,10 +1572,7 @@ def test_constructor_with_table(self): from google.cloud.bigquery.table import Table table = Table("proj.dset.tbl") - table._properties["numRows"] = 100 - - iterator = self._make_one(table=table) - + iterator = self._make_one(table=table, total_rows=100) self.assertIs(iterator._table, table) self.assertEqual(iterator.total_rows, 100) @@ -1883,7 +1880,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): for warning in warned if 
warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._create_bqstorage_client.assert_not_called() @@ -2667,7 +2664,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @@ -2703,7 +2700,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._create_bqstorage_client.assert_not_called() From d1bf94e0c2c559f82793117e9e90e10ddb2cbdc5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 5 Nov 2020 11:13:26 -0600 Subject: [PATCH 16/17] chore: release 2.3.0 (#351) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 384704bbf..cdcfbe81f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) + + +### Features + +* add `reload` argument to `*Job.done()` functions ([#341](https://www.github.com/googleapis/python-bigquery/issues/341)) ([e51fd45](https://www.github.com/googleapis/python-bigquery/commit/e51fd45fdb0481ac5d59cc0edbfa0750928b2596)) +* pass retry from Job.result() to Job.done() ([#41](https://www.github.com/googleapis/python-bigquery/issues/41)) ([284e17a](https://www.github.com/googleapis/python-bigquery/commit/284e17a17adf6844a17db2c6fed54a649b1f997e)) + + +### Bug Fixes + +* add missing spaces in opentelemetry log message ([#360](https://www.github.com/googleapis/python-bigquery/issues/360)) ([4f326b1](https://www.github.com/googleapis/python-bigquery/commit/4f326b1ca4411cfbf5ded86955a963d3e05a409f)) +* **dbapi:** avoid running % format with no query parameters ([#348](https://www.github.com/googleapis/python-bigquery/issues/348)) ([5dd1a5e](https://www.github.com/googleapis/python-bigquery/commit/5dd1a5e77f13b8e576e917069e247c5390a81900)) +* create_job method accepts dictionary arguments ([#300](https://www.github.com/googleapis/python-bigquery/issues/300)) ([155bacc](https://www.github.com/googleapis/python-bigquery/commit/155bacc156f181384ca6dba699ab83d0398176d1)) + + +### 
Performance Improvements + +* use `jobs.getQueryResults` to download result sets ([#363](https://www.github.com/googleapis/python-bigquery/issues/363)) ([0c3476d](https://www.github.com/googleapis/python-bigquery/commit/0c3476d56380d70115f6fd765bf5c5261967052f)) + + +### Documentation + +* add documents for QueryPlanEntry and QueryPlanEntryStep ([#344](https://www.github.com/googleapis/python-bigquery/issues/344)) ([dca2e4c](https://www.github.com/googleapis/python-bigquery/commit/dca2e4ca7c2ae183ac4bb60f653d425a43a86bea)) + ## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) From 0c387dadd57fba9cdbfd39abe530de209943db9a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Nov 2020 11:42:06 -0600 Subject: [PATCH 17/17] chore: release v2.3.1 (#370) Follow-up to failed #351 release --- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdcfbe81f..787ba7557 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 2.3.1 + +11-05-2020 09:27 PST + +### Internal / Testing Changes + +- update `google.cloud.bigquery.__version__` + ## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index bd0f8e5c7..474ccbcf2 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.0" +__version__ = "2.3.1"
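
For context on the "use `jobs.getQueryResults` to download result sets" changelog entry above: a minimal usage sketch, assuming application default credentials and the public `bigquery-public-data.usa_names` dataset (the query text and `page_size` value are illustrative and not taken from these patches). After this change, `QueryJob.result()` pages rows through the `jobs.getQueryResults` REST endpoint (`GET /projects/<project>/queries/<job_id>`, with `location` and a `fields` projection in the query parameters) instead of calling `tabledata.list` on the query's destination table, which is the behavior the updated unit tests above assert.

    # Sketch only, not part of the patches above; assumes default credentials.
    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query(
        "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 10"
    )

    # result() returns a RowIterator; with this change its pages are fetched via
    # jobs.getQueryResults (passing location and a fields projection) rather than
    # tabledata.list on the destination table.
    for row in job.result(page_size=3):
        print(row["name"])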