diff --git a/CHANGELOG.md b/CHANGELOG.md index 51fad831e..9afd523a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) + + +### Features + +* add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) + + +### Bug Fixes + +* error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) +* QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) + + +### Documentation + +* **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) +* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) + ## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 29d375b03..f609468f5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -66,8 +66,11 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import DeterminismLevel @@ -93,6 +96,9 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "ArrayQueryParameterType", + "ScalarQueryParameterType", + "StructQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 162c58b4b..7ad416e08 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py 
@@ -20,6 +20,7 @@ import queue import warnings +from packaging import version try: import pandas @@ -80,6 +81,10 @@ def pyarrow_numeric(): return pyarrow.decimal128(38, 9) +def pyarrow_bignumeric(): + return pyarrow.decimal256(76, 38) + + def pyarrow_time(): return pyarrow.time64("us") @@ -128,14 +133,23 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal128 instances. } + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False + else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 95b5869e5..6b36d6e43 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,11 @@ import functools import numbers +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -184,7 +189,12 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - return "NUMERIC" + # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. 
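+        # (Illustrative note: Decimal128Scalar exists on every pyarrow version
+        # this library supports (>= 1.0.0), whereas Decimal256Scalar was only
+        # introduced in pyarrow 3.0.0, so the isinstance check below must
+        # reference the 128-bit scalar class.)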
+ scalar_object = pyarrow.scalar(value) + if isinstance(scalar_object, pyarrow.Decimal128Scalar): + return "NUMERIC" + else: + return "BIGNUMERIC" elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 14917820c..20eca9b00 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -78,7 +78,7 @@ def __eq__(self, other): STRING = "STRING" BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL" ) DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") ROWID = "ROWID" diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e353b3132..b378f091b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -18,6 +18,7 @@ import itertools from google.cloud.bigquery_v2 import types as gapic_types +from google.cloud.bigquery.query import ScalarQueryParameterType class Compression(object): @@ -215,6 +216,26 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + STRING = ScalarQueryParameterType("STRING") + BYTES = ScalarQueryParameterType("BYTES") + INTEGER = ScalarQueryParameterType("INT64") + INT64 = ScalarQueryParameterType("INT64") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BOOLEAN = ScalarQueryParameterType("BOOL") + BOOL = ScalarQueryParameterType("BOOL") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + DATE = ScalarQueryParameterType("DATE") + TIME = ScalarQueryParameterType("TIME") + DATETIME = ScalarQueryParameterType("DATETIME") + + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index d8f5d6528..f24e972c8 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -614,7 +614,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Checks if the job is complete. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -635,7 +637,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. 
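For context, the retry semantics documented above behave roughly as follows (a minimal sketch; the client setup, query string, and retry settings are illustrative placeholders, not part of this change):

```python
from google.api_core.retry import Retry
from google.cloud import bigquery

client = bigquery.Client()  # assumes application default credentials
job = client.query("SELECT 1")

# The retry object only governs polling while the job is PENDING or RUNNING;
# once the job reports the DONE state, done()/result() stop retrying early,
# since a finished job cannot change anymore.
custom_retry = Retry(initial=1.0, maximum=10.0, deadline=120.0)
rows = job.result(retry=custom_retry)
print(list(rows))
```
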
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d87f87f52..5c1118500 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -979,7 +979,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Args: retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. + How to retry the call that retrieves query results. If the job state is + ``DONE``, retrying is aborted early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -988,7 +989,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): unfinished jobs before checking. Default ``True``. Returns: - bool: True if the job is complete, False otherwise. + bool: ``True`` if the job is complete or if fetching its status resulted in + an error, ``False`` otherwise. """ # Do not refresh if the state is already done, as the job will not # change once complete. @@ -996,17 +998,34 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): if not reload or is_done: return is_done - self._reload_query_results(retry=retry, timeout=timeout) - # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + return True + else: + return self.state == _DONE_STATE + # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + return True return self.state == _DONE_STATE @@ -1128,7 +1147,9 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. + How to retry the call that retrieves rows. If the job state is + ``DONE``, retrying is aborted early even if the results are not + available, as this will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f2ed6337e..42547cd73 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -48,6 +48,239 @@ def __ne__(self, other): return not self == other +class _AbstractQueryParameterType: + """Base class for representing query parameter types. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype + """ + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. 
+
+        Args:
+            resource (Dict): JSON mapping of parameter
+
+        Returns:
+            google.cloud.bigquery.query.QueryParameterType: Instance
+        """
+        raise NotImplementedError
+
+    def to_api_repr(self):
+        """Construct JSON API representation for the parameter type.
+
+        Returns:
+            Dict: JSON mapping
+        """
+        raise NotImplementedError
+
+
+class ScalarQueryParameterType(_AbstractQueryParameterType):
+    """Type representation for scalar query parameters.
+
+    Args:
+        type_ (str):
+            One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL',
+            'TIMESTAMP', 'DATETIME', or 'DATE'.
+        name (Optional[str]):
+            The name of the query parameter. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+        description (Optional[str]):
+            The query parameter description. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+    """
+
+    def __init__(self, type_, *, name=None, description=None):
+        self._type = type_
+        self.name = name
+        self.description = description
+
+    @classmethod
+    def from_api_repr(cls, resource):
+        """Factory: construct parameter type from JSON resource.
+
+        Args:
+            resource (Dict): JSON mapping of parameter
+
+        Returns:
+            google.cloud.bigquery.query.ScalarQueryParameterType: Instance
+        """
+        type_ = resource["type"]
+        return cls(type_)
+
+    def to_api_repr(self):
+        """Construct JSON API representation for the parameter type.
+
+        Returns:
+            Dict: JSON mapping
+        """
+        # Name and description are only used if the type is a field inside a struct
+        # type, but it's StructQueryParameterType's responsibility to use these two
+        # attributes in the API representation when needed. Here we omit them.
+        return {"type": self._type}
+
+    def __repr__(self):
+        name = f", name={self.name!r}" if self.name is not None else ""
+        description = (
+            f", description={self.description!r}"
+            if self.description is not None
+            else ""
+        )
+        return f"{self.__class__.__name__}({self._type!r}{name}{description})"
+
+
+class ArrayQueryParameterType(_AbstractQueryParameterType):
+    """Type representation for array query parameters.
+
+    Args:
+        array_type (Union[ScalarQueryParameterType, StructQueryParameterType]):
+            The type of array elements.
+        name (Optional[str]):
+            The name of the query parameter. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+        description (Optional[str]):
+            The query parameter description. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+    """
+
+    def __init__(self, array_type, *, name=None, description=None):
+        self._array_type = array_type
+        self.name = name
+        self.description = description
+
+    @classmethod
+    def from_api_repr(cls, resource):
+        """Factory: construct parameter type from JSON resource.
+
+        Args:
+            resource (Dict): JSON mapping of parameter
+
+        Returns:
+            google.cloud.bigquery.query.ArrayQueryParameterType: Instance
+        """
+        array_item_type = resource["arrayType"]["type"]
+
+        if array_item_type in {"STRUCT", "RECORD"}:
+            klass = StructQueryParameterType
+        else:
+            klass = ScalarQueryParameterType
+
+        item_type_instance = klass.from_api_repr(resource["arrayType"])
+        return cls(item_type_instance)
+
+    def to_api_repr(self):
+        """Construct JSON API representation for the parameter type.
+
+        Returns:
+            Dict: JSON mapping
+        """
+        # Name and description are only used if the type is a field inside a struct
+        # type, but it's StructQueryParameterType's responsibility to use these two
+        # attributes in the API representation when needed. Here we omit them.
+        return {
+            "type": "ARRAY",
+            "arrayType": self._array_type.to_api_repr(),
+        }
+
+    def __repr__(self):
+        name = f", name={self.name!r}" if self.name is not None else ""
+        description = (
+            f", description={self.description!r}"
+            if self.description is not None
+            else ""
+        )
+        return f"{self.__class__.__name__}({self._array_type!r}{name}{description})"
+
+
+class StructQueryParameterType(_AbstractQueryParameterType):
+    """Type representation for struct query parameters.
+
+    Args:
+        fields (Iterable[Union[ \
+            ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \
+        ]]):
+            A non-empty iterable describing the struct's field types.
+        name (Optional[str]):
+            The name of the query parameter. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+        description (Optional[str]):
+            The query parameter description. Primarily used if the type is
+            one of the subfields in a ``StructQueryParameterType`` instance.
+    """
+
+    def __init__(self, *fields, name=None, description=None):
+        if not fields:
+            raise ValueError("Struct type must have at least one field defined.")
+
+        self._fields = fields  # fields is a tuple (immutable), no shallow copy needed
+        self.name = name
+        self.description = description
+
+    @property
+    def fields(self):
+        return self._fields  # no copy needed, self._fields is an immutable sequence
+
+    @classmethod
+    def from_api_repr(cls, resource):
+        """Factory: construct parameter type from JSON resource.
+
+        Args:
+            resource (Dict): JSON mapping of parameter
+
+        Returns:
+            google.cloud.bigquery.query.StructQueryParameterType: Instance
+        """
+        fields = []
+
+        for struct_field in resource["structTypes"]:
+            type_repr = struct_field["type"]
+            if type_repr["type"] in {"STRUCT", "RECORD"}:
+                klass = StructQueryParameterType
+            elif type_repr["type"] == "ARRAY":
+                klass = ArrayQueryParameterType
+            else:
+                klass = ScalarQueryParameterType
+
+            type_instance = klass.from_api_repr(type_repr)
+            type_instance.name = struct_field.get("name")
+            type_instance.description = struct_field.get("description")
+            fields.append(type_instance)
+
+        return cls(*fields)
+
+    def to_api_repr(self):
+        """Construct JSON API representation for the parameter type.
+
+        Returns:
+            Dict: JSON mapping
+        """
+        fields = []
+
+        for field in self._fields:
+            item = {"type": field.to_api_repr()}
+            if field.name is not None:
+                item["name"] = field.name
+            if field.description is not None:
+                item["description"] = field.description
+
+            fields.append(item)
+
+        return {
+            "type": "STRUCT",
+            "structTypes": fields,
+        }
+
+    def __repr__(self):
+        name = f", name={self.name!r}" if self.name is not None else ""
+        description = (
+            f", description={self.description!r}"
+            if self.description is not None
+            else ""
+        )
+        items = ", ".join(repr(field) for field in self._fields)
+        return f"{self.__class__.__name__}({items}{name}{description})"
+
+
 class _AbstractQueryParameter(object):
     """Base class for named / positional query parameters.
     """
@@ -83,7 +316,7 @@ class ScalarQueryParameter(_AbstractQueryParameter):
         type_ (str):
             Name of parameter type. One of 'STRING', 'INT64',
-            'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
+            'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
             'DATE'.
         value (Union[str, int, float, decimal.Decimal, bool,
                      datetime.datetime,
                      datetime.date]):
@@ -102,7 +335,7 @@ def positional(cls, type_, value):
         Args:
             type_ (str): Name of parameter type.
One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -184,28 +417,43 @@ class ArrayQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, + `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. """ def __init__(self, name, array_type, values): self.name = name - self.array_type = array_type self.values = values + if isinstance(array_type, str): + if not values and array_type in {"RECORD", "STRUCT"}: + raise ValueError( + "Missing detailed struct item type info for an empty array, " + "please provide a StructQueryParameterType instance." + ) + self.array_type = array_type + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. Args: - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, + `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name @@ -263,22 +511,40 @@ def to_api_repr(self): Dict: JSON mapping """ values = self.values - if self.array_type == "RECORD" or self.array_type == "STRUCT": + + if self.array_type in {"RECORD", "STRUCT"} or isinstance( + self.array_type, StructQueryParameterType + ): reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]["parameterType"] a_values = [repr_["parameterValue"] for repr_ in reprs] + + if reprs: + a_type = reprs[0]["parameterType"] + else: + # This assertion always evaluates to True because the + # constructor disallows STRUCT/RECORD type defined as a + # string with empty values. + assert isinstance(self.array_type, StructQueryParameterType) + a_type = self.array_type.to_api_repr() else: - a_type = {"type": self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + # Scalar array item type. 
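+            # (e.g. array_type="INT64", or an equivalent
+            # ScalarQueryParameterType("INT64") instance)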
+            if isinstance(self.array_type, str):
+                a_type = {"type": self.array_type}
+            else:
+                a_type = self.array_type.to_api_repr()
+
+            converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
             if converter is not None:
                 values = [converter(value) for value in values]
             a_values = [{"value": value} for value in values]
+
         resource = {
             "parameterType": {"type": "ARRAY", "arrayType": a_type},
             "parameterValue": {"arrayValues": a_values},
         }
         if self.name is not None:
             resource["name"] = self.name
+
         return resource

     def _key(self):
@@ -289,7 +555,14 @@ def _key(self):
         Returns:
             Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`.
         """
-        return (self.name, self.array_type.upper(), self.values)
+        if isinstance(self.array_type, str):
+            item_type = self.array_type
+        elif isinstance(self.array_type, ScalarQueryParameterType):
+            item_type = self.array_type._type
+        else:
+            item_type = "STRUCT"
+
+        return (self.name, item_type.upper(), self.values)

     def __eq__(self, other):
         if not isinstance(other, ArrayQueryParameter):
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
index c76aded02..9be27f3e8 100644
--- a/google/cloud/bigquery/schema.py
+++ b/google/cloud/bigquery/schema.py
@@ -32,6 +32,7 @@
     "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64,
     "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64,
     "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC,
+    "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC,
    "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL,
     "BOOL": types.StandardSqlDataType.TypeKind.BOOL,
     "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY,
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index b2a8c5535..13e710fcc 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "2.9.0"
+__version__ = "2.10.0"
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index 6f9306af2..e9fcfca03 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,3 +1,3 @@
 geojson==2.5.0
-google-cloud-bigquery==2.7.0
+google-cloud-bigquery==2.9.0
 Shapely==1.7.1
diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py
index 58cd2b542..c07848bee 100644
--- a/samples/snippets/authenticate_service_account.py
+++ b/samples/snippets/authenticate_service_account.py
@@ -30,6 +30,11 @@ def main():
         key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
     )
+    # Alternatively, use service_account.Credentials.from_service_account_info()
+    # to set credentials directly from a JSON object rather than a file path.
+    # TODO(developer): Set key_json to the content of the service account key file.
+ # credentials = service_account.Credentials.from_service_account_info(key_json) + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) # [END bigquery_client_json_credentials] return client diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7087121b5..c638178fc 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-bigquery==2.7.0 -google-cloud-bigquery-storage==2.2.1 +google-cloud-bigquery==2.9.0 +google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' diff --git a/setup.py b/setup.py index ea2df4843..31b6a3ff7 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", + "packaging >= 14.3", "protobuf >= 3.12.0", ] extras = { @@ -48,6 +49,7 @@ "pyarrow >= 1.0.0, < 4.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", diff --git a/synth.metadata b/synth.metadata index dc183a72e..9412653c6 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -93,7 +93,6 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", - "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/model_service.rst", diff --git a/synth.py b/synth.py index 3ab271c96..3c6440600 100644 --- a/synth.py +++ b/synth.py @@ -32,6 +32,7 @@ s.move( library, excludes=[ + "*.tar.gz", "docs/index.rst", "docs/bigquery_v2/*_service.rst", "docs/bigquery_v2/services.rst", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 60c3b3fa8..ed48b0bfe 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -65,6 +65,7 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -891,6 +892,9 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. 
See: @@ -902,21 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - df_data = collections.OrderedDict( - [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - ) + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1003,6 +1008,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. See: @@ -1012,57 +1020,65 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( ( - "ts_col", + "bignum_col", [ - 
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - ), - ] - ) + ) + ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1172,6 +1188,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1210,6 +1227,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -2143,7 +2168,9 @@ def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import StructQueryParameterType question = "What is the answer to life, the universe, and everything?" question_param = ScalarQueryParameter( @@ -2157,6 +2184,10 @@ def test_query_w_query_params(self): pi_numeric_param = ScalarQueryParameter( name="pi_numeric_param", type_="NUMERIC", value=pi_numeric ) + bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) + bignum_param = ScalarQueryParameter( + name="bignum_param", type_="BIGNUMERIC", value=bignum + ) truthy = True truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) beef = b"DEADBEEF" @@ -2198,6 +2229,14 @@ def test_query_w_query_params(self): characters_param = ArrayQueryParameter( name=None, array_type="RECORD", values=[phred_param, bharney_param] ) + empty_struct_array_param = ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( "sidekick", bharney_name_param, bharney_age_param @@ -2288,6 +2327,11 @@ def test_query_w_query_params(self): ], "query_parameters": [characters_param], }, + { + "sql": "SELECT @empty_array_param", + "expected": [], + "query_parameters": [empty_struct_array_param], + }, { "sql": "SELECT @roles", "expected": { @@ -2302,6 +2346,15 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] + if _BIGNUMERIC_SUPPORT: + examples.append( + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + } + ) + for example in examples: jconfig = QueryJobConfig() jconfig.query_parameters = example["query_parameters"] diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index bbeffba50..405ad6ee5 100644 --- 
a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -967,7 +967,7 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index a4ab11ab6..655a121e6 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -16,6 +16,7 @@ import copy import http import textwrap +import types import freezegun from google.api_core import exceptions @@ -308,7 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done(self): + def test_done_job_complete(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -356,6 +357,84 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + def test_done_w_query_results_error_reload_ok_job_finished(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "DONE" + self.set_exception(copy.copy(bad_request_error)) + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert is_done + assert isinstance(job._exception, exceptions.BadRequest) + + def test_done_w_query_results_error_reload_ok_job_still_running(self): + client = _make_client(project=self.PROJECT) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + client._get_query_results = mock.Mock(side_effect=retry_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "RUNNING" + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert not is_done + assert job._exception is None + + def test_done_w_query_results_error_reload_error(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + reload_error = exceptions.DataLoss("Oops, sorry!") + job.reload = mock.Mock(side_effect=reload_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is bad_request_error + + def test_done_w_job_query_results_ok_reload_error(self): + client = _make_client(project=self.PROJECT) + query_results = google.cloud.bigquery.query._QueryResults( + properties={ + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, + } + ) + client._get_query_results 
= mock.Mock(return_value=query_results) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + job.reload = mock.Mock(side_effect=retry_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is retry_error + def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -973,7 +1052,7 @@ def test_result_w_retry(self): initial=0.001, maximum=0.001, multiplier=1.0, - deadline=0.001, + deadline=0.1, predicate=custom_predicate, ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index ef0c40e1a..abd725820 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -39,6 +39,12 @@ from google import api_core from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + + +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) @pytest.fixture @@ -70,6 +76,15 @@ def is_numeric(type_): )(type_) +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + def is_timestamp(type_): # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type return all_( @@ -120,6 +135,9 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), + pytest.param( + "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -198,6 +216,12 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), + pytest.param( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, + ), ( "BOOLEAN", "REPEATED", @@ -270,34 +294,41 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected = pyarrow.struct( - ( - 
pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected = pyarrow.struct(expected) + assert pyarrow.types.is_struct(actual) assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -314,34 +345,41 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected_value_type = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected 
= ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected_value_type = pyarrow.struct(expected) + assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) assert actual.value_type.num_fields == len(fields) @@ -385,6 +423,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), + pytest.param( + "BIGNUMERIC", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("3.141592653589793238462643383279"), + ], + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( @@ -841,41 +889,45 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), - schema.SchemaField("field09", "BOOL", mode="REQUIRED"), - schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), - schema.SchemaField("field11", "DATE", mode="REQUIRED"), - schema.SchemaField("field12", "TIME", mode="REQUIRED"), - schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), - schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), - ) - dataframe = pandas.DataFrame( - { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [True, False], - "field09": [False, True], - "field10": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), - ], - "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field13": [ - datetime.datetime(1970, 1, 1, 0, 0, 0), - datetime.datetime(2012, 12, 21, 9, 7, 42), - ], - "field14": [ - "POINT(30 10)", - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - } + schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field10", "BOOL", mode="REQUIRED"), + schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field12", "DATE", mode="REQUIRED"), + schema.SchemaField("field13", "TIME", mode="REQUIRED"), + schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) + if _BIGNUMERIC_SUPPORT: + bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) + + data = { + "field01": 
["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field09": [True, False], + "field10": [False, True], + "field11": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + } + if _BIGNUMERIC_SUPPORT: + data["field08"] = [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ] + dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema @@ -1089,6 +1141,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "bytes_field": b"some bytes", "string_field": u"some characters", "numeric_field": decimal.Decimal("123.456"), + "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } ] ) @@ -1109,6 +1162,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + current_schema += ( + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), + ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1131,6 +1188,13 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + expected_schema += ( + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), + ) + by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index fffa46aa8..c28c014d4 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,6 +25,7 @@ import google.cloud._helpers from google.cloud.bigquery import table +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -51,6 +52,14 @@ def test_scalar_to_query_parameter(self): "TIMESTAMP", ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ) + ) + for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) @@ -104,6 +113,11 @@ def test_array_to_query_parameter_valid_argument(self): ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") + ) + for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) 
parameter = _helpers.array_to_query_parameter(values) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index cf268daf1..c8be2911f 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -43,6 +43,318 @@ def test___eq__(self): self.assertNotEqual(udf, wrong_type) +class Test__AbstractQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameterType + + return _AbstractQueryParameterType + + @classmethod + def _make_one(cls, *args, **kw): + return cls._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param_type = self._make_one() + with self.assertRaises(NotImplementedError): + param_type.to_api_repr() + + +class Test_ScalarQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameterType + + return ScalarQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + klass = self._get_target_class() + result = klass.from_api_repr({"type": "BOOLEAN"}) + self.assertEqual(result._type, "BOOLEAN") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + param_type = self._make_one("BYTES", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, {"type": "BYTES"}) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BYTES") + self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("BYTES", name="foo", description="this is foo") + self.assertEqual( + repr(param_type), + "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", + ) + + +class Test_ArrayQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameterType + + return ArrayQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import StructQueryParameterType + + api_resource = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + item_type = result._array_type + self.assertIsInstance(item_type, StructQueryParameterType) + + self.assertIsNone(item_type.name) + self.assertIsNone(item_type.description) + + field = item_type.fields[0] + self.assertEqual(field.name, "weight") + self.assertEqual(field.description, "in kg") + self.assertEqual(field._type, "INTEGER") + + field = item_type.fields[1] + self.assertEqual(field.name, "last_name") + self.assertIsNone(field.description) + self.assertEqual(field._type, "STRING") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + array_item_type = StructQueryParameterType( + ScalarQueryParameterType("INTEGER", 
name="weight", description="in kg"), + ScalarQueryParameterType("STRING", name="last_name"), + ) + param_type = self._make_one(array_item_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BOOLEAN") + self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("INT64", name="bar", description="this is bar") + self.assertEqual( + repr(param_type), + "ArrayQueryParameterType('INT64', name='bar', description='this is bar')", + ) + + +class Test_StructQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameterType + + return StructQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_raises_error_without_any_fields(self): + with self.assertRaisesRegex(ValueError, ".*at least one field.*"): + self._make_one() + + def test_from_api_repr(self): + from google.cloud.bigquery.query import ArrayQueryParameterType + from google.cloud.bigquery.query import ScalarQueryParameterType + + api_resource = { + "type": "STRUCT", + "structTypes": [ + { + "name": "age", + "type": {"type": "INTEGER"}, + "description": "in years", + }, + { + "name": "aliases", + "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + }, + { + "description": "a nested struct", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + self.assertEqual(len(result.fields), 3) + + field = result.fields[0] + self.assertIsInstance(field, ScalarQueryParameterType) + self.assertEqual(field.name, "age") + self.assertEqual(field.description, "in years") + + field = result.fields[1] + self.assertIsInstance(field, ArrayQueryParameterType) + self.assertEqual(field.name, "aliases") + self.assertIsNone(field.description) + self.assertIsInstance(field._array_type, ScalarQueryParameterType) + self.assertEqual(field._array_type._type, "STRING") + + field = result.fields[2] + self.assertIsInstance(field, self._get_target_class()) + self.assertIsNone(field.name) + self.assertEqual(field.description, "a nested struct") + + date_field = field.fields[0] + self.assertEqual(date_field._type, "DATE") + self.assertEqual(date_field.name, "nested_date") + self.assertIsNone(date_field.description) + + bool_field = field.fields[1] + self.assertEqual(bool_field._type, "BOOLEAN") + self.assertIsNone(bool_field.name) + self.assertEqual(bool_field.description, "nested bool field") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + date_type = ScalarQueryParameterType("DATE", name="day_of_birth") + param_type = self._make_one(int_type, date_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + 
+class Test_StructQueryParameterType(unittest.TestCase):
+    @staticmethod
+    def _get_target_class():
+        from google.cloud.bigquery.query import StructQueryParameterType
+
+        return StructQueryParameterType
+
+    def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+    def test_raises_error_without_any_fields(self):
+        with self.assertRaisesRegex(ValueError, ".*at least one field.*"):
+            self._make_one()
+
+    def test_from_api_repr(self):
+        from google.cloud.bigquery.query import ArrayQueryParameterType
+        from google.cloud.bigquery.query import ScalarQueryParameterType
+
+        api_resource = {
+            "type": "STRUCT",
+            "structTypes": [
+                {
+                    "name": "age",
+                    "type": {"type": "INTEGER"},
+                    "description": "in years",
+                },
+                {
+                    "name": "aliases",
+                    "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}},
+                },
+                {
+                    "description": "a nested struct",
+                    "type": {
+                        "type": "STRUCT",
+                        "structTypes": [
+                            {"type": {"type": "DATE"}, "name": "nested_date"},
+                            {
+                                "type": {"type": "BOOLEAN"},
+                                "description": "nested bool field",
+                            },
+                        ],
+                    },
+                },
+            ],
+        }
+
+        klass = self._get_target_class()
+        result = klass.from_api_repr(api_resource)
+
+        self.assertIsNone(result.name)
+        self.assertIsNone(result.description)
+        self.assertEqual(len(result.fields), 3)
+
+        field = result.fields[0]
+        self.assertIsInstance(field, ScalarQueryParameterType)
+        self.assertEqual(field.name, "age")
+        self.assertEqual(field.description, "in years")
+
+        field = result.fields[1]
+        self.assertIsInstance(field, ArrayQueryParameterType)
+        self.assertEqual(field.name, "aliases")
+        self.assertIsNone(field.description)
+        self.assertIsInstance(field._array_type, ScalarQueryParameterType)
+        self.assertEqual(field._array_type._type, "STRING")
+
+        field = result.fields[2]
+        self.assertIsInstance(field, self._get_target_class())
+        self.assertIsNone(field.name)
+        self.assertEqual(field.description, "a nested struct")
+
+        date_field = field.fields[0]
+        self.assertEqual(date_field._type, "DATE")
+        self.assertEqual(date_field.name, "nested_date")
+        self.assertIsNone(date_field.description)
+
+        bool_field = field.fields[1]
+        self.assertEqual(bool_field._type, "BOOLEAN")
+        self.assertIsNone(bool_field.name)
+        self.assertEqual(bool_field.description, "nested bool field")
+
+    def test_to_api_repr(self):
+        from google.cloud.bigquery.query import ScalarQueryParameterType
+
+        int_type = ScalarQueryParameterType("INTEGER", description="in years")
+        date_type = ScalarQueryParameterType("DATE", name="day_of_birth")
+        param_type = self._make_one(int_type, date_type, name="foo", description="bar")
+
+        result = param_type.to_api_repr()
+
+        expected_result = {
+            "type": "STRUCT",
+            "structTypes": [
+                {"type": {"type": "INTEGER"}, "description": "in years"},
+                {"name": "day_of_birth", "type": {"type": "DATE"}},
+            ],
+        }
+        self.assertEqual(result, expected_result)
+
+    def test_to_api_repr_nested(self):
+        from google.cloud.bigquery.query import ScalarQueryParameterType
+
+        struct_class = self._get_target_class()
+
+        int_type = ScalarQueryParameterType("INTEGER", description="in years")
+        nested_struct_type = struct_class(
+            ScalarQueryParameterType("DATE", name="nested_date"),
+            ScalarQueryParameterType("BOOLEAN", description="nested bool field"),
+            name="nested",
+        )
+        param_type = self._make_one(
+            int_type, nested_struct_type, name="foo", description="bar"
+        )
+
+        result = param_type.to_api_repr()
+
+        expected_result = {
+            "type": "STRUCT",
+            "structTypes": [
+                {"type": {"type": "INTEGER"}, "description": "in years"},
+                {
+                    "name": "nested",
+                    "type": {
+                        "type": "STRUCT",
+                        "structTypes": [
+                            {"type": {"type": "DATE"}, "name": "nested_date"},
+                            {
+                                "type": {"type": "BOOLEAN"},
+                                "description": "nested bool field",
+                            },
+                        ],
+                    },
+                },
+            ],
+        }
+        self.assertEqual(result, expected_result)
+
+    def test_repr_no_optional_attrs(self):
+        from google.cloud.bigquery.query import ScalarQueryParameterType
+
+        param_type = self._make_one(
+            ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING")
+        )
+        expected = (
+            "StructQueryParameterType("
+            "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')"
+            ")"
+        )
+        self.assertEqual(repr(param_type), expected)
+
+    def test_repr_all_optional_attrs(self):
+        from google.cloud.bigquery.query import ScalarQueryParameterType
+
+        param_type = self._make_one(
+            ScalarQueryParameterType("BOOLEAN"),
+            ScalarQueryParameterType("STRING"),
+            name="data_record",
+            description="this is it",
+        )
+        expected = (
+            "StructQueryParameterType("
+            "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), "
+            "name='data_record', description='this is it'"
+            ")"
+        )
+        self.assertEqual(repr(param_type), expected)
+
+
 class Test__AbstractQueryParameter(unittest.TestCase):
     @staticmethod
     def _get_target_class():
@@ -166,6 +478,16 @@ def test_to_api_repr_w_numeric(self):
         param = klass.positional(type_="NUMERIC", value="123456789.123456789")
         self.assertEqual(param.to_api_repr(), EXPECTED)

+    def test_to_api_repr_w_bignumeric(self):
+        big_num_string = "{d38}.{d38}".format(d38="9" * 38)
+        EXPECTED = {
+            "parameterType": {"type": "BIGNUMERIC"},
+            "parameterValue": {"value": big_num_string},
+        }
+        klass = self._get_target_class()
+        param = klass.positional(type_="BIGNUMERIC", value=big_num_string)
+        self.assertEqual(param.to_api_repr(), EXPECTED)
+
     def test_to_api_repr_w_bool(self):
         EXPECTED = {
             "parameterType": {"type": "BOOL"},
@@ -330,6 +652,10 @@ def test_ctor(self):
         self.assertEqual(param.array_type, "INT64")
         self.assertEqual(param.values, [1, 2])

+    def test_ctor_empty_struct_array_wo_type_info(self):
+        with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"):
+            self._make_one(name="foo", array_type="STRUCT", values=[])
+
     def test___eq__(self):
         param = self._make_one(name="foo", array_type="INT64", values=[123])
         self.assertEqual(param, param)
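The test_ctor_empty_struct_array_wo_type_info case above and the empty-array serialization tests below pin down the fix for the "empty array of structs" bug listed in the changelog: an empty STRUCT array now requires explicit type information. A hedged sketch of the corrected usage; the parameter name is illustrative:

    # Sketch: a bare array_type="STRUCT" with no values raises ValueError,
    # while an explicit StructQueryParameterType carries enough type
    # information to serialize an empty array.
    from google.cloud.bigquery import (
        ArrayQueryParameter,
        ScalarQueryParameterType,
        StructQueryParameterType,
    )

    item_type = StructQueryParameterType(
        ScalarQueryParameterType("STRING", name="foo"),
        ScalarQueryParameterType("INT64", name="bar"),
    )
    empty_array = ArrayQueryParameter(name="records", array_type=item_type, values=[])
    empty_array.to_api_repr()  # ..."parameterValue": {"arrayValues": []}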
"parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}}, + "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]}, + } + klass = self._get_target_class() + param = klass.positional( + array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): EXPECTED = { "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, @@ -493,6 +832,31 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_empty_array_of_records_type(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + EXPECTED = { + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, + ], + }, + }, + "parameterValue": {"arrayValues": []}, + } + item_type = StructQueryParameterType( + ScalarQueryParameterType("STRING", name="foo"), + ScalarQueryParameterType("INT64", name="bar"), + ) + klass = self._get_target_class() + param = klass.positional(array_type=item_type, values=[]) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING", ["value"]) other = object() @@ -537,11 +901,38 @@ def test___ne___different_values(self): field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) - def test___repr__(self): + def test___repr__array_type_str(self): field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) + def test___repr__array_type_scalar_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_items = self._make_one( + "int_items", ScalarQueryParameterType("INTEGER"), [64] + ) + expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])" + self.assertEqual(repr(int_items), expected) + + def test___repr__array_type_struct_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + struct_items = self._make_one( + "struct_items", + StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="age"), + ScalarQueryParameterType("STRING", name="last_name"), + ), + [{"age": 18, "last_name": "Doe"}], + ) + expected = ( + "ArrayQueryParameter('struct_items', 'STRUCT', " + "[{'age': 18, 'last_name': 'Doe'}])" + ) + self.assertEqual(repr(struct_items), expected) + class Test_StructQueryParameter(unittest.TestCase): @staticmethod