diff --git a/.coveragerc b/.coveragerc
index dd39c8546..0d8e6297d 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -17,6 +17,8 @@
 # Generated by synthtool. DO NOT EDIT!
 [run]
 branch = True
+omit =
+  google/cloud/__init__.py
 
 [report]
 fail_under = 100
@@ -32,4 +34,5 @@ omit =
   */gapic/*.py
   */proto/*.py
   */core/*.py
-  */site-packages/*.py
\ No newline at end of file
+  */site-packages/*.py
+  google/cloud/__init__.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3dac7a0f1..ad6c9551f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,28 @@
 
 [1]: https://pypi.org/project/google-cloud-bigquery/#history
 
+## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08)
+
+
+### Features
+
+* add constants for MONTH and YEAR time partitioning types ([#283](https://www.github.com/googleapis/python-bigquery/issues/283)) ([9090e1c](https://www.github.com/googleapis/python-bigquery/commit/9090e1ccd8825a97835325b4829f6e7ecfd9ea88))
+
+
+### Bug Fixes
+
+* remove unnecessary dependency on libcst ([#308](https://www.github.com/googleapis/python-bigquery/issues/308)) ([c055930](https://www.github.com/googleapis/python-bigquery/commit/c05593094c1405f752b2c51b15202a6dbb5cb83f))
+
+
+### Performance Improvements
+
+* remove redundant array deepcopy ([#26](https://www.github.com/googleapis/python-bigquery/issues/26)) ([b54f867](https://www.github.com/googleapis/python-bigquery/commit/b54f86769c982ce5c8fcbf3889f82450428bb40c))
+
+
+### Documentation
+
+* **samples:** add create_table_clustered code snippet ([#291](https://www.github.com/googleapis/python-bigquery/issues/291)) ([d1eb8b3](https://www.github.com/googleapis/python-bigquery/commit/d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff))
+
 ## 2.0.0
 
 09-30-2020 14:51 PDT
diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst
index 27af7c7df..7afca05e2 100644
--- a/docs/usage/tables.rst
+++ b/docs/usage/tables.rst
@@ -58,6 +58,15 @@ Create an empty table with the
    :start-after: [START bigquery_create_table]
    :end-before: [END bigquery_create_table]
 
+Create a clustered table with the
+:func:`~google.cloud.bigquery.client.Client.create_table` method:
+
+.. literalinclude:: ../samples/create_table_clustered.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_create_table_clustered]
+   :end-before: [END bigquery_create_table_clustered]
+
 Create an integer range partitioned table with the
 :func:`~google.cloud.bigquery.client.Client.create_table` method:
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index 47851d42c..b59b3d794 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -15,7 +15,6 @@
 """Shared helper functions for BigQuery API classes."""
 
 import base64
-import copy
 import datetime
 import decimal
 import re
@@ -397,13 +396,9 @@ def _repeated_field_to_json(field, row_value):
     Returns:
         List[Any]: A list of JSON-serializable objects.
     """
-    # Remove the REPEATED, but keep the other fields. This allows us to process
-    # each item as if it were a top-level field.
-    item_field = copy.deepcopy(field)
-    item_field._mode = "NULLABLE"
     values = []
     for item in row_value:
-        values.append(_field_to_json(item_field, item))
+        values.append(_single_field_to_json(field, item))
     return values
 
 
@@ -462,6 +457,33 @@ def _record_field_to_json(fields, row_value):
     return record
 
 
+def _single_field_to_json(field, row_value):
+    """Convert a single field into JSON-serializable values.
+
+    Ignores mode so that this can function for ARRAY / REPEATED fields
+    without requiring a deepcopy of the field. See:
+    https://github.com/googleapis/python-bigquery/issues/6
+
+    Args:
+        field (google.cloud.bigquery.schema.SchemaField):
+            The SchemaField to use for type conversion and field name.
+
+        row_value (Any):
+            Scalar or Struct to be inserted. The type
+            is inferred from the SchemaField's field_type.
+
+    Returns:
+        Any: A JSON-serializable object.
+    """
+    if row_value is None:
+        return None
+
+    if field.field_type == "RECORD":
+        return _record_field_to_json(field.fields, row_value)
+
+    return _scalar_field_to_json(field, row_value)
+
+
 def _field_to_json(field, row_value):
     """Convert a field into JSON-serializable values.
 
@@ -483,10 +505,7 @@ def _field_to_json(field, row_value):
     if field.mode == "REPEATED":
         return _repeated_field_to_json(field, row_value)
 
-    if field.field_type == "RECORD":
-        return _record_field_to_json(field.fields, row_value)
-
-    return _scalar_field_to_json(field, row_value)
+    return _single_field_to_json(field, row_value)
 
 
 def _snake_to_camel_case(value):
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index 902a7040a..a72bacb74 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -1980,6 +1980,12 @@ class TimePartitioningType(object):
     HOUR = "HOUR"
     """str: Generates one partition per hour."""
 
+    MONTH = "MONTH"
+    """str: Generates one partition per month."""
+
+    YEAR = "YEAR"
+    """str: Generates one partition per year."""
+
 
 class TimePartitioning(object):
     """Configures time-based partitioning for a table.
@@ -1987,13 +1993,24 @@ class TimePartitioning(object):
     Args:
         type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]):
             Specifies the type of time partitioning to perform. Defaults to
-            :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`,
-            which is the only currently supported type.
+            :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.
+
+            Supported values are:
+
+            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR`
+            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`
+            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH`
+            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR`
+
         field (Optional[str]):
             If set, the table is partitioned by this field. If not set, the
             table is partitioned by pseudo column ``_PARTITIONTIME``. The field
-            must be a top-level ``TIMESTAMP`` or ``DATE`` field. Its mode must
-            be ``NULLABLE`` or ``REQUIRED``.
+            must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE``
+            field. Its mode must be ``NULLABLE`` or ``REQUIRED``.
+
+            See the `time-unit column-partitioned tables guide
+            <https://cloud.google.com/bigquery/docs/creating-column-partitions>`_
+            in the BigQuery documentation.
         expiration_ms(Optional[int]):
             Number of milliseconds for which to keep the storage for a
             partition.
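The two library changes above are easiest to see from the caller's side. First, the serialization refactor: `_repeated_field_to_json` now routes each array item through the new `_single_field_to_json` instead of deep-copying the `SchemaField` per item. A minimal sketch of the observable behavior — these are private helpers, so this is illustrative only, not a supported API:

```python
from google.cloud.bigquery._helpers import _field_to_json
from google.cloud.bigquery.schema import SchemaField

# A REPEATED INT64 field: each item is serialized as if it were a scalar,
# with no per-item deepcopy of the SchemaField (issue #6 / changelog entry #26).
scores = SchemaField("scores", "INT64", mode="REPEATED")
print(_field_to_json(scores, [1, 2, 3]))  # ['1', '2', '3'] -- BigQuery sends INT64 as strings
```

Second, the new `MONTH` and `YEAR` constants plug into the existing `TimePartitioning` API. A sketch of creating a month-partitioned table (the table ID below is a placeholder):

```python
from google.cloud import bigquery

client = bigquery.Client()

table = bigquery.Table(
    "your-project.your_dataset.monthly_events",  # placeholder table ID
    schema=[bigquery.SchemaField("event_ts", "TIMESTAMP")],
)
# New in 2.1.0: MONTH and YEAR join the existing DAY and HOUR types.
table.time_partitioning = bigquery.TimePartitioning(
    type_=bigquery.TimePartitioningType.MONTH,
    field="event_ts",  # top-level TIMESTAMP, DATETIME, or DATE; NULLABLE or REQUIRED
)
table = client.create_table(table)  # Makes an API request.
```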
diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py
new file mode 100644
index 000000000..2b45b747e
--- /dev/null
+++ b/samples/create_table_clustered.py
@@ -0,0 +1,42 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_table_clustered(table_id):
+
+    # [START bigquery_create_table_clustered]
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to create.
+    # table_id = "your-project.your_dataset.your_table_name"
+
+    schema = [
+        bigquery.SchemaField("full_name", "STRING"),
+        bigquery.SchemaField("city", "STRING"),
+        bigquery.SchemaField("zipcode", "INTEGER"),
+    ]
+
+    table = bigquery.Table(table_id, schema=schema)
+    table.clustering_fields = ["city", "zipcode"]
+    table = client.create_table(table)  # Make an API request.
+    print(
+        "Created clustered table {}.{}.{}".format(
+            table.project, table.dataset_id, table.table_id
+        )
+    )
+    # [END bigquery_create_table_clustered]
+    return table
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 7fe839119..6edca4f10 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,4 +1,4 @@
-google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1
+google-cloud-bigquery[pandas,bqstorage,pyarrow]==2.0.0
 google-auth-oauthlib==0.4.1
 ipython==7.16.1; python_version < '3.7'
 ipython==7.17.0; python_version >= '3.7'
diff --git a/samples/tests/test_create_table_clustered.py b/samples/tests/test_create_table_clustered.py
new file mode 100644
index 000000000..8eab5d48b
--- /dev/null
+++ b/samples/tests/test_create_table_clustered.py
@@ -0,0 +1,22 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .. import create_table_clustered
+
+
+def test_create_table_clustered(capsys, random_table_id):
+    table = create_table_clustered.create_table_clustered(random_table_id)
+    out, _ = capsys.readouterr()
+    assert "Created clustered table {}".format(random_table_id) in out
+    assert table.clustering_fields == ["city", "zipcode"]
diff --git a/scripts/fixup_bigquery_v2_keywords.py b/scripts/fixup_bigquery_v2_keywords.py
deleted file mode 100644
index 82b46d64e..000000000
--- a/scripts/fixup_bigquery_v2_keywords.py
+++ /dev/null
@@ -1,181 +0,0 @@
-# -*- coding: utf-8 -*-
-
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import argparse
-import os
-import libcst as cst
-import pathlib
-import sys
-from typing import (Any, Callable, Dict, List, Sequence, Tuple)
-
-
-def partition(
-    predicate: Callable[[Any], bool],
-    iterator: Sequence[Any]
-) -> Tuple[List[Any], List[Any]]:
-    """A stable, out-of-place partition."""
-    results = ([], [])
-
-    for i in iterator:
-        results[int(predicate(i))].append(i)
-
-    # Returns trueList, falseList
-    return results[1], results[0]
-
-
-class bigqueryCallTransformer(cst.CSTTransformer):
-    CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
-    METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
-        'delete_model': ('project_id', 'dataset_id', 'model_id', ),
-        'get_model': ('project_id', 'dataset_id', 'model_id', ),
-        'list_models': ('project_id', 'dataset_id', 'max_results', 'page_token', ),
-        'patch_model': ('project_id', 'dataset_id', 'model_id', 'model', ),
-
-    }
-
-    def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode:
-        try:
-            key = original.func.attr.value
-            kword_params = self.METHOD_TO_PARAMS[key]
-        except (AttributeError, KeyError):
-            # Either not a method from the API or too convoluted to be sure.
-            return updated
-
-        # If the existing code is valid, keyword args come after positional args.
-        # Therefore, all positional args must map to the first parameters.
-        args, kwargs = partition(lambda a: not bool(a.keyword), updated.args)
-        if any(k.keyword.value == "request" for k in kwargs):
-            # We've already fixed this file, don't fix it again.
-            return updated
-
-        kwargs, ctrl_kwargs = partition(
-            lambda a: not a.keyword.value in self.CTRL_PARAMS,
-            kwargs
-        )
-
-        args, ctrl_args = args[:len(kword_params)], args[len(kword_params):]
-        ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl))
-                           for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS))
-
-        request_arg = cst.Arg(
-            value=cst.Dict([
-                cst.DictElement(
-                    cst.SimpleString("'{}'".format(name)),
-                    cst.Element(value=arg.value)
-                )
-                # Note: the args + kwargs looks silly, but keep in mind that
-                # the control parameters had to be stripped out, and that
-                # those could have been passed positionally or by keyword.
-                for name, arg in zip(kword_params, args + kwargs)]),
-            keyword=cst.Name("request")
-        )
-
-        return updated.with_changes(
-            args=[request_arg] + ctrl_kwargs
-        )
-
-
-def fix_files(
-    in_dir: pathlib.Path,
-    out_dir: pathlib.Path,
-    *,
-    transformer=bigqueryCallTransformer(),
-):
-    """Duplicate the input dir to the output dir, fixing file method calls.
-
-    Preconditions:
-    * in_dir is a real directory
-    * out_dir is a real, empty directory
-    """
-    pyfile_gen = (
-        pathlib.Path(os.path.join(root, f))
-        for root, _, files in os.walk(in_dir)
-        for f in files if os.path.splitext(f)[1] == ".py"
-    )
-
-    for fpath in pyfile_gen:
-        with open(fpath, 'r') as f:
-            src = f.read()
-
-        # Parse the code and insert method call fixes.
-        tree = cst.parse_module(src)
-        updated = tree.visit(transformer)
-
-        # Create the path and directory structure for the new file.
-        updated_path = out_dir.joinpath(fpath.relative_to(in_dir))
-        updated_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Generate the updated source file at the corresponding path.
-        with open(updated_path, 'w') as f:
-            f.write(updated.code)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description="""Fix up source that uses the bigquery client library.
-
-The existing sources are NOT overwritten but are copied to output_dir with changes made.
-
-Note: This tool operates at a best-effort level at converting positional
-      parameters in client method calls to keyword based parameters.
-      Cases where it WILL FAIL include
-      A) * or ** expansion in a method call.
-      B) Calls via function or method alias (includes free function calls)
-      C) Indirect or dispatched calls (e.g. the method is looked up dynamically)
-
-      These all constitute false negatives. The tool will also detect false
-      positives when an API method shares a name with another method.
-""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index 2cb57aad2..14b38b63e 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "2.0.0" +version = "2.1.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' @@ -31,7 +31,6 @@ dependencies = [ "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", "proto-plus >= 1.10.0", - "libcst >= 0.2.5", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", @@ -125,7 +124,6 @@ install_requires=dependencies, extras_require=extras, python_requires=">=3.6", - scripts=["scripts/fixup_bigquery_v2_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/synth.metadata b/synth.metadata index c47ff1e51..d40e66dac 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,15 +3,23 @@ { "git": { "name": ".", - "remote": "git@github.com:plamut/python-bigquery.git", - "sha": "64d666033446f9af669bb8eb9170b8f62d6308e4" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "fbbe0cb0ea22161d81f1e5504bb89b55e4198634" + } + }, + { + "git": { + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", + "internalRef": "327026955" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "8a7a3021fe97aa0a3641db642fe2b767f1c8110f" + "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" } } ], diff --git a/synth.py b/synth.py index 501380be2..97466d0f4 100644 --- a/synth.py +++ b/synth.py @@ -36,6 +36,7 @@ "README.rst", "noxfile.py", "setup.py", + "scripts/fixup_bigquery_v2_keywords.py", library / f"google/cloud/bigquery/__init__.py", library / f"google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 28ebe8144..16c4fb8a5 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -806,6 +806,41 @@ def test_w_known_field_type(self): self.assertEqual(converted, str(original)) +class Test_single_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _single_field_to_json + + return _single_field_to_json(field, value) + + def test_w_none(self): + field = _make_field("INT64") + original = None + converted = 
self._call_fut(field, original) + self.assertIsNone(converted) + + def test_w_record(self): + subfields = [ + _make_field("INT64", name="one"), + _make_field("STRING", name="two"), + ] + field = _make_field("RECORD", fields=subfields) + original = {"one": 42, "two": "two"} + converted = self._call_fut(field, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_scalar(self): + field = _make_field("INT64") + original = 42 + converted = self._call_fut(field, original) + self.assertEqual(converted, str(original)) + + def test_w_scalar_ignores_mode(self): + field = _make_field("STRING", mode="REPEATED") + original = "hello world" + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + + class Test_repeated_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _repeated_field_to_json diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 4da805d48..691c4c802 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -35,15 +35,33 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + conn = self._make_one(object()) - URI = "/".join([conn.DEFAULT_API_ENDPOINT, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_custom_endpoint(self): - custom_endpoint = "https://www.foo-googleapis.com" + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + + custom_endpoint = "https://foo-bigquery.googleapis.com" conn = self._make_one(object(), api_endpoint=custom_endpoint) - URI = "/".join([custom_endpoint, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), custom_endpoint) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl
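The rewritten `_http` tests above stop comparing whole URL strings and instead split the URL and parse its query string, so a `prettyPrint=false` parameter appended by the connection layer no longer breaks them. The same pattern as a standalone sketch — `assert_api_url` is a hypothetical helper distilled from the tests, not part of the library:

```python
from six.moves.urllib.parse import parse_qsl, urlsplit


def assert_api_url(uri, expected_base, expected_path):
    """Check a built API URL piece by piece instead of by exact string match."""
    scheme, netloc, path, qs, _ = urlsplit(uri)
    assert "%s://%s" % (scheme, netloc) == expected_base
    assert path == expected_path
    params = dict(parse_qsl(qs))
    # The connection layer may or may not append prettyPrint=false; accept both.
    assert params.pop("prettyPrint", "false") == "false"
    assert params == {}  # No other unexpected query parameters.


assert_api_url(
    "https://bigquery.googleapis.com/bigquery/v2/foo?prettyPrint=false",
    "https://bigquery.googleapis.com",
    "/bigquery/v2/foo",
)
```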