From a69348a558f48cfc61d03d3e8bb7f9aee48bea86 Mon Sep 17 00:00:00 2001 From: chelsea-lin <124939984+chelsea-lin@users.noreply.github.com> Date: Tue, 28 Mar 2023 07:54:39 -0700 Subject: [PATCH 1/3] fix: keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index (#1535) --- google/cloud/bigquery/_pandas_helpers.py | 4 +- tests/unit/test__pandas_helpers.py | 106 +++++++++++++++++------ 2 files changed, 82 insertions(+), 28 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index dfd966c64..601aa13df 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -481,7 +481,7 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # pandas dtype. bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe[column]) + sample_data = _first_valid(dataframe.reset_index()[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia @@ -544,7 +544,7 @@ def augment_schema(dataframe, current_bq_schema): augmented_schema.append(field) continue - arrow_table = pyarrow.array(dataframe[field.name]) + arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) if pyarrow.types.is_list(arrow_table.type): # `pyarrow.ListType` diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 885cd318c..07bf03f66 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -930,32 +930,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test): assert columns_and_indexes == expected -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_dict_sequence(module_under_test): - df_data = collections.OrderedDict( - [ - ("str_column", ["hello", "world"]), - ("int_column", [42, 8]), - ("bool_column", [True, False]), - ] - ) - dataframe = pandas.DataFrame(df_data) - - dict_schema = [ 
- {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, - {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, - ] - - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) - - expected_schema = ( - schema.SchemaField("str_column", "STRING", "NULLABLE"), - schema.SchemaField("int_column", "INTEGER", "NULLABLE"), - schema.SchemaField("bool_column", "BOOL", "REQUIRED"), - ) - assert returned_schema == expected_schema - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_arrow_with_multiindex(module_under_test): bq_schema = ( @@ -1190,6 +1164,86 @@ def test_dataframe_to_parquet_compression_method(module_under_test): assert call_args.kwargs.get("compression") == "ZSTD" +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.Index(["a", "b"], name="str_index") + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_multiindex(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + index = pandas.MultiIndex.from_tuples( + [ + ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)), + ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)), + ], + names=["str_index", 
"int_index", "dt_index"], + ) + dataframe = pandas.DataFrame(df_data, index=index) + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + + expected_schema = ( + schema.SchemaField("str_index", "STRING", "NULLABLE"), + schema.SchemaField("int_index", "INTEGER", "NULLABLE"), + schema.SchemaField("dt_index", "DATETIME", "NULLABLE"), + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"), + ) + assert returned_schema == expected_schema + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): + df_data = collections.OrderedDict( + [ + ("str_column", ["hello", "world"]), + ("int_column", [42, 8]), + ("bool_column", [True, False]), + ] + ) + dataframe = pandas.DataFrame(df_data) + + dict_schema = [ + {"name": "str_column", "type": "STRING", "mode": "NULLABLE"}, + {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, + ] + + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + + expected_schema = ( + schema.SchemaField("str_column", "STRING", "NULLABLE"), + schema.SchemaField("int_column", "INTEGER", "NULLABLE"), + schema.SchemaField("bool_column", "BOOL", "REQUIRED"), + ) + assert returned_schema == expected_schema + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): dataframe = pandas.DataFrame( From 339eb0e86040a7c30d140800f34810ffc6a7c76b Mon Sep 17 00:00:00 2001 From: r1b Date: Tue, 28 Mar 2023 11:37:04 -0400 Subject: [PATCH 2/3] feat: expose query job on dbapi cursor (#1520) Co-authored-by: Tim Swast --- google/cloud/bigquery/dbapi/cursor.py | 10 ++++++++++ tests/unit/test_dbapi_cursor.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/dbapi/cursor.py 
b/google/cloud/bigquery/dbapi/cursor.py index 03f3b72ca..0dc8f56ab 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -79,6 +79,16 @@ def __init__(self, connection): self._query_job = None self._closed = False + @property + def query_job(self): + """google.cloud.bigquery.job.query.QueryJob: The query job created by + the last ``execute*()`` call. + + .. note:: + If the last ``execute*()`` call was ``executemany()``, this is the + last job created by ``executemany()``.""" + return self._query_job + def close(self): """Mark the cursor as closed, preventing its further use.""" self._closed = True diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index b550bbce0..fc6ea3882 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -662,6 +662,29 @@ def test_is_iterable(self): "Iterating again over the same results should produce no rows.", ) + def test_query_job_wo_execute(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + self.assertIsNone(cursor.query_job) + + def test_query_job_w_execute(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.execute("SELECT 1;") + self.assertIsInstance(cursor.query_job, QueryJob) + + def test_query_job_w_executemany(self): + from google.cloud.bigquery import dbapi, QueryJob + + connection = dbapi.connect(self._mock_client()) + cursor = connection.cursor() + cursor.executemany("SELECT %s;", (("1",), ("2",))) + self.assertIsInstance(cursor.query_job, QueryJob) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 5d0ebf462e49cb7aea474c9de3a8c2ea4fa11c58 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:14:52 -0700 Subject: [PATCH 3/3] 
chore(main): release 3.9.0 (#1537) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3fc839a..5bbde01f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28) + + +### Features + +* Expose query job on dbapi cursor ([#1520](https://github.com/googleapis/python-bigquery/issues/1520)) ([339eb0e](https://github.com/googleapis/python-bigquery/commit/339eb0e86040a7c30d140800f34810ffc6a7c76b)) + + +### Bug Fixes + +* Keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index ([#1535](https://github.com/googleapis/python-bigquery/issues/1535)) ([a69348a](https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86)) + ## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8f4ba4810..0bc275357 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.8.0" +__version__ = "3.9.0"