Skip to content

Commit afa9752

Browse files
kiraksi and Linchin authored
chore: refactor version checks for pandas library (#1711)
* chore: refactor version checks for pandas library * re-added removed import of pandas * revert bad commit * merged from main, added type: ignore tag to get around mypy error * Added ignore statement for mypy error, removed checking max version of Pandas * updated docstring error * Added parametrize to test to test multiple supported versions --------- Co-authored-by: Lingqing Gan <[email protected]>
1 parent 40bc244 commit afa9752

File tree

5 files changed

+123
-2
lines changed

5 files changed

+123
-2
lines changed

google/cloud/bigquery/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@
202202
# Custom exceptions
203203
"LegacyBigQueryStorageError",
204204
"LegacyPyarrowError",
205+
"LegacyPandasError",
205206
]
206207

207208

google/cloud/bigquery/_versions_helpers.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
2525
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
2626
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
27+
_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0")
2728

2829

2930
class PyarrowVersions:
@@ -171,3 +172,65 @@ def try_import(self, raise_if_error: bool = False) -> Any:
171172

172173

173174
BQ_STORAGE_VERSIONS = BQStorageVersions()
175+
176+
177+
class PandasVersions:
    """Helper for inspecting and validating the installed pandas version."""

    def __init__(self):
        # Filled in lazily by ``installed_version`` on first access.
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of pandas.

        The version is parsed on first access and the cached value is
        returned on every later access.
        """
        if self._installed_version is not None:
            return self._installed_version

        import pandas  # type: ignore

        # Fall back to 0.0.0 when pandas exposes no ``__version__``, since
        # it is earlier than any released version. Legacy versions have the
        # same property, but creating a LegacyVersion has been deprecated.
        # https://github.com/pypa/packaging/issues/321
        raw_version = getattr(pandas, "__version__", "0.0.0")
        self._installed_version = packaging.version.parse(raw_version)
        return self._installed_version

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Import pandas and check that it is recent enough.

        Because `pip` can install an outdated version of pandas despite the
        constraints in `setup.py`, calling code can use this helper to
        verify version compatibility at runtime.

        Args:
            raise_if_error:
                When ``True``, raise instead of returning ``None`` if pandas
                cannot be imported or is older than the minimum version.

        Returns:
            The ``pandas`` module or ``None``.

        Raises:
            exceptions.LegacyPandasError:
                If the pandas package is missing or outdated and
                ``raise_if_error`` is ``True``.
        """
        try:
            import pandas
        except ImportError as exc:  # pragma: NO COVER
            if not raise_if_error:
                return None
            raise exceptions.LegacyPandasError(
                "pandas package not found. Install pandas version >="
                f" {_MIN_PANDAS_VERSION}"
            ) from exc

        found_version = self.installed_version
        if found_version < _MIN_PANDAS_VERSION:
            if not raise_if_error:
                return None
            raise exceptions.LegacyPandasError(
                "Dependency pandas is outdated, please upgrade"
                f" it to version >= {_MIN_PANDAS_VERSION}"
                f" (version found: {found_version})."
            )

        return pandas
234+
235+
236+
PANDAS_VERSIONS = PandasVersions()

google/cloud/bigquery/client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@
115115
from google.cloud.bigquery.table import RowIterator
116116

117117
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
118+
# mypy cannot follow this runtime import of pandas, so annotations in this
# module that refer to pandas types carry ``# type: ignore`` comments.
pandas = _versions_helpers.PANDAS_VERSIONS.try_import()
118121

119122
TimeoutType = Union[float, None]
120123
ResumableTimeoutType = Union[
@@ -124,7 +127,6 @@
124127
if typing.TYPE_CHECKING: # pragma: NO COVER
125128
# os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
126129
PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
127-
import pandas # type: ignore
128130
import requests # required by api-core
129131

130132
_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB
@@ -2488,7 +2490,7 @@ def load_table_from_file(
24882490

24892491
def load_table_from_dataframe(
24902492
self,
2491-
dataframe: "pandas.DataFrame",
2493+
dataframe: "pandas.DataFrame", # type: ignore
24922494
destination: Union[Table, TableReference, str],
24932495
num_retries: int = _DEFAULT_NUM_RETRIES,
24942496
job_id: Optional[str] = None,

google/cloud/bigquery/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,7 @@ class BigQueryStorageNotFoundError(BigQueryError):
2929
"""Raised when BigQuery Storage extra is not installed when trying to
3030
import it.
3131
"""
32+
33+
34+
class LegacyPandasError(BigQueryError):
    """Raised when the pandas package detected at runtime is too old."""

tests/unit/test__versions_helpers.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
except ImportError: # pragma: NO COVER
2727
bigquery_storage = None
2828

29+
try:
30+
import pandas # type: ignore
31+
except ImportError: # pragma: NO COVER
32+
pandas = None
33+
2934
from google.cloud.bigquery import _versions_helpers
3035
from google.cloud.bigquery import exceptions
3136

@@ -173,3 +178,49 @@ def test_bqstorage_is_read_session_optional_false():
173178
bqstorage_versions = _versions_helpers.BQStorageVersions()
174179
with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
175180
assert not bqstorage_versions.is_read_session_optional
181+
182+
183+
@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"])
def test_try_import_raises_no_error_w_recent_pandas(version):
    """try_import() returns the module for each supported pandas version."""
    versions = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new=version):
        try:
            # Use a distinct local name so the module-level ``pandas`` import
            # checked by the skipif marker is not shadowed.
            pandas_module = versions.try_import(raise_if_error=True)
        except exceptions.LegacyPandasError:  # pragma: NO COVER
            # ``raise "some string"`` is itself a TypeError in Python 3, so
            # report the failure through pytest to keep the message readable.
            pytest.fail("Legacy error raised with a non-legacy dependency version.")
        assert pandas_module is not None
193+
194+
195+
@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_try_import_returns_none_w_legacy_pandas():
    """try_import() yields None for an outdated pandas when not told to raise."""
    checker = _versions_helpers.PandasVersions()
    legacy_version = mock.patch("pandas.__version__", new="1.0.0")
    with legacy_version:
        result = checker.try_import()
    assert result is None
201+
202+
203+
@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_try_import_raises_error_w_legacy_pandas():
    """try_import(raise_if_error=True) raises for an outdated pandas."""
    checker = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new="1.0.0"), pytest.raises(
        exceptions.LegacyPandasError
    ):
        checker.try_import(raise_if_error=True)
209+
210+
211+
@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_installed_pandas_version_returns_cached():
    """installed_version hands back an already-cached value untouched."""
    sentinel = object()
    checker = _versions_helpers.PandasVersions()
    checker._installed_version = sentinel
    assert checker.installed_version is sentinel
216+
217+
218+
@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
def test_installed_pandas_version_returns_parsed_version():
    """installed_version parses pandas.__version__ into its components."""
    checker = _versions_helpers.PandasVersions()
    with mock.patch("pandas.__version__", new="1.1.0"):
        parsed = checker.installed_version

    assert (parsed.major, parsed.minor, parsed.micro) == (1, 1, 0)

0 commit comments

Comments
 (0)