Skip to content

Commit 6c268de

Browse files
committed
Merge branch 'long-str-unconvert-fix' of https://github.com/wabu/pandas into wabu-long-str-unconvert-fix
Conflicts: doc/source/release.rst
2 parents 7cc61b2 + 03bfa4b commit 6c268de

File tree

3 files changed

+33
-7
lines changed

3 files changed

+33
-7
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ Bug Fixes
9090
- ``HDFStore.remove`` now handles start and stop (:issue:`6177`)
9191
- ``HDFStore.select_as_multiple`` handles start and stop the same way as ``select`` (:issue:`6177`)
9292
- ``HDFStore.select_as_coordinates`` and ``select_column`` work with where clauses that result in filters (:issue:`6177`)
93+
- Bug with numpy < 1.7.2 when reading long strings from ``HDFStore`` (:issue:`6166`)
9394

9495
pandas 0.13.1
9596
-------------

pandas/io/pytables.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,15 @@
4040
import pandas.tslib as tslib
4141

4242
from contextlib import contextmanager
43+
from distutils.version import LooseVersion
4344

4445
# versioning attribute
4546
_version = '0.10.1'
4647

4748
# PY3 encoding if we don't specify
4849
_default_encoding = 'UTF-8'
4950

51+
_np_version_under_172 = LooseVersion(np.__version__) < '1.7.2'
5052

5153
def _ensure_decoded(s):
5254
""" if we have bytes, decode them to unicde """
@@ -776,8 +778,8 @@ def func(_start, _stop):
776778
c = s.read_coordinates(where=where, start=_start, stop=_stop, **kwargs)
777779
else:
778780
c = None
779-
780-
objs = [t.read(where=c, start=_start, stop=_stop,
781+
782+
objs = [t.read(where=c, start=_start, stop=_stop,
781783
columns=columns, **kwargs) for t in tbls]
782784

783785
# concat and return
@@ -4165,7 +4167,6 @@ def _convert_string_array(data, encoding, itemsize=None):
41654167
data = np.array(data, dtype="S%d" % itemsize)
41664168
return data
41674169

4168-
41694170
def _unconvert_string_array(data, nan_rep=None, encoding=None):
41704171
""" deserialize a string array, possibly decoding """
41714172
shape = data.shape
@@ -4175,8 +4176,14 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
41754176
# where the passed encoding is actually None)
41764177
encoding = _ensure_encoding(encoding)
41774178
if encoding is not None and len(data):
4179+
if _np_version_under_172:
4180+
itemsize = lib.max_len_string_array(data)
4181+
dtype = "S{0}".format(itemsize)
4182+
else:
4183+
dtype = string_types[0]
4184+
41784185
try:
4179-
data = data.astype(string_types).astype(object)
4186+
data = data.astype(dtype).astype(object)
41804187
except:
41814188
f = np.vectorize(lambda x: x.decode(encoding), otypes=[np.object])
41824189
data = f(data)

pandas/io/tests/test_pytables.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,24 @@ def roundtrip(key, obj,**kwargs):
176176
finally:
177177
safe_remove(self.path)
178178

179+
def test_long_strings(self):
180+
df = DataFrame({'a': [tm.rands(100) for _ in range(10)]},
181+
index=[tm.rands(100) for _ in range(10)])
182+
183+
with ensure_clean_store(self.path) as store:
184+
store.append('df', df, data_columns=['a'])
185+
assert_frame_equal(store['df'], df)
186+
187+
# test with an encoding
188+
if LooseVersion(tables.__version__) < '3.0.0':
189+
raise nose.SkipTest('tables version does not support proper encoding')
190+
if sys.byteorder != 'little':
191+
raise nose.SkipTest('system byteorder is not little')
192+
193+
with ensure_clean_store(self.path) as store:
194+
store.append('df', df, data_columns=['a'], encoding='ascii')
195+
assert_frame_equal(store['df'], df)
196+
179197
def test_api(self):
180198

181199
# GH4584
@@ -2199,7 +2217,7 @@ def test_remove_startstop(self):
21992217
# GH #4835 and #6177
22002218

22012219
with ensure_clean_store(self.path) as store:
2202-
2220+
22032221
wp = tm.makePanel()
22042222

22052223
# start
@@ -2246,7 +2264,7 @@ def test_remove_startstop(self):
22462264
result = store.select('wp6')
22472265
expected = wp.reindex(major_axis=wp.major_axis)
22482266
assert_panel_equal(result, expected)
2249-
2267+
22502268
# with where
22512269
date = wp.major_axis.take(np.arange(0,30,3))
22522270
crit = Term('major_axis=date')
@@ -2256,7 +2274,7 @@ def test_remove_startstop(self):
22562274
result = store.select('wp7')
22572275
expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
22582276
assert_panel_equal(result, expected)
2259-
2277+
22602278

22612279
def test_remove_crit(self):
22622280

0 commit comments

Comments
 (0)