Skip to content

PERF: perf regression index construction from series (GH6150) #6153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 29, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Improvements to existing features
- add ability to recognize '%p' format code (am/pm) to date parsers when the specific format
is supplied (:issue:`5361`)
- Fix performance regression in JSON IO (:issue:`5765`)
- Fix performance regression in Index construction from Series (:issue:`6150`)

.. _release.bug_fixes-0.13.1:

Expand Down
10 changes: 5 additions & 5 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from pandas.util.decorators import cache_readonly, deprecate
from pandas.core.common import isnull
import pandas.core.common as com
from pandas.core.common import _values_from_object, is_float, is_integer
from pandas.core.common import _values_from_object, is_float, is_integer, ABCSeries
from pandas.core.config import get_option

# simplify
Expand Down Expand Up @@ -105,7 +105,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
return subarr

from pandas.tseries.period import PeriodIndex
if isinstance(data, np.ndarray):
if isinstance(data, (np.ndarray, ABCSeries)):
if issubclass(data.dtype.type, np.datetime64):
from pandas.tseries.index import DatetimeIndex
result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
Expand Down Expand Up @@ -212,7 +212,7 @@ def _coerce_to_ndarray(cls, data):
cls._scalar_data_error(data)

# other iterable of some kind
if not isinstance(data, (list, tuple)):
if not isinstance(data, (ABCSeries, list, tuple)):
data = list(data)
data = np.asarray(data)
return data
Expand Down Expand Up @@ -767,7 +767,7 @@ def asof(self, label):
For a sorted index, return the most recent label up to and including
the passed label. Return NaN if not found
"""
if isinstance(label, (Index, np.ndarray)):
if isinstance(label, (Index, ABCSeries, np.ndarray)):
raise TypeError('%s' % type(label))

if label not in self:
Expand Down Expand Up @@ -1535,7 +1535,7 @@ def slice_locs(self, start=None, end=None):

# get_loc will return a boolean array for non_uniques
# if we are not monotonic
if isinstance(start_slice, np.ndarray):
if isinstance(start_slice, (ABCSeries, np.ndarray)):
raise KeyError("cannot peform a slice operation "
"on a non-unique non-monotonic index")

Expand Down
14 changes: 8 additions & 6 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
is_list_like,_values_from_object, _maybe_box,
notnull)
notnull, ABCSeries)
from pandas.core.index import Index, Int64Index, _Identity
import pandas.compat as compat
from pandas.compat import u
Expand Down Expand Up @@ -52,9 +52,9 @@ def f(self):
def _join_i8_wrapper(joinf, with_indexers=True):
@staticmethod
def wrapper(left, right):
if isinstance(left, np.ndarray):
if isinstance(left, (np.ndarray, ABCSeries)):
left = left.view('i8', type=np.ndarray)
if isinstance(right, np.ndarray):
if isinstance(right, (np.ndarray, ABCSeries)):
right = right.view('i8', type=np.ndarray)
results = joinf(left, right)
if with_indexers:
Expand All @@ -77,7 +77,7 @@ def wrapper(self, other):
other = DatetimeIndex(other)
elif isinstance(other, compat.string_types):
other = _to_m8(other, tz=self.tz)
elif not isinstance(other, np.ndarray):
elif not isinstance(other, (np.ndarray, ABCSeries)):
other = _ensure_datetime64(other)
result = func(other)

Expand Down Expand Up @@ -195,7 +195,7 @@ def __new__(cls, data=None,
tz=tz, normalize=normalize, closed=closed,
infer_dst=infer_dst)

if not isinstance(data, np.ndarray):
if not isinstance(data, (np.ndarray, ABCSeries)):
if np.isscalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
Expand Down Expand Up @@ -228,6 +228,8 @@ def __new__(cls, data=None,
yearfirst=yearfirst)

if issubclass(data.dtype.type, np.datetime64):
if isinstance(data, ABCSeries):
data = data.values
if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
Expand Down Expand Up @@ -1400,7 +1402,7 @@ def freqstr(self):
nanosecond = _field_accessor('nanosecond', 'ns')
weekofyear = _field_accessor('weekofyear', 'woy')
week = weekofyear
dayofweek = _field_accessor('dayofweek', 'dow',
dayofweek = _field_accessor('dayofweek', 'dow',
"The day of the week with Monday=0, Sunday=6")
weekday = dayofweek
dayofyear = _field_accessor('dayofyear', 'doy')
Expand Down
8 changes: 8 additions & 0 deletions vb_suite/ctors.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,11 @@
"""

ctor_index_array_string = Benchmark('Index(data)', setup=setup)

# Index constructor benchmarks: build an Index and a DatetimeIndex directly
# from a Series of 3000 Timestamp values (three dates repeated 1000 times).
# Both benchmarks share the same setup string. Related to the Index-from-Series
# performance regression tracked as GH6150.
setup = common_setup + """
s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]*1000)
"""
index_from_series_ctor = Benchmark('Index(s)', setup=setup)

dtindex_from_series_ctor = Benchmark('DatetimeIndex(s)', setup=setup)