Skip to content

Commit

Permalink
API: map() on Index returns an Index, not array
Browse files Browse the repository at this point in the history
closes #12766
closes #12798

This is a follow on to #12798.

Author: Nate Yoder <nate@whistle.com>

Closes #14506 from nateyoder/index_map_index and squashes the following commits:

95e4440 [Nate Yoder] fix typo and add ref tag in whatsnew
b36e83c [Nate Yoder] update whatsnew, fix documentation
4635e6a [Nate Yoder] compare as index
a17ddab [Nate Yoder] Fix unused import and docstrings per pep8radius docformatter; change other uses of assert_index_equal to testing instead of self
ab168e7 [Nate Yoder] Update whatsnew and add git PR to tests to denote changes
504c2a2 [Nate Yoder] Fix tests that weren't run by PyCharm
23c133d [Nate Yoder] Update tests to match dtype int64
07b772a [Nate Yoder] use the numpy results if we can to avoid repeating the computation just to create the object
a110be9 [Nate Yoder] make map on time tseries indices return index if dtype of output is not a tseries; sphinx changes; fix docstring
a596744 [Nate Yoder] introspect results from map so that if the output array has tuples we create a multiindex instead of an index
5fc66c3 [Nate Yoder] make map return an index if it operates on an index, multi index, or categorical index; map on a categorical will either return a categorical or an index (rather than a numpy array)
  • Loading branch information
nateyoder authored and jreback committed Dec 16, 2016
1 parent 2566223 commit 6f4e36a
Show file tree
Hide file tree
Showing 13 changed files with 188 additions and 61 deletions.
71 changes: 70 additions & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,77 @@ Other enhancements
Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. _whatsnew_0200.api:

.. _whatsnew.api_breaking.index_map:

Map on Index types now returns other Index types
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`)

.. ipython:: python

idx = Index([1, 2])
idx
mi = MultiIndex.from_tuples([(1, 2), (2, 4)])
mi

Previous Behavior:

.. code-block:: ipython

In [5]: idx.map(lambda x: x * 2)
Out[5]: array([2, 4])

In [6]: idx.map(lambda x: (x, x * 2))
Out[6]: array([(1, 2), (2, 4)], dtype=object)

In [7]: mi.map(lambda x: x)
Out[7]: array([(1, 2), (2, 4)], dtype=object)

In [8]: mi.map(lambda x: x[0])
Out[8]: array([1, 2])

New Behavior:

.. ipython:: python

idx.map(lambda x: x * 2)

idx.map(lambda x: (x, x * 2))

mi.map(lambda x: x)

mi.map(lambda x: x[0])


- ``map`` on a Series with datetime64 values may return int64 dtypes rather than int32 (:issue:`14506`)

.. ipython:: python

s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo'))
s

Previous Behavior:

.. code-block:: ipython

In [9]: s.map(lambda x: x.hour)
Out[9]:
0 0
1 1
2 2
dtype: int32


New Behavior:

.. ipython:: python

s.map(lambda x: x.hour)


.. _whatsnew_0200.api:

- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,8 +930,7 @@ def remove_unused_categories(self, inplace=False):
return cat

def map(self, mapper):
"""
Apply mapper function to its categories (not codes).
"""Apply mapper function to its categories (not codes).
Parameters
----------
Expand All @@ -943,7 +942,8 @@ def map(self, mapper):
Returns
-------
applied : Categorical or np.ndarray.
applied : Categorical or Index.
"""
new_categories = self.categories.map(mapper)
try:
Expand Down
19 changes: 15 additions & 4 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2427,8 +2427,7 @@ def groupby(self, values):
return result

def map(self, mapper):
"""
Apply mapper function to its values.
"""Apply mapper function to an index.
Parameters
----------
Expand All @@ -2437,9 +2436,21 @@ def map(self, mapper):
Returns
-------
applied : array
applied : Union[Index, MultiIndex], inferred
The output of the mapping function applied to the index.
If the function returns a tuple with more than one element
a MultiIndex will be returned.
"""
return self._arrmap(self.values, mapper)
from .multi import MultiIndex
mapped_values = self._arrmap(self.values, mapper)
attributes = self._get_attributes_dict()
if mapped_values.size and isinstance(mapped_values[0], tuple):
return MultiIndex.from_tuples(mapped_values,
names=attributes.get('name'))

attributes['copy'] = False
return Index(mapped_values, **attributes)

def isin(self, values, level=None):
"""
Expand Down
14 changes: 7 additions & 7 deletions pandas/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,22 +517,22 @@ def take(self, indices, axis=0, allow_fill=True,
return self._create_from_codes(taken)

def map(self, mapper):
"""
Apply mapper function to its categories (not codes).
"""Apply mapper function to its categories (not codes).
Parameters
----------
mapper : callable
Function to be applied. When all categories are mapped
to different categories, the result will be Categorical which has
the same order property as the original. Otherwise, the result will
be np.ndarray.
to different categories, the result will be a CategoricalIndex
which has the same order property as the original. Otherwise,
the result will be a Index.
Returns
-------
applied : Categorical or np.ndarray.
applied : CategoricalIndex or Index
"""
return self.values.map(mapper)
return self._shallow_copy_with_infer(self.values.map(mapper))

def delete(self, loc):
"""
Expand Down
62 changes: 52 additions & 10 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,48 @@ def test_sub(self):
self.assertRaises(TypeError, lambda: idx - idx.tolist())
self.assertRaises(TypeError, lambda: idx.tolist() - idx)

def test_map_identity_mapping(self):
    # GH 12766
    # Mapping every element onto itself must give back an index that
    # compares equal to the one we started from, for each fixture index.
    for index in self.indices.values():
        mapped = index.map(lambda value: value)
        tm.assert_index_equal(index, mapped)

def test_map_with_tuples(self):
    # GH 12766

    # Mapping each element to a 1-tuple: the result must compare equal
    # to an Index constructed from those same 1-tuples.
    single_tuples = tm.makeIntIndex(3).map(lambda x: (x,))
    tm.assert_index_equal(single_tuples, Index([(0,), (1,), (2,)]))

    # Mapping each element to a tuple with more than one entry: the
    # result must compare equal to a MultiIndex built from those tuples.
    pairs = tm.makeIntIndex(3).map(lambda x: (x, x == 1))
    expected_pairs = MultiIndex.from_tuples(
        [(0, False), (1, True), (2, False)])
    tm.assert_index_equal(pairs, expected_pairs)

    # Reducing each MultiIndex entry to its first element: the result
    # must compare equal to an Index of the first-level labels.
    first_level = ['foo', 'bar', 'baz']
    multi_index = MultiIndex.from_tuples(lzip(first_level, [1, 2, 3]))
    tm.assert_index_equal(multi_index.map(lambda x: x[0]),
                          Index(first_level))

def test_map_tseries_indices_return_index(self):
    # A constant-valued mapper applied to a date, period or timedelta
    # index must compare equal to a plain Index of that constant.
    ones = Index([1] * 10)
    for make_index in (tm.makeDateIndex,
                       tm.makePeriodIndex,
                       tm.makeTimedeltaIndex):
        tm.assert_index_equal(ones, make_index(10).map(lambda x: 1))

    # Extracting the hour from an hourly date index: the expected result
    # is an Index of 0..23 that carries the same name as the source index.
    hourly = tm.makeDateIndex(24, freq='h', name='hourly')
    expected_hours = Index(range(24), name='hourly')
    tm.assert_index_equal(expected_hours, hourly.map(lambda x: x.hour))

def test_append_multiple(self):
index = Index(['a', 'b', 'c', 'd', 'e', 'f'])

Expand Down Expand Up @@ -1194,16 +1236,16 @@ def check_slice(in_slice, expected):
self.assert_index_equal(result, expected)

for in_slice, expected in [
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
(SLC['y'::-4], 'yb'),
# absent labels
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
(SLC['m':'m':-1], '')
(SLC[::-1], 'yxdcb'), (SLC['b':'y':-1], ''),
(SLC['b'::-1], 'b'), (SLC[:'b':-1], 'yxdcb'),
(SLC[:'y':-1], 'y'), (SLC['y'::-1], 'yxdcb'),
(SLC['y'::-4], 'yb'),
# absent labels
(SLC[:'a':-1], 'yxdcb'), (SLC[:'a':-2], 'ydb'),
(SLC['z'::-1], 'yxdcb'), (SLC['z'::-3], 'yc'),
(SLC['m'::-1], 'dcb'), (SLC[:'m':-1], 'yx'),
(SLC['a':'a':-1], ''), (SLC['z':'z':-1], ''),
(SLC['m':'m':-1], '')
]:
check_slice(in_slice, expected)

Expand Down
23 changes: 12 additions & 11 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,19 +207,20 @@ def test_map(self):
ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'),
ordered=True)
result = ci.map(lambda x: x.lower())
exp = pd.Categorical(list('ababc'), categories=list('cba'),
ordered=True)
tm.assert_categorical_equal(result, exp)
exp = pd.CategoricalIndex(list('ababc'), categories=list('cba'),
ordered=True)
tm.assert_index_equal(result, exp)

ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
ordered=False, name='XXX')
result = ci.map(lambda x: x.lower())
exp = pd.Categorical(list('ababc'), categories=list('bac'),
ordered=False)
tm.assert_categorical_equal(result, exp)
exp = pd.CategoricalIndex(list('ababc'), categories=list('bac'),
ordered=False, name='XXX')
tm.assert_index_equal(result, exp)

tm.assert_numpy_array_equal(ci.map(lambda x: 1),
np.array([1] * 5, dtype=np.int64))
# GH 12766: Return an index not an array
tm.assert_index_equal(ci.map(lambda x: 1),
Index(np.array([1] * 5, dtype=np.int64), name='XXX'))

# change categories dtype
ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
Expand All @@ -228,9 +229,9 @@ def f(x):
return {'A': 10, 'B': 20, 'C': 30}.get(x)

result = ci.map(f)
exp = pd.Categorical([10, 20, 10, 20, 30], categories=[20, 10, 30],
ordered=False)
tm.assert_categorical_equal(result, exp)
exp = pd.CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30],
ordered=False)
tm.assert_index_equal(result, exp)

def test_where(self):
i = self.create_index()
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,9 @@ def test_apply_datetimetz(self):
tm.assert_series_equal(result, exp)

# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.apply(lambda x: x.hour)
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
tm.assert_series_equal(result, exp)

# not vectorized
Expand Down Expand Up @@ -317,8 +318,9 @@ def test_map_datetimetz(self):
tm.assert_series_equal(result, exp)

# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.map(lambda x: x.hour)
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int32)
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
tm.assert_series_equal(result, exp)

with tm.assertRaises(NotImplementedError):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,8 @@ def test_map(self):
tm.assert_categorical_equal(result, exp)

result = c.map(lambda x: 1)
tm.assert_numpy_array_equal(result, np.array([1] * 5, dtype=np.int64))
# GH 12766: Return an index not an array
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))


class TestCategoricalAsBlock(tm.TestCase):
Expand Down
12 changes: 8 additions & 4 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from pandas.util.decorators import Appender, cache_readonly
import pandas.types.concat as _concat
import pandas.tseries.frequencies as frequencies
import pandas.algos as _algos


class DatelikeOps(object):
Expand Down Expand Up @@ -330,11 +329,16 @@ def _nat_new(self, box=True):
def map(self, f):
try:
result = f(self)
if not isinstance(result, (np.ndarray, Index)):
raise TypeError

# Try to use this result if we can
if isinstance(result, np.ndarray):
self._shallow_copy(result)

if not isinstance(result, Index):
raise TypeError('The map function must return an Index object')
return result
except Exception:
return _algos.arrmap_object(self.asobject.values, f)
return self.asobject.map(f)

def sort_values(self, return_indexer=False, ascending=True):
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/tseries/tests/test_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import nose

import numpy as np
from pandas import Timestamp, Period
from pandas import Timestamp, Period, Index
from pandas.compat import u
import pandas.util.testing as tm
from pandas.tseries.offsets import Second, Milli, Micro
Expand Down Expand Up @@ -104,8 +104,8 @@ def test_dateindex_conversion(self):
for freq in ('B', 'L', 'S'):
dateindex = tm.makeDateIndex(k=10, freq=freq)
rs = self.dtc.convert(dateindex, None, None)
xp = converter.dates.date2num(dateindex._mpl_repr())
tm.assert_almost_equal(rs, xp, decimals)
xp = Index(converter.dates.date2num(dateindex._mpl_repr()))
tm.assert_index_equal(rs, xp, decimals)

def test_resolution(self):
def _assert_less(ts1, ts2):
Expand Down
15 changes: 6 additions & 9 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -3521,8 +3521,8 @@ def test_map(self):
tm.assert_index_equal(result, expected)

result = index.map(lambda x: x.ordinal)
exp = np.array([x.ordinal for x in index], dtype=np.int64)
tm.assert_numpy_array_equal(result, exp)
exp = Index([x.ordinal for x in index])
tm.assert_index_equal(result, exp)

def test_map_with_string_constructor(self):
raw = [2005, 2007, 2009]
Expand All @@ -3534,20 +3534,17 @@ def test_map_with_string_constructor(self):
types += text_type,

for t in types:
expected = np.array(lmap(t, raw), dtype=object)
expected = Index(lmap(t, raw))
res = index.map(t)

# should return an array
tm.assertIsInstance(res, np.ndarray)
# should return an Index
tm.assertIsInstance(res, Index)

# preserve element types
self.assertTrue(all(isinstance(resi, t) for resi in res))

# dtype should be object
self.assertEqual(res.dtype, np.dtype('object').type)

# lastly, values should compare equal
tm.assert_numpy_array_equal(res, expected)
tm.assert_index_equal(res, expected)

def test_convert_array_of_periods(self):
rng = period_range('1/1/2000', periods=20, freq='D')
Expand Down
Loading

0 comments on commit 6f4e36a

Please sign in to comment.