|
8 | 8 |
|
9 | 9 | from pandas import DataFrame, Index, MultiIndex, Series, compat |
10 | 10 | from pandas.core import common as com |
11 | | -from pandas.core.arrays.categorical import ( |
12 | | - _factorize_from_iterable, _factorize_from_iterables) |
13 | 11 | from pandas.core.generic import NDFrame |
14 | | -from pandas.core.index import ( |
15 | | - _all_indexes_same, _get_consensus_names, _get_objs_combined_axis, |
16 | | - ensure_index) |
| 12 | +from pandas.core.index import _get_objs_combined_axis, ensure_index |
17 | 13 | import pandas.core.indexes.base as ibase |
18 | 14 | from pandas.core.internals import concatenate_block_managers |
19 | 15 |
|
@@ -533,103 +529,62 @@ def _concat_indexes(indexes): |
533 | 529 |
|
534 | 530 |
|
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Produce a MultiIndex which includes concatenated pieces in "indexes",
    prepended by one or more levels defined by "keys".

    Parameters
    ----------
    indexes : sequence of Index (or subclass) instances.
        Pieces of new Index.
    keys : sequence of labels, same length as "indexes".
        Labels used to index the pieces in "indexes".
    levels : list of sequences, default None
        Used to override the ".levels" in the resulting hierarchical index.
        A ``None`` entry leaves the level at that position untouched.
    names : list, default None
        Names for the levels in the resulting hierarchical index. Either
        one name per key level (the names of the concatenated pieces are
        then kept), or one name per level of the result.

    Returns
    -------
    concatenated : MultiIndex

    Raises
    ------
    ValueError
        If a passed level does not contain every value present in the
        level it is meant to replace.
    """
    orig = _concat_indexes(indexes)

    # Simplest way to create and prepend the keys level(s): repeat each key
    # once per row of its piece, then glue the key columns to the columns of
    # the concatenated original index.
    keys_chunks = [[key] * len(idx) for key, idx in zip(keys, indexes)]
    keys_levs = Index([key for chunk in keys_chunks for key in chunk],
                      tupleize_cols=True)
    tot_df = concat([keys_levs.to_frame().reset_index(drop=True),
                     orig.to_frame().reset_index(drop=True)], axis=1)
    temp_names = [None] * keys_levs.nlevels + list(orig.names)
    result = MultiIndex.from_frame(tot_df, names=temp_names)

    if names is not None:
        if len(names) == keys_levs.nlevels:
            # Received only names for keys level(s)
            result.names = list(names) + list(result.names)[len(names):]
        else:
            # Received names for all levels
            result.names = names

    if levels is not None:
        for pos, level in enumerate(levels):
            if level is None:
                continue
            cur_lev = result.levels[pos]
            new_lev = Index(level)
            # The indexer is aligned with ``cur_lev``, so the -1 positions
            # identify values of the *current* level that the passed level
            # lacks; they must be looked up in ``cur_lev``, not ``level``.
            not_found = np.where(new_lev.get_indexer(cur_lev) == -1)[0]

            if len(not_found):
                missing = [cur_lev[j] for j in not_found]
                raise ValueError("Values not found in passed level: "
                                 "{missing!s}"
                                 .format(missing=missing))
            # Capture the values before swapping levels so the codes can be
            # recomputed against the new level.
            cur_val = result.get_level_values(pos)
            new_codes = list(new_lev.get_indexer_for(cur_val))
            result = (result.set_levels(new_lev, level=pos)
                      .set_codes(new_codes, level=pos))

    return result
0 commit comments