Skip to content
10 changes: 8 additions & 2 deletions pandas/core/computation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _check_for_locals(expr, stack_level, parser):

def eval(expr, parser='pandas', engine=None, truediv=True,
local_dict=None, global_dict=None, resolvers=(), level=0,
target=None, inplace=False):
target=None, inplace=False, partial_str_match: bool = False):
"""Evaluate a Python expression as a string using various backends.

The following arithmetic operations are supported: ``+``, ``-``, ``*``,
Expand Down Expand Up @@ -220,6 +220,11 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
If `target` is provided, and the expression mutates `target`, whether
to modify `target` inplace. Otherwise, return a copy of `target` with
the mutation.
partial_str_match : bool, optional, default False
If this is True, an `expr` like "string_query in list_like_of_strings"
is interpreted as partial string match (the default behavior is exact
matching).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a "versionadded" tag in the docstring.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, I added it.

.. versionadded:: 0.25.0

Returns
-------
Expand Down Expand Up @@ -290,7 +295,8 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
target=target)

parsed_expr = Expr(expr, engine=engine, parser=parser, env=env,
truediv=truediv)
truediv=truediv,
partial_str_match=partial_str_match)

# construct the engine and evaluate the parsed expression
eng = _engines[engine]
Expand Down
52 changes: 44 additions & 8 deletions pandas/core/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,18 @@ def _is_type(t):
_is_str = _is_type(str)


def _is_series_of_str(term):
    """Test whether a ``Term`` holds a ``Series`` that starts with a ``str``.

    Parameters
    ----------
    term : Term
        Object exposing the candidate value as ``term.value``.

    Returns
    -------
    bool
        True when ``_is_type(pd.Series)`` accepts ``term`` (presumably a
        check that ``term.value`` is a ``pd.Series`` -- the helper is
        defined elsewhere in this module), the value is non-empty, and its
        first element *by position* is a ``str``. False otherwise.

    Notes
    -----
    Only the first element is inspected; the remaining elements are not
    checked.
    """
    if not _is_type(pd.Series)(term):
        return False
    values = term.value
    # ``values[0]`` is a label lookup on a Series: it raises ``KeyError``
    # when the index has no label 0 (e.g. after filtering rows) and cannot
    # succeed on an empty Series. Guard the length and index by position.
    return len(values) > 0 and isinstance(values.iloc[0], str)


def _do_partial_str_match(op, left_term, right_term):
    """Decide whether a comparison should run as a partial string match.

    The result is truthy only when all of the following hold, checked in
    this order:

    * both operands satisfy ``is_term``;
    * the class of ``op`` is named ``In`` or ``NotIn``;
    * ``left_term`` satisfies ``_is_str``;
    * ``right_term`` satisfies ``_is_series_of_str``.

    Otherwise the caller should fall back to ordinary membership testing.
    """
    # Operands that are not terms are never inspected any further.
    if not (is_term(left_term) and is_term(right_term)):
        return False

    # Only the membership operators are eligible.
    op_name = op.__class__.__name__
    if op_name != 'In' and op_name != 'NotIn':
        return False

    return _is_str(left_term) and _is_series_of_str(right_term)


# partition all AST nodes
_all_nodes = frozenset(filter(lambda x: isinstance(x, type) and
issubclass(x, ast.AST),
Expand Down Expand Up @@ -327,6 +339,7 @@ class BaseExprVisitor(ast.NodeVisitor):
engine : str
parser : str
preparser : callable
partial_str_match : bool
"""
const_type = Constant
term_type = Term
Expand All @@ -348,12 +361,14 @@ class BaseExprVisitor(ast.NodeVisitor):
ast.NotIn: ast.NotIn
}

def __init__(self, env, engine, parser, preparser=_preparse,
             partial_str_match=False):
    """Store the visitor configuration on the instance.

    Parameters
    ----------
    env : object
        Evaluation environment, stored as ``self.env``.
    engine : str
        Stored as ``self.engine``.
    parser : str
        Stored as ``self.parser``.
    preparser : callable, default ``_preparse``
        Stored as ``self.preparser``.
    partial_str_match : bool, default False
        Stored as ``self.partial_str_match``; ``visit_Compare`` reads this
        flag to decide whether to attempt a partial string match.
    """
    self.env = env
    self.engine = engine
    self.parser = parser
    self.preparser = preparser
    # No assignment target is known at construction time.
    self.assigner = None
    self.partial_str_match = partial_str_match

def visit(self, node, **kwargs):
if isinstance(node, str):
Expand Down Expand Up @@ -703,6 +718,23 @@ def visit_Compare(self, node, **kwargs):
# base case: we have something like a CMP b
if len(comps) == 1:
op = self.translate_In(ops[0])

if self.partial_str_match:
# partial string match (case sensitive):
# when the left node is `Term` with a str value,
# right `Term` with a value of Series containing str
# and `op` is `In` or `NotIn`,
# we skip ordinary binary ops and apply `str_contains`.
left = self.visit(node.left)
right = self.visit(comps[0])
if _do_partial_str_match(op, left, right):
from pandas.core.strings import str_contains
_res = str_contains(right.value, left.value, regex=False)
if op.__class__.__name__ == "NotIn":
_res = ~_res
name = self.env.add_tmp(_res)
return self.term_type(name, env=self.env)

binop = ast.BinOp(op=op, left=node.left, right=comps[0])
return self.visit(binop)

Expand Down Expand Up @@ -754,16 +786,18 @@ class PandasExprVisitor(BaseExprVisitor):
def __init__(self, env, engine, parser,
             preparser=partial(_preparse, f=_compose(
                 _replace_locals, _replace_booleans,
                 _clean_spaces_backtick_quoted_names)),
             partial_str_match=False):
    """Initialise the visitor with the pandas-flavoured default preparser.

    Parameters
    ----------
    env, engine, parser
        Forwarded unchanged to the base-class initialiser.
    preparser : callable
        Defaults to ``_preparse`` with ``f`` bound to the composition of
        ``_replace_locals``, ``_replace_booleans`` and
        ``_clean_spaces_backtick_quoted_names``.
    partial_str_match : bool, default False
        Forwarded unchanged to the base-class initialiser.
    """
    # Forward by keyword so the call does not depend on the positional
    # order of the optional parameters in the base class.
    super().__init__(env, engine, parser, preparser=preparser,
                     partial_str_match=partial_str_match)


@disallow(_unsupported_nodes | _python_not_supported | frozenset(['Not']))
class PythonExprVisitor(BaseExprVisitor):
    """Expression visitor whose default preparser is the identity function.

    The class is wrapped by ``disallow`` with the union of
    ``_unsupported_nodes``, ``_python_not_supported`` and ``'Not'``
    (presumably the node names this visitor refuses to handle -- see
    ``disallow`` for the exact effect).
    """

    def __init__(self, env, engine, parser, preparser=lambda x: x,
                 partial_str_match=False):
        """Forward all arguments to ``BaseExprVisitor.__init__``.

        Parameters
        ----------
        env, engine, parser
            Forwarded unchanged to the base-class initialiser.
        preparser : callable, default identity
            By default the expression source is passed through untouched.
        partial_str_match : bool, default False
            Forwarded unchanged to the base-class initialiser.
        """
        super().__init__(env, engine, parser, preparser=preparser,
                         partial_str_match=partial_str_match)


class Expr(StringMixin):
Expand All @@ -778,16 +812,18 @@ class Expr(StringMixin):
env : Scope, optional, default None
truediv : bool, optional, default True
level : int, optional, default 2
partial_str_match : bool, optional, default False
"""

def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
             truediv=True, level=0, partial_str_match=False):
    """Build the visitor for ``parser`` and parse ``expr`` immediately.

    Parameters
    ----------
    expr : str
        Expression source, stored as ``self.expr``.
    engine : str, default 'numexpr'
        Stored as ``self.engine`` and handed to the visitor.
    parser : str, default 'pandas'
        Key into ``_parsers`` selecting the visitor class.
    env : Scope, optional
        Evaluation scope. When falsy, a new ``Scope(level=level + 1)`` is
        created.
    truediv : bool, default True
        Recorded in the scope as ``env.scope['truediv']``.
    level : int, default 0
        Only used when a new ``Scope`` has to be created.
    partial_str_match : bool, default False
        Passed by keyword to the visitor constructor.
    """
    self.expr = expr
    self.env = env or Scope(level=level + 1)
    self.engine = engine
    self.parser = parser
    self.env.scope['truediv'] = truediv
    # Construct the visitor exactly once, with the partial-match flag.
    self._visitor = _parsers[parser](self.env, self.engine, self.parser,
                                     partial_str_match=partial_str_match)
    self.terms = self.parse()

@property
Expand Down
Loading