Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions docs/constraints.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,42 @@ These constraints are valid on `str` types:
File "<stdin>", line 1, in <module>
msgspec.ValidationError: Expected `str` matching regex '^[a-z0-9_]*$'


Third-Party Regex Engines
++++++++++++++++++++++++++

It is possible to use a third-party regex engine by providing an object to ``pattern``
that implements an :class:`re.Pattern`-like protocol. In particular, it must conform to
the following interface:

.. code-block:: python

class Pattern:
pattern: str
"""The raw pattern string"""

def search(
self,
string: str,
pos: int = 0,
endpos: int = sys.maxsize,
) -> object | None: ...
"""
Match `string` against the pattern. When no match is found, return `None`.
Otherwise, any non-`None` object may be returned
"""

For example, using the ``re2`` package:

.. code-block:: python

from typing import Annotated

import msgspec
import re2

# The input contains a space, which the pattern does not allow, so this
# decode call is expected to be rejected with a ``msgspec.ValidationError``.
msgspec.json.decode(
    b'"invalid username"',
    type=Annotated[str, msgspec.Meta(pattern=re2.Pattern("^[a-z0-9_]*$"))],
)


.. _datetime-constraints:

Datetime Constraints
Expand Down
1 change: 1 addition & 0 deletions src/msgspec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ def field(*, default=NODEFAULT, default_factory=NODEFAULT, name=None):

from . import inspect, json, msgpack, structs, toml, yaml
from ._version import __version__
from ._utils import RegexPatternProtocol
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to publish this?

5 changes: 3 additions & 2 deletions src/msgspec/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ from typing import (
from typing_extensions import Buffer, dataclass_transform

from . import inspect, json, msgpack, structs, toml, yaml
from ._utils import RegexPatternProtocol

# PEP 673 explicitly rejects using Self in metaclass definitions:
# https://peps.python.org/pep-0673/#valid-locations-for-self
Expand Down Expand Up @@ -166,7 +167,7 @@ class Meta:
lt: Union[int, float, None] = None,
le: Union[int, float, None] = None,
multiple_of: Union[int, float, None] = None,
pattern: Union[str, None] = None,
pattern: Union[str, RegexPatternProtocol, None] = None,
min_length: Union[int, None] = None,
max_length: Union[int, None] = None,
tz: Union[bool, None] = None,
Expand All @@ -181,7 +182,7 @@ class Meta:
lt: Final[Union[int, float, None]]
le: Final[Union[int, float, None]]
multiple_of: Final[Union[int, float, None]]
pattern: Final[Union[str, None]]
pattern: Final[Union[str, RegexPatternProtocol, None]]
min_length: Final[Union[int, None]]
max_length: Final[Union[int, None]]
tz: Final[Union[int, None]]
Expand Down
29 changes: 25 additions & 4 deletions src/msgspec/_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1730,6 +1730,20 @@ ensure_is_finite_numeric(PyObject *val, const char *param, bool positive) {
return true;
}

/* Validate that `val` is usable as a regex `pattern` constraint.
 *
 * Accepted values are an exact `str`, or any object that duck-types as a
 * compiled pattern: it must expose a `pattern` attribute and a *callable*
 * `search` attribute.
 *
 * Returns true when `val` is acceptable. Otherwise sets a TypeError that
 * names `param` and the type of `val`, and returns false.
 *
 * NOTE(review): this is deliberately a lightweight duck-type check. A
 * stricter alternative raised in review is to make RegexPatternProtocol
 * runtime checkable and perform an isinstance check here instead; whether
 * that extra strictness is desirable is still an open question.
 */
static bool
ensure_is_re_pattern_compatible(PyObject *val, const char *param) {
    if (PyUnicode_CheckExact(val)) return true;

    if (PyObject_HasAttrString(val, "pattern")) {
        PyObject *search = PyObject_GetAttrString(val, "search");
        if (search == NULL) {
            /* A missing (or failing) `search` attribute is reported through
             * the uniform TypeError below rather than the lookup error */
            PyErr_Clear();
        }
        else {
            /* Reject non-callable `search` up front, rather than failing
             * later when the pattern is first used */
            bool callable = PyCallable_Check(search) != 0;
            Py_DECREF(search);
            if (callable) return true;
        }
    }

    PyErr_Format(
        PyExc_TypeError,
        "`%s` must be a str, or an re.Pattern like type, got %.200s",
        param, Py_TYPE(val)->tp_name
    );
    return false;
}

PyDoc_STRVAR(Meta__doc__,
"Meta(*, gt=None, ge=None, lt=None, le=None, multiple_of=None, pattern=None, "
"min_length=None, max_length=None, tz=None, title=None, description=None, "
Expand Down Expand Up @@ -1846,7 +1860,7 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {
if (lt != NULL && !ensure_is_finite_numeric(lt, "lt", false)) return NULL;
if (le != NULL && !ensure_is_finite_numeric(le, "le", false)) return NULL;
if (multiple_of != NULL && !ensure_is_finite_numeric(multiple_of, "multiple_of", true)) return NULL;
if (pattern != NULL && !ensure_is_string(pattern, "pattern")) return NULL;
if (pattern != NULL && !ensure_is_re_pattern_compatible(pattern, "pattern")) return NULL;
if (min_length != NULL && !ensure_is_nonnegative_integer(min_length, "min_length")) return NULL;
if (max_length != NULL && !ensure_is_nonnegative_integer(max_length, "max_length")) return NULL;
if (tz != NULL && !ensure_is_bool(tz, "tz")) return NULL;
Expand Down Expand Up @@ -1901,9 +1915,16 @@ Meta_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) {

/* regex compile pattern if provided */
if (pattern != NULL) {
MsgspecState *mod = msgspec_get_global_state();
regex = PyObject_CallOneArg(mod->re_compile, pattern);
if (regex == NULL) return NULL;
if (PyUnicode_Check(pattern)) {
// 'pattern' is a string; compile it using re.compile
MsgspecState *mod = msgspec_get_global_state();
regex = PyObject_CallOneArg(mod->re_compile, pattern);
if (regex == NULL) return NULL;
} else {
// 'pattern' is an re.Pattern like object. no need to do anything
// do no Py_INCREF here, since will be done during SET_FIELD later
regex = pattern;
}
}

Meta *out = (Meta *)Meta_Type.tp_alloc(&Meta_Type, 0);
Expand Down
11 changes: 11 additions & 0 deletions src/msgspec/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,14 @@ def inner(obj):
def rebuild(cls, kwargs):
"""Used to unpickle Structs with keyword-only fields"""
return cls(**kwargs)


class RegexPatternProtocol(typing.Protocol):
    """Structural type for compiled-pattern-like objects usable as ``pattern``.

    Any object that exposes the raw ``pattern`` string and a ``search``
    method with the signature below satisfies this protocol. The required
    API is intentionally kept as minimal as possible.
    """

    # The raw pattern string
    pattern: str

    # `typing.Optional` is used instead of `object | None` because this
    # annotation is evaluated when the class is defined, and the `|` form
    # on types requires Python 3.10+.
    def search(
        self,
        string: str,
        pos: int = 0,
        endpos: int = sys.maxsize,
    ) -> typing.Optional[object]:
        """Match ``string`` against the pattern.

        Returns ``None`` when no match is found. Otherwise any non-``None``
        object may be returned.
        """
        ...
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning `object | None` reflects how it's being used internally. This allows greater flexibility, since regex engines need not also comply with the `re.Match` protocol, and we're not doing anything with the match anyway.

23 changes: 21 additions & 2 deletions tests/unit/test_constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def test_nonnegative_integer_fields(self, field):
with pytest.raises(ValueError, match=f"{field}` must be >= 0, got -10"):
Meta(**{field: -10})

@pytest.mark.parametrize("field", ["pattern", "title", "description"])
@pytest.mark.parametrize("field", ["title", "description"])
def test_string_fields(self, field):
Meta(**{field: "good"})
with pytest.raises(TypeError, match=f"`{field}` must be a str, got bytes"):
Expand Down Expand Up @@ -202,6 +202,14 @@ def test_invalid_pattern_errors(self):
with pytest.raises(re.error):
Meta(pattern="[abc")

@pytest.mark.parametrize(
    "good",
    [
        "string",
        re.compile("string"),
    ],
)
def test_pattern_valid_type(self, good):
    """A plain `str` and a compiled `re.Pattern` are both accepted as `pattern`."""
    Meta(pattern=good)

def test_pattern_invalid_type(self):
    """A value that is neither a `str` nor pattern-like is rejected with TypeError."""
    # Plain string literal: the message has no placeholders, so no f-prefix
    expected = "`pattern` must be a str, or an re.Pattern like type, got bytes"
    with pytest.raises(TypeError, match=expected):
        Meta(pattern=b"bad")

def test_conflicting_bounds_errors(self):
with pytest.raises(ValueError, match="both `gt` and `ge`"):
Meta(gt=0, ge=1)
Expand Down Expand Up @@ -443,6 +451,14 @@ class Ex(msgspec.Struct):
assert dec.decode(proto.encode(Ex(x)))


class CustomRegexPattern:
def __init__(self, pattern: str) -> None:
self.pattern = pattern

def search(self, v: str) -> re.Match[str] | None:
return re.search(self.pattern, v)


class TestStrConstraints:
def test_min_length(self, proto):
class Ex(msgspec.Struct):
Expand Down Expand Up @@ -478,6 +494,8 @@ class Ex(msgspec.Struct):
("", ["", "test"], []),
("as", ["as", "ease", "ast", "pass"], ["", "nope"]),
("^pre[123]*$", ["pre1", "pre123"], ["apre1", "pre1two"]),
pytest.param(re.compile("as"), ["as"], ["nope"], id="re.Pattern"),
pytest.param(CustomRegexPattern("as"), ["as"], ["nope"], id="CustomPattern"),
],
)
def test_pattern(self, proto, pattern, good, bad):
Expand All @@ -489,7 +507,8 @@ class Ex(msgspec.Struct):
for x in good:
assert dec.decode(proto.encode(Ex(x))).x == x

err_msg = f"Expected `str` matching regex {pattern!r} - at `$.x`"
raw_pattern = pattern if isinstance(pattern, str) else pattern.pattern
err_msg = f"Expected `str` matching regex {raw_pattern!r} - at `$.x`"
for x in bad:
with pytest.raises(msgspec.ValidationError) as rec:
dec.decode(proto.encode(Ex(x)))
Expand Down
Loading