Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mypy/typeshed/stubs/librt/librt/strings.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ def isspace(c: i32, /) -> bool: ...
def isdigit(c: i32, /) -> bool: ...
def isalnum(c: i32, /) -> bool: ...
def isalpha(c: i32, /) -> bool: ...
def isidentifier(c: i32, /) -> bool: ...
12 changes: 5 additions & 7 deletions mypyc/lib-rt/codepoint_extra_ops.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// All codepoint helper bodies live in codepoint_extra_ops.h as static
// inline. This translation unit exists so the header is pulled into
// mypyc-compiled extensions via SourceDep("codepoint_extra_ops.c") in
// mypyc/ir/deps.py (which, in include_runtime_files mode, emits
// `#include <codepoint_extra_ops.c>` into the generated __native.c).
#include "codepoint_extra_ops.h"

// Out-of-line bodies for codepoint helpers that are too large to inline.
// The classification helpers and the ASCII fast paths for case conversion
// stay inline in codepoint_extra_ops.h; this file holds the slow paths
// that round-trip through PyUnicode_FromOrdinal and CPython's Unicode
// machinery. Currently empty; populated as later commits add
// isidentifier, toupper, and tolower.
23 changes: 23 additions & 0 deletions mypyc/lib-rt/codepoint_extra_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <Python.h>
#include <stdbool.h>
#include <stdint.h>
#include "CPy.h"

// Codepoint helpers for librt.strings.
// Inputs are signed int32_t for compatibility with mypyc's i32 type.
Expand All @@ -25,4 +26,26 @@ static inline bool LibRTStrings_IsAlpha(int32_t c) {
return c >= 0 && Py_UNICODE_ISALPHA((Py_UCS4)c);
}

// True if c could start a valid identifier, i.e. if str.isidentifier()
// reports True for the 1-character string chr(c). That is the XID_Start
// set plus '_'.
//
// c is a signed 32-bit value (mypyc i32). Anything outside the Unicode
// codepoint range [0, 0x10FFFF] is not a character at all and yields
// false, in line with the other codepoint classifiers in this header.
//
// The ASCII fast path covers `[A-Za-z_]` inline; non-ASCII delegates to
// PyUnicode_IsIdentifier for correct PEP 3131 handling, which requires a
// temporary 1-character str. Aborts via CPyError_OutOfMemory on
// allocation failure, so this helper never returns with an exception
// set and stays ERR_NEVER.
static inline bool LibRTStrings_IsIdentifier(int32_t c) {
    // Range check first: PyUnicode_FromOrdinal raises ValueError for
    // ordinals above 0x10FFFF, which must not be mistaken for an
    // out-of-memory condition below.
    if (c < 0 || c > 0x10FFFF) {
        return false;
    }
    if (c < 128) {
        return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || c == '_';
    }
    PyObject *s = PyUnicode_FromOrdinal((int)c);
    if (s == NULL) {
        // The ordinal was validated above, so the remaining failure
        // mode is allocation failure.
        CPyError_OutOfMemory();
    }
    int r = PyUnicode_IsIdentifier(s);
    Py_DECREF(s);
    // Compare against 1 explicitly so any non-1 result maps to false.
    return r == 1;
}

#endif // MYPYC_CODEPOINT_EXTRA_OPS_H
4 changes: 4 additions & 0 deletions mypyc/lib-rt/strings/librt_strings.c
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,7 @@ DEFINE_CP_BOOL_WRAPPER(isspace, LibRTStrings_IsSpace)
DEFINE_CP_BOOL_WRAPPER(isdigit, LibRTStrings_IsDigit)
DEFINE_CP_BOOL_WRAPPER(isalnum, LibRTStrings_IsAlnum)
DEFINE_CP_BOOL_WRAPPER(isalpha, LibRTStrings_IsAlpha)
DEFINE_CP_BOOL_WRAPPER(isidentifier, LibRTStrings_IsIdentifier)

static PyMethodDef librt_strings_module_methods[] = {
{"write_i16_le", (PyCFunction) write_i16_le, METH_FASTCALL,
Expand Down Expand Up @@ -1268,6 +1269,9 @@ static PyMethodDef librt_strings_module_methods[] = {
{"isalpha", cp_isalpha, METH_O,
PyDoc_STR("Test whether a codepoint (i32) is a Unicode letter.")
},
{"isidentifier", cp_isidentifier, METH_O,
PyDoc_STR("Test whether a codepoint (i32) is a valid identifier start (XID_Start).")
},
{NULL, NULL, 0, NULL}
};

Expand Down
12 changes: 12 additions & 0 deletions mypyc/primitives/librt_strings_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,3 +431,15 @@
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
)

# isidentifier reports whether a single codepoint could start an
# identifier, matching str.isidentifier() on a 1-character string.
# The non-ASCII path in the C helper allocates a temporary str; on
# allocation failure the helper aborts via CPyError_OutOfMemory rather
# than raising or returning False, which is what lets this op stay
# ERR_NEVER.
function_op(
name="librt.strings.isidentifier",
arg_types=[int32_rprimitive],
return_type=bool_rprimitive,
c_function_name="LibRTStrings_IsIdentifier",
error_kind=ERR_NEVER,
dependencies=[LIBRT_STRINGS, CODEPOINT_EXTRA_OPS],
)
14 changes: 14 additions & 0 deletions mypyc/test-data/irbuild-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -387,3 +387,17 @@ def is_a(c):
L0:
r0 = LibRTStrings_IsAlpha(c)
return r0

[case testLibrtStringsIsIdentifierIR]
from librt.strings import isidentifier
from mypy_extensions import i32

def is_id(c: i32) -> bool:
return isidentifier(c)
[out]
def is_id(c):
c :: i32
r0 :: bool
L0:
r0 = LibRTStrings_IsIdentifier(c)
return r0
5 changes: 4 additions & 1 deletion mypyc/test-data/run-librt-strings.test
Original file line number Diff line number Diff line change
Expand Up @@ -1443,7 +1443,7 @@ def test_new_without_init_is_usable() -> None:
[case testLibrtStringsCodepointClassifiers_librt]
from typing import Any
from mypy_extensions import i32
from librt.strings import isspace, isdigit, isalnum, isalpha
from librt.strings import isspace, isdigit, isalnum, isalpha, isidentifier

from testutil import assertRaises

Expand All @@ -1455,6 +1455,7 @@ def test_codepoint_classifiers() -> None:
assert not isdigit(bad)
assert not isalnum(bad)
assert not isalpha(bad)
assert not isidentifier(bad)
# Verify each codepoint primitive agrees with the matching str method
# across all Unicode codepoints, including the ord(chr(i)) round-trip.
# Any forces generic dispatch on the str side.
Expand All @@ -1466,6 +1467,7 @@ def test_codepoint_classifiers() -> None:
assert isdigit(o) == isdigit(i) == a.isdigit()
assert isalnum(o) == isalnum(i) == a.isalnum()
assert isalpha(o) == isalpha(i) == a.isalpha()
assert isidentifier(o) == isidentifier(i) == a.isidentifier()


def test_codepoint_classifiers_via_any() -> None:
Expand All @@ -1476,6 +1478,7 @@ def test_codepoint_classifiers_via_any() -> None:
(isdigit, "5", "a"),
(isalnum, "A", " "),
(isalpha, "A", " "),
(isidentifier, "A", "0"),
):
f: Any = fn
assert f(ord(true_input)) is True
Expand Down
Loading