123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554 |
- From ba6e2c38aafc23cf114f3ba0d0ff3baead34328b Mon Sep 17 00:00:00 2001
- From: Yves Orton <demerphq@gmail.com>
- Date: Tue, 1 Aug 2023 23:12:46 +0200
- Subject: [PATCH] regcomp*.c, regexec.c - fixup regex engine build under
- -Uusedl
- The regex engine is built a bit different from most of the perl
- codebase. It is compiled as part of the main libperl.so and it is
- also compiled (with DEBUGGING enabled) as part of the re extension.
- When perl itself is compiled with DEBUGGING enabled then the code
- in the re.so extension and the code in libperl.so is the same.
- This all works fine and dandy until you have a static build where the
- re.so is linked into libperl.so, which results in duplicate symbols
- being defined. These symbols come in two flaviours: "auxiliary" and
- "debugging" related symbols.
- We have basically three cases:
- 1. USE_DYNAMIC_LOADING is defined. In this case we are doing a dynamic
- build and re.so will be separate from libperl.so, so it even if this
- is a DEBUGGING enabled build debug and auxiliary functions can be
- compiled into *both* re.so and libperl.so. This is basically the
- "standard build".
- 2. USE_DYNAMIC_LOADING is not defined, and DEBUGGING is not defined
- either. In this case auxiliary functions should only be compiled in
- libperl.so, and the debug functions should only be compiled into
- re.so
- 3. USE_DYNAMIC_LOADING is not defined, and DEBUGGING *is* defined. In
- this case auxiliary functions AND debug functions should only be
- compiled into libperl.so
- It is possible to detect the different build options by looking at the
- defines 'USE_DYNAMIC_LOADING', 'PERL_EXT_RE_DEBUG' and
- 'DEBUGGING_RE_ONLY'. 'USE_DYNAMIC_LOADING' is NOT defined when we are
- building a static perl. 'PERL_EXT_RE_DEBUG' is defined only when we are
- building re.so, and 'DEBUGGING_RE_ONLY' is defined only when we are
- building re.so in a perl that is not itself already a DEBUGGING enabled
- perl. The file ext/re/re_top.h responsible for setting up
- DEBUGGING_RE_ONLY.
- This patch uses 'PERL_EXT_RE_DEBUG', 'DEBUGGING_RE_ONLY' and
- 'USE_DYNAMIC_LOADING' to define in regcomp.h two further define flags
- 'PERL_RE_BUILD_DEBUG' and 'PERL_RE_BUILD_AUX'.
- The 'PERL_RE_BUILD_DEBUG' flag determines if the debugging functions
- should be compiled into libperl.so or re.so or both. The
- 'PERL_RE_BUILD_AUX' flag determines if the auxiliary functions should be
- compiled into just libperl.so or into it and re.so. We then use these
- flags to guard the different types of functions so that we can build in
- all three modes without duplicate symbols.
- Upstream: https://github.com/Perl/perl5/commit/ba6e2c38aafc23cf114f3ba0d0ff3baead34328b
- Signed-off-by: Fabrice Fontaine <fontaine.fabrice@gmail.com>
- ---
- regcomp.c | 13 +-
- regcomp.h | 14 ++-
- regcomp_debug.c | 308 +++++++++++++++++++++++-----------------------
- regcomp_invlist.c | 3 +-
- regexec.c | 3 +-
- 5 files changed, 181 insertions(+), 160 deletions(-)
- diff --git a/regcomp.c b/regcomp.c
- index d3c135fbfad1..6e35d95d2ac6 100644
- --- a/regcomp.c
- +++ b/regcomp.c
- @@ -290,6 +290,7 @@ S_edit_distance(const UV* src,
- /* END of edit_distance() stuff
- * ========================================================= */
-
- +#ifdef PERL_RE_BUILD_AUX
- /* add a data member to the struct reg_data attached to this regex, it should
- * always return a non-zero return. the 's' argument is the type of the items
- * being added and the n is the number of items. The length of 's' should match
- @@ -340,6 +341,7 @@ Perl_reg_add_data(RExC_state_t* const pRExC_state, const char* const s, const U3
- assert(count>0);
- return count;
- }
- +#endif /* PERL_RE_BUILD_AUX */
-
- /*XXX: todo make this not included in a non debugging perl, but appears to be
- * used anyway there, in 'use re' */
- @@ -7443,6 +7445,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
- }
-
-
- +#ifdef PERL_RE_BUILD_AUX
- void
- Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
- {
- @@ -7502,6 +7505,7 @@ Perl_populate_anyof_bitmap_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
- }
- }
- }
- +#endif /* PERL_RE_BUILD_AUX */
-
- /* Parse POSIX character classes: [[:foo:]], [[=foo=]], [[.foo.]].
- Character classes ([:foo:]) can also be negated ([:^foo:]).
- @@ -9095,6 +9099,7 @@ S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
- #undef IS_OPERATOR
- #undef IS_OPERAND
-
- +#ifdef PERL_RE_BUILD_AUX
- void
- Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invlist)
- {
- @@ -9182,6 +9187,8 @@ Perl_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** i
- }
- }
- }
- +#endif /* PERL_RE_BUILD_AUX */
- +
-
- STATIC void
- S_output_posix_warnings(pTHX_ RExC_state_t *pRExC_state, AV* posix_warnings)
- @@ -12105,6 +12112,7 @@ S_optimize_regclass(pTHX_
-
- #undef HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION
-
- +#ifdef PERL_RE_BUILD_AUX
- void
- Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
- regnode* const node,
- @@ -12261,6 +12269,7 @@ Perl_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
- RExC_rxi->data->data[n] = (void*)rv;
- ARG1u_SET(node, n);
- }
- +#endif /* PERL_RE_BUILD_AUX */
-
- SV *
-
- @@ -12999,6 +13008,8 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
- }
- #endif
-
- +
- +#ifdef PERL_RE_BUILD_AUX
- SV*
- Perl_get_ANYOFM_contents(pTHX_ const regnode * n) {
-
- @@ -13047,7 +13058,7 @@ Perl_get_ANYOFHbbm_contents(pTHX_ const regnode * n) {
- UTF_CONTINUATION_MARK | 0));
- return cp_list;
- }
- -
- +#endif /* PERL_RE_BUILD_AUX */
-
-
- SV *
- diff --git a/regcomp.h b/regcomp.h
- index 31c91e6a68e8..017a9f843514 100644
- --- a/regcomp.h
- +++ b/regcomp.h
- @@ -1554,7 +1554,19 @@ typedef enum {
- #define EVAL_OPTIMISTIC_FLAG 128
- #define EVAL_FLAGS_MASK (EVAL_OPTIMISTIC_FLAG-1)
-
- -
- +/* We define PERL_RE_BUILD_DEBUG if we are NOT compiling the re extension and
- + * we are under DEBUGGING, or if we are ARE compiling the re extension
- + * and this is not a DEBUGGING enabled build (identified by
- + * DEBUGGING_RE_ONLY being defined)
- + */
- +#if ( defined(USE_DYNAMIC_LOADING) && defined(DEBUGGING)) || \
- + ( defined(PERL_EXT_RE_BUILD) && defined(DEBUGGING_RE_ONLY)) || \
- + (!defined(PERL_EXT_RE_BUILD) && defined(DEBUGGING))
- +#define PERL_RE_BUILD_DEBUG
- +#endif
- +#if ( defined(USE_DYNAMIC_LOADING) || !defined(PERL_EXT_RE_BUILD) )
- +#define PERL_RE_BUILD_AUX
- +#endif
-
- #endif /* PERL_REGCOMP_H_ */
-
- diff --git a/regcomp_debug.c b/regcomp_debug.c
- index 93db7a89cf48..96598c49c0bc 100644
- --- a/regcomp_debug.c
- +++ b/regcomp_debug.c
- @@ -18,8 +18,7 @@
- #include "unicode_constants.h"
- #include "regcomp_internal.h"
-
- -#ifdef DEBUGGING
- -
- +#ifdef PERL_RE_BUILD_DEBUG
- int
- Perl_re_printf(pTHX_ const char *fmt, ...)
- {
- @@ -159,13 +158,160 @@ Perl_debug_peep(pTHX_ const char *str, const RExC_state_t *pRExC_state,
- });
- }
-
- -#endif /* DEBUGGING */
- +const regnode *
- +Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
- + const regnode *last, const regnode *plast,
- + SV* sv, I32 indent, U32 depth)
- +{
- + const regnode *next;
- + const regnode *optstart= NULL;
- +
- + RXi_GET_DECL(r, ri);
- + DECLARE_AND_GET_RE_DEBUG_FLAGS;
- +
- + PERL_ARGS_ASSERT_DUMPUNTIL;
- +
- +#ifdef DEBUG_DUMPUNTIL
- + Perl_re_printf( aTHX_ "--- %d : %d - %d - %d\n", indent, node-start,
- + last ? last-start : 0, plast ? plast-start : 0);
- +#endif
- +
- + if (plast && plast < last)
- + last= plast;
- +
- + while (node && (!last || node < last)) {
- + const U8 op = OP(node);
- +
- + if (op == CLOSE || op == SRCLOSE || op == WHILEM)
- + indent--;
- + next = regnext((regnode *)node);
- + const regnode *after = regnode_after((regnode *)node,0);
- +
- + /* Where, what. */
- + if (op == OPTIMIZED) {
- + if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
- + optstart = node;
- + else
- + goto after_print;
- + } else
- + CLEAR_OPTSTART;
- +
- + regprop(r, sv, node, NULL, NULL);
- + Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start),
- + (int)(2*indent + 1), "", SvPVX_const(sv));
- +
- + if (op != OPTIMIZED) {
- + if (next == NULL) /* Next ptr. */
- + Perl_re_printf( aTHX_ " (0)");
- + else if (REGNODE_TYPE(op) == BRANCH
- + && REGNODE_TYPE(OP(next)) != BRANCH )
- + Perl_re_printf( aTHX_ " (FAIL)");
- + else
- + Perl_re_printf( aTHX_ " (%" IVdf ")", (IV)(next - start));
- + Perl_re_printf( aTHX_ "\n");
- + }
- +
- + after_print:
- + if (REGNODE_TYPE(op) == BRANCHJ) {
- + assert(next);
- + const regnode *nnode = (OP(next) == LONGJMP
- + ? regnext((regnode *)next)
- + : next);
- + if (last && nnode > last)
- + nnode = last;
- + DUMPUNTIL(after, nnode);
- + }
- + else if (REGNODE_TYPE(op) == BRANCH) {
- + assert(next);
- + DUMPUNTIL(after, next);
- + }
- + else if ( REGNODE_TYPE(op) == TRIE ) {
- + const regnode *this_trie = node;
- + const U32 n = ARG1u(node);
- + const reg_ac_data * const ac = op>=AHOCORASICK ?
- + (reg_ac_data *)ri->data->data[n] :
- + NULL;
- + const reg_trie_data * const trie =
- + (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
- +#ifdef DEBUGGING
- + AV *const trie_words
- + = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
- +#endif
- + const regnode *nextbranch= NULL;
- + I32 word_idx;
- + SvPVCLEAR(sv);
- + for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
- + SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);
- +
- + Perl_re_indentf( aTHX_ "%s ",
- + indent+3,
- + elem_ptr
- + ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
- + SvCUR(*elem_ptr), PL_dump_re_max_len,
- + PL_colors[0], PL_colors[1],
- + (SvUTF8(*elem_ptr)
- + ? PERL_PV_ESCAPE_UNI
- + : 0)
- + | PERL_PV_PRETTY_ELLIPSES
- + | PERL_PV_PRETTY_LTGT
- + )
- + : "???"
- + );
- + if (trie->jump) {
- + U16 dist= trie->jump[word_idx+1];
- + Perl_re_printf( aTHX_ "(%" UVuf ")\n",
- + (UV)((dist ? this_trie + dist : next) - start));
- + if (dist) {
- + if (!nextbranch)
- + nextbranch= this_trie + trie->jump[0];
- + DUMPUNTIL(this_trie + dist, nextbranch);
- + }
- + if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
- + nextbranch= regnext((regnode *)nextbranch);
- + } else {
- + Perl_re_printf( aTHX_ "\n");
- + }
- + }
- + if (last && next > last)
- + node= last;
- + else
- + node= next;
- + }
- + else if ( op == CURLY ) { /* "next" might be very big: optimizer */
- + DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
- + }
- + else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
- + assert(next);
- + DUMPUNTIL(after, next);
- + }
- + else if ( op == PLUS || op == STAR) {
- + DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
- + }
- + else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
- + /* Literal string, where present. */
- + node = (const regnode *)REGNODE_AFTER_varies(node);
- + }
- + else {
- + node = REGNODE_AFTER_opcode(node,op);
- + }
- + if (op == CURLYX || op == OPEN || op == SROPEN)
- + indent++;
- + if (REGNODE_TYPE(op) == END)
- + break;
- + }
- + CLEAR_OPTSTART;
- +#ifdef DEBUG_DUMPUNTIL
- + Perl_re_printf( aTHX_ "--- %d\n", (int)indent);
- +#endif
- + return node;
- +}
- +
- +#endif /* PERL_RE_BUILD_DEBUG */
-
- /*
- - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
- */
- #ifdef DEBUGGING
- -
- static void
- S_regdump_intflags(pTHX_ const char *lead, const U32 flags)
- {
- @@ -907,8 +1053,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
- #endif /* DEBUGGING */
- }
-
- -#ifdef DEBUGGING
-
- +#ifdef DEBUGGING
- STATIC void
- S_put_code_point(pTHX_ SV *sv, UV c)
- {
- @@ -1517,154 +1663,4 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
-
- return did_output_something;
- }
- -
- -
- -const regnode *
- -Perl_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
- - const regnode *last, const regnode *plast,
- - SV* sv, I32 indent, U32 depth)
- -{
- - const regnode *next;
- - const regnode *optstart= NULL;
- -
- - RXi_GET_DECL(r, ri);
- - DECLARE_AND_GET_RE_DEBUG_FLAGS;
- -
- - PERL_ARGS_ASSERT_DUMPUNTIL;
- -
- -#ifdef DEBUG_DUMPUNTIL
- - Perl_re_printf( aTHX_ "--- %d : %d - %d - %d\n", indent, node-start,
- - last ? last-start : 0, plast ? plast-start : 0);
- -#endif
- -
- - if (plast && plast < last)
- - last= plast;
- -
- - while (node && (!last || node < last)) {
- - const U8 op = OP(node);
- -
- - if (op == CLOSE || op == SRCLOSE || op == WHILEM)
- - indent--;
- - next = regnext((regnode *)node);
- - const regnode *after = regnode_after((regnode *)node,0);
- -
- - /* Where, what. */
- - if (op == OPTIMIZED) {
- - if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
- - optstart = node;
- - else
- - goto after_print;
- - } else
- - CLEAR_OPTSTART;
- -
- - regprop(r, sv, node, NULL, NULL);
- - Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start),
- - (int)(2*indent + 1), "", SvPVX_const(sv));
- -
- - if (op != OPTIMIZED) {
- - if (next == NULL) /* Next ptr. */
- - Perl_re_printf( aTHX_ " (0)");
- - else if (REGNODE_TYPE(op) == BRANCH
- - && REGNODE_TYPE(OP(next)) != BRANCH )
- - Perl_re_printf( aTHX_ " (FAIL)");
- - else
- - Perl_re_printf( aTHX_ " (%" IVdf ")", (IV)(next - start));
- - Perl_re_printf( aTHX_ "\n");
- - }
- -
- - after_print:
- - if (REGNODE_TYPE(op) == BRANCHJ) {
- - assert(next);
- - const regnode *nnode = (OP(next) == LONGJMP
- - ? regnext((regnode *)next)
- - : next);
- - if (last && nnode > last)
- - nnode = last;
- - DUMPUNTIL(after, nnode);
- - }
- - else if (REGNODE_TYPE(op) == BRANCH) {
- - assert(next);
- - DUMPUNTIL(after, next);
- - }
- - else if ( REGNODE_TYPE(op) == TRIE ) {
- - const regnode *this_trie = node;
- - const U32 n = ARG1u(node);
- - const reg_ac_data * const ac = op>=AHOCORASICK ?
- - (reg_ac_data *)ri->data->data[n] :
- - NULL;
- - const reg_trie_data * const trie =
- - (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
- -#ifdef DEBUGGING
- - AV *const trie_words
- - = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
- -#endif
- - const regnode *nextbranch= NULL;
- - I32 word_idx;
- - SvPVCLEAR(sv);
- - for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
- - SV ** const elem_ptr = av_fetch_simple(trie_words, word_idx, 0);
- -
- - Perl_re_indentf( aTHX_ "%s ",
- - indent+3,
- - elem_ptr
- - ? pv_pretty(sv, SvPV_nolen_const(*elem_ptr),
- - SvCUR(*elem_ptr), PL_dump_re_max_len,
- - PL_colors[0], PL_colors[1],
- - (SvUTF8(*elem_ptr)
- - ? PERL_PV_ESCAPE_UNI
- - : 0)
- - | PERL_PV_PRETTY_ELLIPSES
- - | PERL_PV_PRETTY_LTGT
- - )
- - : "???"
- - );
- - if (trie->jump) {
- - U16 dist= trie->jump[word_idx+1];
- - Perl_re_printf( aTHX_ "(%" UVuf ")\n",
- - (UV)((dist ? this_trie + dist : next) - start));
- - if (dist) {
- - if (!nextbranch)
- - nextbranch= this_trie + trie->jump[0];
- - DUMPUNTIL(this_trie + dist, nextbranch);
- - }
- - if (nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
- - nextbranch= regnext((regnode *)nextbranch);
- - } else {
- - Perl_re_printf( aTHX_ "\n");
- - }
- - }
- - if (last && next > last)
- - node= last;
- - else
- - node= next;
- - }
- - else if ( op == CURLY ) { /* "next" might be very big: optimizer */
- - DUMPUNTIL(after, after + 1); /* +1 is NOT a REGNODE_AFTER */
- - }
- - else if (REGNODE_TYPE(op) == CURLY && op != CURLYX) {
- - assert(next);
- - DUMPUNTIL(after, next);
- - }
- - else if ( op == PLUS || op == STAR) {
- - DUMPUNTIL(after, after + 1); /* +1 NOT a REGNODE_AFTER */
- - }
- - else if (REGNODE_TYPE(op) == EXACT || op == ANYOFHs) {
- - /* Literal string, where present. */
- - node = (const regnode *)REGNODE_AFTER_varies(node);
- - }
- - else {
- - node = REGNODE_AFTER_opcode(node,op);
- - }
- - if (op == CURLYX || op == OPEN || op == SROPEN)
- - indent++;
- - if (REGNODE_TYPE(op) == END)
- - break;
- - }
- - CLEAR_OPTSTART;
- -#ifdef DEBUG_DUMPUNTIL
- - Perl_re_printf( aTHX_ "--- %d\n", (int)indent);
- -#endif
- - return node;
- -}
- -
- -#endif /* DEBUGGING */
- +#endif /* DEBUGGING */
- diff --git a/regcomp_invlist.c b/regcomp_invlist.c
- index 9ea3f431817d..82f82305846a 100644
- --- a/regcomp_invlist.c
- +++ b/regcomp_invlist.c
- @@ -18,7 +18,7 @@
- #include "unicode_constants.h"
- #include "regcomp_internal.h"
-
- -
- +#ifdef PERL_RE_BUILD_AUX
- void
- Perl_populate_bitmap_from_invlist(pTHX_ SV * invlist, const UV offset, const U8 * bitmap, const Size_t len)
- {
- @@ -70,6 +70,7 @@ Perl_populate_invlist_from_bitmap(pTHX_ const U8 * bitmap, const Size_t bitmap_l
- }
- }
- }
- +#endif /* PERL_RE_BUILD_AUX */
-
- /* This section of code defines the inversion list object and its methods. The
- * interfaces are highly subject to change, so as much as possible is static to
- diff --git a/regexec.c b/regexec.c
- index c404d9aa3d73..de0b7c461918 100644
- --- a/regexec.c
- +++ b/regexec.c
- @@ -4428,7 +4428,8 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
- */
- #define REPORT_CODE_OFF 29
- #define INDENT_CHARS(depth) ((int)(depth) % 20)
- -#ifdef DEBUGGING
- +
- +#ifdef PERL_RE_BUILD_DEBUG
- int
- Perl_re_exec_indentf(pTHX_ const char *fmt, U32 depth, ...)
- {
|