Skip to content

Commit c277df4

Browse files
Ilya Zakharevich authored and Malcolm Beattie committed
Jumbo regexp patch applied (with minor fix-up tweaks):
Subject: Version 7 of Jumbo RE patch available
p4raw-id: /depot/perl@267
1 parent 5d5aaa5 commit c277df4

26 files changed: +3166 -1127 lines changed

dump.c

+5-6
Original file line numberDiff line numberDiff line change
@@ -359,18 +359,17 @@ dump_pm(PMOP *pm)
359359
dump("PMf_REPL = ");
360360
dump_op(pm->op_pmreplroot);
361361
}
362-
if (pm->op_pmshort) {
363-
dump("PMf_SHORT = %s\n",SvPEEK(pm->op_pmshort));
364-
}
365-
if (pm->op_pmflags) {
362+
if (pm->op_pmflags || (pm->op_pmregexp && pm->op_pmregexp->check_substr)) {
366363
SV *tmpsv = newSVpv("", 0);
367364
if (pm->op_pmflags & PMf_USED)
368365
sv_catpv(tmpsv, ",USED");
369366
if (pm->op_pmflags & PMf_ONCE)
370367
sv_catpv(tmpsv, ",ONCE");
371-
if (pm->op_pmflags & PMf_SCANFIRST)
368+
if (pm->op_pmregexp && pm->op_pmregexp->check_substr
369+
&& !(pm->op_pmregexp->reganch & ROPT_NOSCAN))
372370
sv_catpv(tmpsv, ",SCANFIRST");
373-
if (pm->op_pmflags & PMf_ALL)
371+
if (pm->op_pmregexp && pm->op_pmregexp->check_substr
372+
&& pm->op_pmregexp->reganch & ROPT_CHECK_ALL)
374373
sv_catpv(tmpsv, ",ALL");
375374
if (pm->op_pmflags & PMf_SKIPWHITE)
376375
sv_catpv(tmpsv, ",SKIPWHITE");

embed.h

+3-24
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@
185185
#define do_trans Perl_do_trans
186186
#define do_vecset Perl_do_vecset
187187
#define do_vop Perl_do_vop
188-
#define doeval Perl_doeval
189188
#define dofindlabel Perl_dofindlabel
190189
#define dopoptoeval Perl_dopoptoeval
191190
#define dounwind Perl_dounwind
@@ -331,6 +330,7 @@
331330
#define magic_clearsig Perl_magic_clearsig
332331
#define magic_existspack Perl_magic_existspack
333332
#define magic_freedefelem Perl_magic_freedefelem
333+
#define magic_freeregexp Perl_magic_freeregexp
334334
#define magic_get Perl_magic_get
335335
#define magic_getarylen Perl_magic_getarylen
336336
#define magic_getdefelem Perl_magic_getdefelem
@@ -890,32 +890,11 @@
890890
#define reall_srchlen Perl_reall_srchlen
891891
#define ref Perl_ref
892892
#define refkids Perl_refkids
893-
#define regarglen Perl_regarglen
894-
#define regbol Perl_regbol
895-
#define regcode Perl_regcode
896-
#define regdummy Perl_regdummy
897893
#define regdump Perl_regdump
898-
#define regendp Perl_regendp
899-
#define regeol Perl_regeol
900-
#define reginput Perl_reginput
894+
#define regexec_flags Perl_regexec_flags
901895
#define regkind Perl_regkind
902-
#define reglastparen Perl_reglastparen
903-
#define regmyendp Perl_regmyendp
904-
#define regmyp_size Perl_regmyp_size
905-
#define regmystartp Perl_regmystartp
906-
#define regnarrate Perl_regnarrate
907-
#define regnaughty Perl_regnaughty
908896
#define regnext Perl_regnext
909-
#define regnpar Perl_regnpar
910-
#define regparse Perl_regparse
911-
#define regprecomp Perl_regprecomp
912-
#define regprev Perl_regprev
913897
#define regprop Perl_regprop
914-
#define regsawback Perl_regsawback
915-
#define regsize Perl_regsize
916-
#define regstartp Perl_regstartp
917-
#define regtill Perl_regtill
918-
#define regxend Perl_regxend
919898
#define repeat_amg Perl_repeat_amg
920899
#define repeat_ass_amg Perl_repeat_ass_amg
921900
#define repeatcpy Perl_repeatcpy
@@ -1046,6 +1025,7 @@
10461025
#define sv_cmp Perl_sv_cmp
10471026
#define sv_cmp_locale Perl_sv_cmp_locale
10481027
#define sv_collxfrm Perl_sv_collxfrm
1028+
#define sv_compile_2op Perl_sv_compile_2op
10491029
#define sv_dec Perl_sv_dec
10501030
#define sv_derived_from Perl_sv_derived_from
10511031
#define sv_dump Perl_sv_dump
@@ -1197,7 +1177,6 @@
11971177
#define no_wrongref Perl_no_wrongref
11981178
#define pad_reset_pending Perl_pad_reset_pending
11991179
#define padix_floor Perl_padix_floor
1200-
#define regflags Perl_regflags
12011180
#define safecalloc Perl_safecalloc
12021181
#define safefree Perl_safefree
12031182
#define safemalloc Perl_safemalloc

global.sym

+7-24
Original file line numberDiff line numberDiff line change
@@ -173,30 +173,11 @@ psig_name
173173
psig_ptr
174174
rcsid
175175
reall_srchlen
176-
regarglen
177-
regbol
178-
regcode
179-
regdummy
180-
regendp
181-
regeol
182-
regflags
183-
reginput
176+
regdump
177+
regexec_flags
184178
regkind
185-
reglastparen
186-
regmyendp
187-
regmyp_size
188-
regmystartp
189-
regnarrate
190-
regnaughty
191-
regnpar
192-
regparse
193-
regprecomp
194-
regprev
195-
regsawback
196-
regsize
197-
regstartp
198-
regtill
199-
regxend
179+
regnext
180+
regprop
200181
repeat_amg
201182
repeat_ass_amg
202183
retstack
@@ -429,7 +410,6 @@ do_tell
429410
do_trans
430411
do_vecset
431412
do_vop
432-
doeval
433413
dofindlabel
434414
dopoptoeval
435415
dounwind
@@ -526,6 +506,7 @@ magic_clearpack
526506
magic_clearsig
527507
magic_existspack
528508
magic_freedefelem
509+
magic_freeregexp
529510
magic_get
530511
magic_getarylen
531512
magic_getdefelem
@@ -1021,6 +1002,7 @@ q
10211002
ref
10221003
refkids
10231004
regdump
1005+
regexec_flags
10241006
regnext
10251007
regprop
10261008
repeatcpy
@@ -1120,6 +1102,7 @@ sv_clean_objs
11201102
sv_clear
11211103
sv_cmp
11221104
sv_cmp_locale
1105+
sv_compile_2op
11231106
sv_collxfrm
11241107
sv_dec
11251108
sv_derived_from

mg.c

+9-1
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ magic_get(SV *sv, MAGIC *mg)
418418
}
419419
sv_setpvn(sv,s,i);
420420
if (tainting)
421-
tainted = was_tainted || rx->exec_tainted;
421+
tainted = was_tainted || RX_MATCH_TAINTED(rx);
422422
break;
423423
}
424424
}
@@ -1305,6 +1305,14 @@ magic_setuvar(SV *sv, MAGIC *mg)
13051305
return 0;
13061306
}
13071307

1308+
int
1309+
magic_freeregexp(SV *sv, MAGIC *mg)
1310+
{
1311+
regexp *re = (regexp *)mg->mg_obj;
1312+
ReREFCNT_dec(re);
1313+
return 0;
1314+
}
1315+
13081316
#ifdef USE_LOCALE_COLLATE
13091317
int
13101318
magic_setcollxfrm(SV *sv, MAGIC *mg)

op.c

+13-10
Original file line numberDiff line numberDiff line change
@@ -522,6 +522,12 @@ find_thread_magical(char *name)
522522
case ';':
523523
sv_setpv(sv, "\034");
524524
break;
525+
case '&':
526+
case '`':
527+
case '\'':
528+
sawampersand = TRUE;
529+
SvREADONLY_on(sv);
530+
break;
525531
}
526532
sv_magic(sv, 0, name, 1);
527533
DEBUG_L(PerlIO_printf(PerlIO_stderr(),
@@ -594,8 +600,7 @@ op_free(OP *o)
594600
/* FALL THROUGH */
595601
case OP_PUSHRE:
596602
case OP_MATCH:
597-
pregfree(cPMOPo->op_pmregexp);
598-
SvREFCNT_dec(cPMOPo->op_pmshort);
603+
ReREFCNT_dec(cPMOPo->op_pmregexp);
599604
break;
600605
}
601606

@@ -1914,7 +1919,12 @@ newUNOP(I32 type, I32 flags, OP *first)
19141919
unop->op_first = first;
19151920
unop->op_flags = flags | OPf_KIDS;
19161921
unop->op_private = 1 | (flags >> 8);
1917-
1922+
#if 1
1923+
if(type == OP_STUDY && first->op_type == OP_MATCH) {
1924+
first->op_type = OP_PUSHRE;
1925+
first->op_ppaddr = ppaddr[OP_PUSHRE];
1926+
}
1927+
#endif
19181928
unop = (UNOP*) CHECKOP(type, unop);
19191929
if (unop->op_next)
19201930
return (OP*)unop;
@@ -2065,7 +2075,6 @@ pmruntime(OP *o, OP *expr, OP *repl)
20652075
pm->op_pmregexp = pregcomp(p, p + plen, pm);
20662076
if (strEQ("\\s+", pm->op_pmregexp->precomp))
20672077
pm->op_pmflags |= PMf_WHITE;
2068-
hoistmust(pm);
20692078
op_free(expr);
20702079
}
20712080
else {
@@ -4446,7 +4455,6 @@ OP *
44464455
ck_split(OP *o)
44474456
{
44484457
register OP *kid;
4449-
PMOP* pm;
44504458

44514459
if (o->op_flags & OPf_STACKED)
44524460
return no_fh_allowed(o);
@@ -4471,11 +4479,6 @@ ck_split(OP *o)
44714479
cLISTOPo->op_first = kid;
44724480
kid->op_sibling = sibl;
44734481
}
4474-
pm = (PMOP*)kid;
4475-
if (pm->op_pmshort && !(pm->op_pmflags & PMf_ALL)) {
4476-
SvREFCNT_dec(pm->op_pmshort); /* can't use substring to optimize */
4477-
pm->op_pmshort = 0;
4478-
}
44794482

44804483
kid->op_type = OP_PUSHRE;
44814484
kid->op_ppaddr = ppaddr[OP_PUSHRE];

op.h

+2-4
Original file line numberDiff line numberDiff line change
@@ -177,16 +177,14 @@ struct pmop {
177177
OP * op_pmreplstart;
178178
PMOP * op_pmnext; /* list of all scanpats */
179179
REGEXP * op_pmregexp; /* compiled expression */
180-
SV * op_pmshort; /* for a fast bypass of execute() */
181180
U16 op_pmflags;
182181
U16 op_pmpermflags;
183-
char op_pmslen;
184182
};
185183

186184
#define PMf_USED 0x0001 /* pm has been used once already */
187185
#define PMf_ONCE 0x0002 /* use pattern only once per reset */
188-
#define PMf_SCANFIRST 0x0004 /* initial constant not anchored */
189-
#define PMf_ALL 0x0008 /* initial constant is whole pat */
186+
#define PMf_REVERSED 0x0004 /* Should be matched right->left */
187+
/*#define PMf_ALL 0x0008*/ /* initial constant is whole pat */
190188
#define PMf_SKIPWHITE 0x0010 /* skip leading whitespace for split */
191189
#define PMf_FOLD 0x0020 /* case insensitivity */
192190
#define PMf_CONST 0x0040 /* subst replacement is constant */

perl.c

-30
Original file line numberDiff line numberDiff line change
@@ -418,36 +418,6 @@ perl_destruct(register PerlInterpreter *sv_interp)
418418

419419
/* defgv, aka *_ should be taken care of elsewhere */
420420

421-
#if 0 /* just about all regexp stuff, seems to be ok */
422-
423-
/* shortcuts to regexp stuff */
424-
leftgv = Nullgv;
425-
ampergv = Nullgv;
426-
427-
SAVEFREEOP(curpm);
428-
SAVEFREEOP(oldlastpm); /* for saving regexp context during debugger */
429-
430-
regprecomp = NULL; /* uncompiled string. */
431-
regparse = NULL; /* Input-scan pointer. */
432-
regxend = NULL; /* End of input for compile */
433-
regnpar = 0; /* () count. */
434-
regcode = NULL; /* Code-emit pointer; &regdummy = don't. */
435-
regsize = 0; /* Code size. */
436-
regnaughty = 0; /* How bad is this pattern? */
437-
regsawback = 0; /* Did we see \1, ...? */
438-
439-
reginput = NULL; /* String-input pointer. */
440-
regbol = NULL; /* Beginning of input, for ^ check. */
441-
regeol = NULL; /* End of input, for $ check. */
442-
regstartp = (char **)NULL; /* Pointer to startp array. */
443-
regendp = (char **)NULL; /* Ditto for endp. */
444-
reglastparen = 0; /* Similarly for lastparen. */
445-
regtill = NULL; /* How far we are required to go. */
446-
regflags = 0; /* are we folding, multilining? */
447-
regprev = (char)NULL; /* char before regbol, \n if none */
448-
449-
#endif /* if 0 */
450-
451421
/* clean up after study() */
452422
SvREFCNT_dec(lastscream);
453423
lastscream = Nullsv;

perl.h

+3-23
Original file line numberDiff line numberDiff line change
@@ -1748,29 +1748,6 @@ EXT U32 hints; /* various compilation flags */
17481748
#define HINT_STRICT_VARS 0x00000400
17491749
#define HINT_LOCALE 0x00000800
17501750

1751-
/**************************************************************************/
1752-
/* This regexp stuff is global since it always happens within 1 expr eval */
1753-
/**************************************************************************/
1754-
1755-
EXT char * regprecomp; /* uncompiled string. */
1756-
EXT char * regparse; /* Input-scan pointer. */
1757-
EXT char * regxend; /* End of input for compile */
1758-
EXT I32 regnpar; /* () count. */
1759-
EXT char * regcode; /* Code-emit pointer; &regdummy = don't. */
1760-
EXT I32 regsize; /* Code size. */
1761-
EXT I32 regnaughty; /* How bad is this pattern? */
1762-
EXT I32 regsawback; /* Did we see \1, ...? */
1763-
1764-
EXT char * reginput; /* String-input pointer. */
1765-
EXT char * regbol; /* Beginning of input, for ^ check. */
1766-
EXT char * regeol; /* End of input, for $ check. */
1767-
EXT char ** regstartp; /* Pointer to startp array. */
1768-
EXT char ** regendp; /* Ditto for endp. */
1769-
EXT U32 * reglastparen; /* Similarly for lastparen. */
1770-
EXT char * regtill; /* How far we are required to go. */
1771-
EXT U16 regflags; /* are we folding, multilining? */
1772-
EXT char regprev; /* char before regbol, \n if none */
1773-
17741751
EXT bool do_undump; /* -u or dump seen? */
17751752
EXT VOL U32 debug;
17761753

@@ -2072,6 +2049,8 @@ EXT MGVTBL vtbl_mutex = {0, 0, 0, 0, magic_mutexfree};
20722049
EXT MGVTBL vtbl_defelem = {magic_getdefelem,magic_setdefelem,
20732050
0, 0, magic_freedefelem};
20742051

2052+
/* Magic vtable for compiled regexps: only a free handler is installed.
 * It must sit in the fifth slot, as in vtbl_mutex and vtbl_defelem;
 * with only two leading zeros it would land in the third slot and
 * never run when the magic is freed. */
EXT MGVTBL vtbl_regexp = {0, 0, 0, 0, magic_freeregexp};
2053+
20752054
#ifdef USE_LOCALE_COLLATE
20762055
EXT MGVTBL vtbl_collxfrm = {0,
20772056
magic_setcollxfrm,
@@ -2114,6 +2093,7 @@ EXT MGVTBL vtbl_mutex;
21142093
#endif /* USE_THREADS */
21152094

21162095
EXT MGVTBL vtbl_defelem;
2096+
EXT MGVTBL vtbl_regexp;
21172097

21182098
#ifdef USE_LOCALE_COLLATE
21192099
EXT MGVTBL vtbl_collxfrm;

0 commit comments

Comments
 (0)

Follow Lee on X/Twitter - Father, Husband, Serial builder creating AI, crypto, games & web tools. We are friends :) AI Will Come To Life!

Check out: eBank.nz (Art Generator) | Netwrck.com (AI Tools) | Text-Generator.io (AI API) | BitBank.nz (Crypto AI) | ReadingTime (Kids Reading) | RewordGame | BigMultiplayerChess | WebFiddle | How.nz | Helix AI Assistant