src/src/pcre/pcre_exec.c

   1 /* $Cambridge: exim/src/src/pcre/pcre_exec.c,v 1.4 2007/01/23 15:08:45 ph10 Exp $ */
   2
   3 /*************************************************
   4 *      Perl-Compatible Regular Expressions       *
   5 *************************************************/
   6
   7 /* PCRE is a library of functions to support regular expressions whose syntax
   8 and semantics are as close as possible to those of the Perl 5 language.
   9
  10                        Written by Philip Hazel
  11            Copyright (c) 1997-2006 University of Cambridge
  12
  13 -----------------------------------------------------------------------------
  14 Redistribution and use in source and binary forms, with or without
  15 modification, are permitted provided that the following conditions are met:
  16
  17     * Redistributions of source code must retain the above copyright notice,
  18       this list of conditions and the following disclaimer.
  19
  20     * Redistributions in binary form must reproduce the above copyright
  21       notice, this list of conditions and the following disclaimer in the
  22       documentation and/or other materials provided with the distribution.
  23
  24     * Neither the name of the University of Cambridge nor the names of its
  25       contributors may be used to endorse or promote products derived from
  26       this software without specific prior written permission.
  27
  28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  38 POSSIBILITY OF SUCH DAMAGE.
  39 -----------------------------------------------------------------------------
  40 */
  41
  42
  43 /* This module contains pcre_exec(), the externally visible function that does
  44 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
  45 possible. There are also some static supporting functions. */
  46
   47 #define NLBLOCK md             /* Block containing newline information */
   48 #define PSSTART start_subject  /* Field containing processed string start */
   49 #define PSEND   end_subject    /* Field containing processed string end */
   50 /* NOTE(review): presumably consumed by macros in pcre_internal.h - confirm. */
  51 #include "pcre_internal.h"
  52
   53 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
   54 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
   55 /* match() gives PCRE_ERROR_NULLWSLIMIT once md->eptrn reaches this size. */
   56 #define EPTR_WORK_SIZE (1000)
   57
   58 /* Flag bits for the match() function */
   59
   60 #define match_condassert     0x01  /* Called to check a condition assertion */
   61 #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
   62 #define match_tail_recursed  0x04  /* Tail recursive call */
   63
   64 /* Non-error returns from the match() function. Error returns are externally
   65 defined PCRE_ERROR_xxx codes, which are all negative. */
   66
   67 #define MATCH_MATCH        1   /* Matched */
   68 #define MATCH_NOMATCH      0   /* Failed to match */
   69
   70 /* Maximum number of ints of offset to save on the stack for recursive calls.
   71 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
   72 because the offset vector is always a multiple of 3 long. */
   73
   74 #define REC_STACK_SAVE_MAX 30   /* Dimension of stacksave[] in match() */
   75
   76 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
   77 /* NOTE(review): six entries each, presumably one per common repeat opcode - confirm order. */
   78 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
   79 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
   80
  81
  82
  83 #ifdef DEBUG
  84 /*************************************************
  85 *        Debugging function to print chars       *
  86 *************************************************/
  87
  88 /* Print a sequence of chars in printable format, stopping at the end of the
  89 subject if the requested.
  90
  91 Arguments:
  92   p           points to characters
  93   length      number to print
  94   is_subject  TRUE if printing from within md->start_subject
  95   md          pointer to matching data block, if is_subject is TRUE
  96
  97 Returns:     nothing
  98 */
  99
 100 static void
 101 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
 102 {
 103 unsigned int c;
 104 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
 105 while (length-- > 0)
 106   if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
 107 }
 108 #endif
 109
 110
 111
 112 /*************************************************
 113 *          Match a back-reference                *
 114 *************************************************/
 115
 116 /* If a back reference hasn't been set, the length that is passed is greater
 117 than the number of characters left in the string, so the match fails.
 118
 119 Arguments:
 120   offset      index into the offset vector
 121   eptr        points into the subject
 122   length      length to be matched
 123   md          points to match data block
 124   ims         the ims flags
 125
 126 Returns:      TRUE if matched
 127 */
 128
 129 static BOOL
 130 match_ref(int offset, register USPTR eptr, int length, match_data *md,
 131   unsigned long int ims)
 132 {
 133 USPTR p = md->start_subject + md->offset_vector[offset];
 134
 135 #ifdef DEBUG
 136 if (eptr >= md->end_subject)
 137   printf("matching subject <null>");
 138 else
 139   {
 140   printf("matching subject ");
 141   pchars(eptr, length, TRUE, md);
 142   }
 143 printf(" against backref ");
 144 pchars(p, length, FALSE, md);
 145 printf("\n");
 146 #endif
 147
 148 /* Always fail if not enough characters left */
 149
 150 if (length > md->end_subject - eptr) return FALSE;
 151
 152 /* Separate the caselesss case for speed */
 153
 154 if ((ims & PCRE_CASELESS) != 0)
 155   {
 156   while (length-- > 0)
 157     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
 158   }
 159 else
 160   { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
 161
 162 return TRUE;
 163 }
 164
 165
 166
 167 /***************************************************************************
 168 ****************************************************************************
 169                    RECURSION IN THE match() FUNCTION
 170
 171 The match() function is highly recursive, though not every recursive call
 172 increases the recursive depth. Nevertheless, some regular expressions can cause
 173 it to recurse to a great depth. I was writing for Unix, so I just let it call
 174 itself recursively. This uses the stack for saving everything that has to be
 175 saved for a recursive call. On Unix, the stack can be large, and this works
 176 fine.
 177
 178 It turns out that on some non-Unix-like systems there are problems with
 179 programs that use a lot of stack. (This despite the fact that every last chip
 180 has oodles of memory these days, and techniques for extending the stack have
 181 been known for decades.) So....
 182
 183 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
 184 calls by keeping local variables that need to be preserved in blocks of memory
  185 obtained from malloc() instead of on the stack. Macros are used to
 186 achieve this so that the actual code doesn't look very different to what it
 187 always used to.
 188 ****************************************************************************
 189 ***************************************************************************/
 190
 191
  192 /* These versions of the macros use the stack, as normal. There are debugging
  193 versions and production versions. */
  194 /* RMATCH puts the result of match(..., rdepth+1) in rx; RRETURN returns ra. */
  195 #ifndef NO_RECURSE
  196 #define REGISTER register      /* Qualifies match()'s eptr and ecode */
  197 #ifdef DEBUG                   /* Tracing variants: log each call and return */
  198 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  199   { \
  200   printf("match() called in line %d\n", __LINE__); \
  201   rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
  202   printf("to line %d\n", __LINE__); \
  203   }
  204 #define RRETURN(ra) \
  205   { \
  206   printf("match() returned %d from line %d ", ra, __LINE__); \
  207   return ra; \
  208   }
  209 #else                          /* Production variants: plain call and return */
  210 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  211   rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
  212 #define RRETURN(ra) return ra
  213 #endif                         /* DEBUG */
 214
 215 #else
 216
 217
 218 /* These versions of the macros manage a private stack on the heap. Note
 219 that the rd argument of RMATCH isn't actually used. It's the md argument of
 220 match(), which never changes. */
 221
 222 #define REGISTER
 223
 224 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
 225   {\
 226   heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
 227   if (setjmp(frame->Xwhere) == 0)\
 228     {\
 229     newframe->Xeptr = ra;\
 230     newframe->Xecode = rb;\
 231     newframe->Xoffset_top = rc;\
 232     newframe->Xims = re;\
 233     newframe->Xeptrb = rf;\
 234     newframe->Xflags = rg;\
 235     newframe->Xrdepth = frame->Xrdepth + 1;\
 236     newframe->Xprevframe = frame;\
 237     frame = newframe;\
 238     DPRINTF(("restarting from line %d\n", __LINE__));\
 239     goto HEAP_RECURSE;\
 240     }\
 241   else\
 242     {\
 243     DPRINTF(("longjumped back to line %d\n", __LINE__));\
 244     frame = md->thisframe;\
 245     rx = frame->Xresult;\
 246     }\
 247   }
 248
  249 #define RRETURN(ra)\
  250   {\
  251   heapframe *newframe = frame;          /* The frame being left */\
  252   frame = newframe->Xprevframe;\
  253   (pcre_stack_free)(newframe);\
  254   if (frame != NULL)                    /* Not the top level: resume caller */\
  255     {\
  256     frame->Xresult = ra;\
  257     md->thisframe = frame;\
  258     longjmp(frame->Xwhere, 1);\
  259     }\
  260   return ra;                            /* Top level: leave match() itself */\
  261   }
 262
 263
  264 /* Structure for remembering the local variables in a private frame */
  265 /* Each Xname field is reached inside match() through a "#define name". */
  266 typedef struct heapframe {
  267   struct heapframe *Xprevframe;   /* Frame to resume in; NULL at the top level */
  268
  269   /* Function arguments that may change */
  270
  271   const uschar *Xeptr;
  272   const uschar *Xecode;
  273   int Xoffset_top;
  274   long int Xims;   /* NOTE(review): match() declares ims unsigned long int - confirm */
  275   eptrblock *Xeptrb;
  276   int Xflags;
  277   unsigned int Xrdepth;           /* RMATCH sets this to the caller's Xrdepth + 1 */
  278
  279   /* Function local variables */
  280
  281   const uschar *Xcallpat;
  282   const uschar *Xcharptr;         /* charptr is #defined only under SUPPORT_UTF8 */
  283   const uschar *Xdata;
  284   const uschar *Xnext;
  285   const uschar *Xpp;
  286   const uschar *Xprev;
  287   const uschar *Xsaved_eptr;
  288
  289   recursion_info Xnew_recursive;
  290
  291   BOOL Xcur_is_word;
  292   BOOL Xcondition;
  293   BOOL Xprev_is_word;
  294
  295   unsigned long int Xoriginal_ims;
  296
  297 #ifdef SUPPORT_UCP
  298   int Xprop_type;
  299   int Xprop_value;
  300   int Xprop_fail_result;
  301   int Xprop_category;
  302   int Xprop_chartype;
  303   int Xprop_script;
  304 #endif
  305
  306   int Xctype;
  307   unsigned int Xfc;
  308   int Xfi;
  309   int Xlength;
  310   int Xmax;
  311   int Xmin;
  312   int Xnumber;
  313   int Xoffset;
  314   int Xop;
  315   int Xsave_capture_last;
  316   int Xsave_offset1, Xsave_offset2, Xsave_offset3;
  317   int Xstacksave[REC_STACK_SAVE_MAX];
  318
  319   eptrblock Xnewptrb;
  320
  321   /* Place to pass back result, and where to jump back to */
  322
  323   int  Xresult;                   /* Written by RRETURN, read back by RMATCH */
  324   jmp_buf Xwhere;                 /* setjmp() context that RMATCH saves */
  325
  326 } heapframe;
 327
 328 #endif
 329
 330
 331 /***************************************************************************
 332 ***************************************************************************/
 333
 334
 335
 336 /*************************************************
 337 *         Match from current position            *
 338 *************************************************/
 339
 340 /* This function is called recursively in many circumstances. Whenever it
 341 returns a negative (error) response, the outer incarnation must also return the
 342 same response.
 343
 344 Performance note: It might be tempting to extract commonly used fields from the
 345 md structure (e.g. utf8, end_subject) into individual variables to improve
 346 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 347 made performance worse.
 348
 349 Arguments:
 350    eptr        pointer to current character in subject
 351    ecode       pointer to current position in compiled code
 352    offset_top  current top pointer
 353    md          pointer to "static" info for the match
 354    ims         current /i, /m, and /s options
 355    eptrb       pointer to chain of blocks containing eptr at start of
 356                  brackets - for testing for empty matches
 357    flags       can contain
 358                  match_condassert - this is an assertion condition
 359                  match_cbegroup - this is the start of an unlimited repeat
 360                    group that can match an empty string
 361                  match_tail_recursed - this is a tail_recursed group
 362    rdepth      the recursion depth
 363
 364 Returns:       MATCH_MATCH if matched            )  these values are >= 0
 365                MATCH_NOMATCH if failed to match  )
 366                a negative PCRE_ERROR_xxx value if aborted by an error condition
 367                  (e.g. stopped by repeated call or recursion limit)
 368 */
 369
 370 static int
 371 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
 372   int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
 373   int flags, unsigned int rdepth)
 374 {
 375 /* These variables do not need to be preserved over recursion in this function,
 376 so they can be ordinary variables in all cases. Mark some of them with
 377 "register" because they are used a lot in loops. */
 378
 379 register int  rrc;         /* Returns from recursive calls */
 380 register int  i;           /* Used for loops not involving calls to RMATCH() */
 381 register unsigned int c;   /* Character values not kept over RMATCH() calls */
 382 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 383
 384 BOOL minimize, possessive; /* Quantifier options */
 385
 386 /* When recursion is not being used, all "local" variables that have to be
 387 preserved over calls to RMATCH() are part of a "frame" which is obtained from
 388 heap storage. Set up the top-level frame here; others are obtained from the
 389 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
 390
 391 #ifdef NO_RECURSE
 392 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
 393 frame->Xprevframe = NULL;            /* Marks the top level */
 394
 395 /* Copy in the original argument variables */
 396
 397 frame->Xeptr = eptr;
 398 frame->Xecode = ecode;
 399 frame->Xoffset_top = offset_top;
 400 frame->Xims = ims;
 401 frame->Xeptrb = eptrb;
 402 frame->Xflags = flags;
 403 frame->Xrdepth = rdepth;
 404
  405 /* This is where control jumps back to, to effect "recursion" */
 406
 407 HEAP_RECURSE:
 408
 409 /* Macros make the argument variables come from the current frame */
 410
 411 #define eptr               frame->Xeptr
 412 #define ecode              frame->Xecode
 413 #define offset_top         frame->Xoffset_top
 414 #define ims                frame->Xims
 415 #define eptrb              frame->Xeptrb
 416 #define flags              frame->Xflags
 417 #define rdepth             frame->Xrdepth
 418
 419 /* Ditto for the local variables */
 420
 421 #ifdef SUPPORT_UTF8
 422 #define charptr            frame->Xcharptr
 423 #endif
 424 #define callpat            frame->Xcallpat
 425 #define data               frame->Xdata
 426 #define next               frame->Xnext
 427 #define pp                 frame->Xpp
 428 #define prev               frame->Xprev
 429 #define saved_eptr         frame->Xsaved_eptr
 430
 431 #define new_recursive      frame->Xnew_recursive
 432
 433 #define cur_is_word        frame->Xcur_is_word
 434 #define condition          frame->Xcondition
 435 #define prev_is_word       frame->Xprev_is_word
 436
 437 #define original_ims       frame->Xoriginal_ims
 438
 439 #ifdef SUPPORT_UCP
 440 #define prop_type          frame->Xprop_type
 441 #define prop_value         frame->Xprop_value
 442 #define prop_fail_result   frame->Xprop_fail_result
 443 #define prop_category      frame->Xprop_category
 444 #define prop_chartype      frame->Xprop_chartype
 445 #define prop_script        frame->Xprop_script
 446 #endif
 447
 448 #define ctype              frame->Xctype
 449 #define fc                 frame->Xfc
 450 #define fi                 frame->Xfi
 451 #define length             frame->Xlength
 452 #define max                frame->Xmax
 453 #define min                frame->Xmin
 454 #define number             frame->Xnumber
 455 #define offset             frame->Xoffset
 456 #define op                 frame->Xop
 457 #define save_capture_last  frame->Xsave_capture_last
 458 #define save_offset1       frame->Xsave_offset1
 459 #define save_offset2       frame->Xsave_offset2
 460 #define save_offset3       frame->Xsave_offset3
 461 #define stacksave          frame->Xstacksave
 462
 463 #define newptrb            frame->Xnewptrb
 464
 465 /* When recursion is being used, local variables are allocated on the stack and
 466 get preserved during recursion in the normal way. In this environment, fi and
 467 i, and fc and c, can be the same variables. */
 468
 469 #else         /* NO_RECURSE not defined */
 470 #define fi i
 471 #define fc c
 472
 473
 474 #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
 475 const uschar *charptr;             /* in small blocks of the code. My normal */
 476 #endif                             /* style of coding would have declared    */
 477 const uschar *callpat;             /* them within each of those blocks.      */
 478 const uschar *data;                /* However, in order to accommodate the   */
 479 const uschar *next;                /* version of this code that uses an      */
 480 USPTR         pp;                  /* external "stack" implemented on the    */
 481 const uschar *prev;                /* heap, it is easier to declare them all */
 482 USPTR         saved_eptr;          /* here, so the declarations can be cut   */
 483                                    /* out in a block. The only declarations  */
 484 recursion_info new_recursive;      /* within blocks below are for variables  */
 485                                    /* that do not have to be preserved over  */
 486 BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
 487 BOOL condition;
 488 BOOL prev_is_word;
 489
 490 unsigned long int original_ims;
 491
 492 #ifdef SUPPORT_UCP
 493 int prop_type;
 494 int prop_value;
 495 int prop_fail_result;
 496 int prop_category;
 497 int prop_chartype;
 498 int prop_script;
 499 #endif
 500
 501 int ctype;
 502 int length;
 503 int max;
 504 int min;
 505 int number;
 506 int offset;
 507 int op;
 508 int save_capture_last;
 509 int save_offset1, save_offset2, save_offset3;
 510 int stacksave[REC_STACK_SAVE_MAX];
 511
 512 eptrblock newptrb;
 513 #endif     /* NO_RECURSE */
 514
  515 /* These statements are here to stop the compiler complaining about uninitialized
 516 variables. */
 517
 518 #ifdef SUPPORT_UCP
 519 prop_value = 0;
 520 prop_fail_result = 0;
 521 #endif
 522
 523
 524 /* This label is used for tail recursion, which is used in a few cases even
 525 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
 526 used. Thanks to Ian Taylor for noticing this possibility and sending the
 527 original patch. */
 528
 529 TAIL_RECURSE:
 530
 531 /* OK, now we can get on with the real code of the function. Recursive calls
 532 are specified by the macro RMATCH and RRETURN is used to return. When
 533 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
 534 and a "return", respectively (possibly with some debugging if DEBUG is
 535 defined). However, RMATCH isn't like a function call because it's quite a
 536 complicated macro. It has to be used in one particular way. This shouldn't,
 537 however, impact performance when true recursion is being used. */
 538
 539 /* First check that we haven't called match() too many times, or that we
 540 haven't exceeded the recursive call limit. */
 541
 542 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
 543 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
 544
 545 original_ims = ims;    /* Save for resetting on ')' */
 546
 547 #ifdef SUPPORT_UTF8
 548 utf8 = md->utf8;       /* Local copy of the flag */
 549 #else
 550 utf8 = FALSE;
 551 #endif
 552
 553 /* At the start of a group with an unlimited repeat that may match an empty
 554 string, the match_cbegroup flag is set. When this is the case, add the current
 555 subject pointer to the chain of such remembered pointers, to be checked when we
 556 hit the closing ket, in order to break infinite loops that match no characters.
 557 When match() is called in other circumstances, don't add to the chain. If this
 558 is a tail recursion, use a block from the workspace, as the one on the stack is
 559 already used. */
 560
 561 if ((flags & match_cbegroup) != 0)
 562   {
 563   eptrblock *p;
 564   if ((flags & match_tail_recursed) != 0)
 565     {
 566     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
 567     p = md->eptrchain + md->eptrn++;
 568     }
 569   else p = &newptrb;
 570   p->epb_saved_eptr = eptr;
 571   p->epb_prev = eptrb;
 572   eptrb = p;
 573   }
 574
 575 /* Now start processing the opcodes. */
 576
 577 for (;;)
 578   {
 579   minimize = possessive = FALSE;
 580   op = *ecode;
 581
 582   /* For partial matching, remember if we ever hit the end of the subject after
 583   matching at least one subject character. */
 584
 585   if (md->partial &&
 586       eptr >= md->end_subject &&
 587       eptr > md->start_match)
 588     md->hitend = TRUE;
 589
 590   switch(op)
 591     {
 592     /* Handle a capturing bracket. If there is space in the offset vector, save
 593     the current subject position in the working slot at the top of the vector.
 594     We mustn't change the current values of the data slot, because they may be
 595     set from a previous iteration of this group, and be referred to by a
 596     reference inside the group.
 597
 598     If the bracket fails to match, we need to restore this value and also the
 599     values of the final offsets, in case they were set by a previous iteration
 600     of the same bracket.
 601
 602     If there isn't enough space in the offset vector, treat this as if it were
 603     a non-capturing bracket. Don't worry about setting the flag for the error
 604     case here; that is handled in the code for KET. */
 605
 606     case OP_CBRA:
 607     case OP_SCBRA:
 608     number = GET2(ecode, 1+LINK_SIZE);
 609     offset = number << 1;
 610
 611 #ifdef DEBUG
 612     printf("start bracket %d\n", number);
 613     printf("subject=");
 614     pchars(eptr, 16, TRUE, md);
 615     printf("\n");
 616 #endif
 617
 618     if (offset < md->offset_max)
 619       {
 620       save_offset1 = md->offset_vector[offset];
 621       save_offset2 = md->offset_vector[offset+1];
 622       save_offset3 = md->offset_vector[md->offset_end - number];
 623       save_capture_last = md->capture_last;
 624
 625       DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
 626       md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
 627
 628       flags = (op == OP_SCBRA)? match_cbegroup : 0;
 629       do
 630         {
 631         RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
 632           ims, eptrb, flags);
 633         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 634         md->capture_last = save_capture_last;
 635         ecode += GET(ecode, 1);
 636         }
 637       while (*ecode == OP_ALT);
 638
 639       DPRINTF(("bracket %d failed\n", number));
 640
 641       md->offset_vector[offset] = save_offset1;
 642       md->offset_vector[offset+1] = save_offset2;
 643       md->offset_vector[md->offset_end - number] = save_offset3;
 644
 645       RRETURN(MATCH_NOMATCH);
 646       }
 647
 648     /* Insufficient room for saving captured contents. Treat as a non-capturing
 649     bracket. */
 650
 651     DPRINTF(("insufficient capture room: treat as non-capturing\n"));
 652
 653     /* Non-capturing bracket. Loop for all the alternatives. When we get to the
 654     final alternative within the brackets, we would return the result of a
 655     recursive call to match() whatever happened. We can reduce stack usage by
 656     turning this into a tail recursion. */
 657
 658     case OP_BRA:
 659     case OP_SBRA:
 660     DPRINTF(("start non-capturing bracket\n"));
 661     flags = (op >= OP_SBRA)? match_cbegroup : 0;
 662     for (;;)
 663       {
 664       if (ecode[GET(ecode, 1)] != OP_ALT)
 665         {
 666         ecode += _pcre_OP_lengths[*ecode];
 667         flags |= match_tail_recursed;
 668         DPRINTF(("bracket 0 tail recursion\n"));
 669         goto TAIL_RECURSE;
 670         }
 671
 672       /* For non-final alternatives, continue the loop for a NOMATCH result;
 673       otherwise return. */
 674
 675       RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
 676         eptrb, flags);
 677       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 678       ecode += GET(ecode, 1);
 679       }
 680     /* Control never reaches here. */
 681
 682     /* Conditional group: compilation checked that there are no more than
 683     two branches. If the condition is false, skipping the first branch takes us
 684     past the end if there is only one branch, but that's OK because that is
 685     exactly what going to the ket would do. As there is only one branch to be
 686     obeyed, we can use tail recursion to avoid using another stack frame. */
 687
 688     case OP_COND:
 689     case OP_SCOND:
 690     if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
 691       {
 692       offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
 693       condition = md->recursive != NULL &&
 694         (offset == RREF_ANY || offset == md->recursive->group_num);
 695       ecode += condition? 3 : GET(ecode, 1);
 696       }
 697
 698     else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
 699       {
 700       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
 701       condition = offset < offset_top && md->offset_vector[offset] >= 0;
 702       ecode += condition? 3 : GET(ecode, 1);
 703       }
 704
 705     else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
 706       {
 707       condition = FALSE;
 708       ecode += GET(ecode, 1);
 709       }
 710
 711     /* The condition is an assertion. Call match() to evaluate it - setting
 712     the final argument match_condassert causes it to stop at the end of an
 713     assertion. */
 714
 715     else
 716       {
 717       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
 718           match_condassert);
 719       if (rrc == MATCH_MATCH)
 720         {
 721         condition = TRUE;
 722         ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
 723         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
 724         }
 725       else if (rrc != MATCH_NOMATCH)
 726         {
 727         RRETURN(rrc);         /* Need braces because of following else */
 728         }
 729       else
 730         {
 731         condition = FALSE;
 732         ecode += GET(ecode, 1);
 733         }
 734       }
 735
 736     /* We are now at the branch that is to be obeyed. As there is only one,
 737     we can use tail recursion to avoid using another stack frame. If the second
 738     alternative doesn't exist, we can just plough on. */
 739
 740     if (condition || *ecode == OP_ALT)
 741       {
 742       ecode += 1 + LINK_SIZE;
 743       flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
 744       goto TAIL_RECURSE;
 745       }
 746     else
 747       {
 748       ecode += 1 + LINK_SIZE;
 749       }
 750     break;
 751
 752
 753     /* End of the pattern. If we are in a top-level recursion, we should
 754     restore the offsets appropriately and continue from after the call. */
 755
 756     case OP_END:
 757     if (md->recursive != NULL && md->recursive->group_num == 0)
 758       {
 759       recursion_info *rec = md->recursive;
 760       DPRINTF(("End of pattern in a (?0) recursion\n"));
 761       md->recursive = rec->prevrec;
 762       memmove(md->offset_vector, rec->offset_save,
 763         rec->saved_max * sizeof(int));
 764       md->start_match = rec->save_start;
 765       ims = original_ims;
 766       ecode = rec->after_call;
 767       break;
 768       }
 769
 770     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
 771     string - backtracking will then try other alternatives, if any. */
 772
 773     if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
 774     md->end_match_ptr = eptr;          /* Record where we ended */
 775     md->end_offset_top = offset_top;   /* and how many extracts were taken */
 776     RRETURN(MATCH_MATCH);
 777
 778     /* Change option settings */
 779
 780     case OP_OPT:
 781     ims = ecode[1];
 782     ecode += 2;
 783     DPRINTF(("ims set to %02lx\n", ims));
 784     break;
 785
 786     /* Assertion brackets. Check the alternative branches in turn - the
 787     matching won't pass the KET for an assertion. If any one branch matches,
 788     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 789     start of each branch to move the current point backwards, so the code at
 790     this level is identical to the lookahead case. */
 791
 792     case OP_ASSERT:
 793     case OP_ASSERTBACK:
 794     do
 795       {
 796       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
 797       if (rrc == MATCH_MATCH) break;
 798       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 799       ecode += GET(ecode, 1);
 800       }
 801     while (*ecode == OP_ALT);
 802     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
 803
 804     /* If checking an assertion for a condition, return MATCH_MATCH. */
 805
 806     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 807
 808     /* Continue from after the assertion, updating the offsets high water
 809     mark, since extracts may have been taken during the assertion. */
 810
 811     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
 812     ecode += 1 + LINK_SIZE;
 813     offset_top = md->end_offset_top;
 814     continue;
 815
 816     /* Negative assertion: all branches must fail to match */
 817
 818     case OP_ASSERT_NOT:
 819     case OP_ASSERTBACK_NOT:
 820     do
 821       {
 822       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
 823       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
 824       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 825       ecode += GET(ecode,1);
 826       }
 827     while (*ecode == OP_ALT);
 828
 829     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
 830
 831     ecode += 1 + LINK_SIZE;
 832     continue;
 833
 834     /* Move the subject pointer back. This occurs only at the start of
 835     each branch of a lookbehind assertion. If we are too close to the start to
 836     move back, this match function fails. When working with UTF-8 we move
 837     back a number of characters, not bytes. */
 838
 839     case OP_REVERSE:
 840 #ifdef SUPPORT_UTF8
 841     if (utf8)
 842       {
 843       i = GET(ecode, 1);
 844       while (i-- > 0)
 845         {
 846         eptr--;
 847         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 848         BACKCHAR(eptr)
 849         }
 850       }
 851     else
 852 #endif
 853
 854     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
 855
 856       {
 857       eptr -= GET(ecode, 1);
 858       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
 859       }
 860
 861     /* Skip to next op code */
 862
 863     ecode += 1 + LINK_SIZE;
 864     break;
 865
 866     /* The callout item calls an external function, if one is provided, passing
 867     details of the match so far. This is mainly for debugging, though the
 868     function is able to force a failure. */
 869
 870     case OP_CALLOUT:
 871     if (pcre_callout != NULL)
 872       {
 873       pcre_callout_block cb;
 874       cb.version          = 1;   /* Version 1 of the callout block */
 875       cb.callout_number   = ecode[1];
 876       cb.offset_vector    = md->offset_vector;
 877       cb.subject          = (PCRE_SPTR)md->start_subject;
 878       cb.subject_length   = md->end_subject - md->start_subject;
 879       cb.start_match      = md->start_match - md->start_subject;
 880       cb.current_position = eptr - md->start_subject;
 881       cb.pattern_position = GET(ecode, 2);
 882       cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
 883       cb.capture_top      = offset_top/2;
 884       cb.capture_last     = md->capture_last;
 885       cb.callout_data     = md->callout_data;
 886       if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
 887       if (rrc < 0) RRETURN(rrc);
 888       }
 889     ecode += 2 + 2*LINK_SIZE;
 890     break;
 891
 892     /* Recursion either matches the current regex, or some subexpression. The
 893     offset data is the offset to the starting bracket from the start of the
 894     whole pattern. (This is so that it works from duplicated subpatterns.)
 895
 896     If there are any capturing brackets started but not finished, we have to
 897     save their starting points and reinstate them after the recursion. However,
 898     we don't know how many such there are (offset_top records the completed
 899     total) so we just have to save all the potential data. There may be up to
 900     65535 such values, which is too large to put on the stack, but using malloc
 901     for small numbers seems expensive. As a compromise, the stack is used when
 902     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
 903     is used. If the malloc fails, the recursion is not attempted and
 904     PCRE_ERROR_NOMEMORY is passed back through RRETURN in the same way as
 905     any other error code.
 906
 907     There are also other values that have to be saved. We use a chained
 908     sequence of blocks that actually live on the stack. Thanks to Robin Houston
 909     for the original version of this logic. */
 910
 911     case OP_RECURSE:
 912       {
 913       callpat = md->start_code + GET(ecode, 1);
 914       new_recursive.group_num = (callpat == md->start_code)? 0 :
 915         GET2(callpat, 1 + LINK_SIZE);
 916
 917       /* Add to "recursing stack" */
 918
 919       new_recursive.prevrec = md->recursive;
 920       md->recursive = &new_recursive;
 921
 922       /* Find where to continue from afterwards */
 923
 924       ecode += 1 + LINK_SIZE;
 925       new_recursive.after_call = ecode;
 926
 927       /* Now save the offset data. */
 928
 929       new_recursive.saved_max = md->offset_end;
 930       if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
 931         new_recursive.offset_save = stacksave;
 932       else
 933         {
 934         new_recursive.offset_save =
 935           (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
 936         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
 937         }
 938
 939       memcpy(new_recursive.offset_save, md->offset_vector,
 940             new_recursive.saved_max * sizeof(int));
 941       new_recursive.save_start = md->start_match;
 942       md->start_match = eptr;
 943
 944       /* OK, now we can do the recursion. For each top-level alternative we
 945       restore the offset and recursion data. */
 946
 947       DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
 948       flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
 949       do
 950         {
 951         RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
 952           md, ims, eptrb, flags);
 953         if (rrc == MATCH_MATCH)
 954           {
 955           DPRINTF(("Recursion matched\n"));
 956           md->recursive = new_recursive.prevrec;
 957           if (new_recursive.offset_save != stacksave)
 958             (pcre_free)(new_recursive.offset_save);
 959           RRETURN(MATCH_MATCH);
 960           }
 961         else if (rrc != MATCH_NOMATCH)
 962           {
 963           DPRINTF(("Recursion gave error %d\n", rrc));
 964           RRETURN(rrc);
 965           }
 966
 967         md->recursive = &new_recursive;
 968         memcpy(md->offset_vector, new_recursive.offset_save,
 969             new_recursive.saved_max * sizeof(int));
 970         callpat += GET(callpat, 1);
 971         }
 972       while (*callpat == OP_ALT);
 973
 974       DPRINTF(("Recursion didn't match\n"));
 975       md->recursive = new_recursive.prevrec;
 976       if (new_recursive.offset_save != stacksave)
 977         (pcre_free)(new_recursive.offset_save);
 978       RRETURN(MATCH_NOMATCH);
 979       }
 980     /* Control never reaches here */
 981
 982     /* "Once" brackets are like assertion brackets except that after a match,
 983     the point in the subject string is not moved back. Thus there can never be
 984     a move back into the brackets. Friedl calls these "atomic" subpatterns.
 985     Check the alternative branches in turn - the matching won't pass the KET
 986     for this kind of subpattern. If any one branch matches, we carry on as at
 987     the end of a normal bracket, leaving the subject pointer. */
 988
 989     case OP_ONCE:
 990     prev = ecode;
 991     saved_eptr = eptr;
 992
 993     do
 994       {
 995       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
 996         eptrb, 0);
 997       if (rrc == MATCH_MATCH) break;
 998       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
 999       ecode += GET(ecode,1);
1000       }
1001     while (*ecode == OP_ALT);
1002
1003     /* If hit the end of the group (which could be repeated), fail */
1004
1005     if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1006
1007     /* Continue as from after the assertion, updating the offsets high water
1008     mark, since extracts may have been taken. */
1009
1010     do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1011
1012     offset_top = md->end_offset_top;
1013     eptr = md->end_match_ptr;
1014
1015     /* For a non-repeating ket, just continue at this level. This also
1016     happens for a repeating ket if no characters were matched in the group.
1017     This is the forcible breaking of infinite loops as implemented in Perl
1018     5.005. If there is an options reset, it will get obeyed in the normal
1019     course of events. */
1020
1021     if (*ecode == OP_KET || eptr == saved_eptr)
1022       {
1023       ecode += 1+LINK_SIZE;
1024       break;
1025       }
1026
1027     /* The repeating kets try the rest of the pattern or restart from the
1028     preceding bracket, in the appropriate order. The second "call" of match()
1029     uses tail recursion, to avoid using another stack frame. We need to reset
1030     any options that changed within the bracket before re-running it, so
1031     check the next opcode. */
1032
1033     if (ecode[1+LINK_SIZE] == OP_OPT)
1034       {
1035       ims = (ims & ~PCRE_IMS) | ecode[4];
1036       DPRINTF(("ims set to %02lx at group repeat\n", ims));
1037       }
1038
1039     if (*ecode == OP_KETRMIN)
1040       {
1041       RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1042       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1043       ecode = prev;
1044       flags = match_tail_recursed;
1045       goto TAIL_RECURSE;
1046       }
1047     else  /* OP_KETRMAX */
1048       {
1049       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1050       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1051       ecode += 1 + LINK_SIZE;
1052       flags = match_tail_recursed;
1053       goto TAIL_RECURSE;
1054       }
1055     /* Control never gets here */
1056
1057     /* An alternation is the end of a branch; scan along to find the end of the
1058     bracketed group and go to there. */
1059
1060     case OP_ALT:
1061     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1062     break;
1063
1064     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1065     that it may occur zero times. It may repeat infinitely, or not at all -
1066     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1067     repeat limits are compiled as a number of copies, with the optional ones
1068     preceded by BRAZERO or BRAMINZERO. */
1069
1070     case OP_BRAZERO:
1071       {
1072       next = ecode+1;
1073       RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1074       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1075       do next += GET(next,1); while (*next == OP_ALT);
1076       ecode = next + 1 + LINK_SIZE;
1077       }
1078     break;
1079
1080     case OP_BRAMINZERO:
1081       {
1082       next = ecode+1;
1083       do next += GET(next, 1); while (*next == OP_ALT);
1084       RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1085       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1086       ecode++;
1087       }
1088     break;
1089
1090     /* End of a group, repeated or non-repeating. */
1091
1092     case OP_KET:
1093     case OP_KETRMIN:
1094     case OP_KETRMAX:
1095     prev = ecode - GET(ecode, 1);
1096
1097     /* If this was a group that remembered the subject start, in order to break
1098     infinite repeats of empty string matches, retrieve the subject start from
1099     the chain. Otherwise, set it NULL. */
1100
1101     if (*prev >= OP_SBRA)
1102       {
1103       saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1104       eptrb = eptrb->epb_prev;              /* Backup to previous group */
1105       }
1106     else saved_eptr = NULL;
1107
1108     /* If we are at the end of an assertion group, stop matching and return
1109     MATCH_MATCH, but record the current high water mark for use by positive
1110     assertions. Do this also for the "once" (atomic) groups. */
1111
1112     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1113         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1114         *prev == OP_ONCE)
1115       {
1116       md->end_match_ptr = eptr;      /* For ONCE */
1117       md->end_offset_top = offset_top;
1118       RRETURN(MATCH_MATCH);
1119       }
1120
1121     /* For capturing groups we have to check the group number back at the start
1122     and if necessary complete handling an extraction by setting the offsets and
1123     bumping the high water mark. Note that whole-pattern recursion is coded as
1124     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1125     when the OP_END is reached. Other recursion is handled here. */
1126
1127     if (*prev == OP_CBRA || *prev == OP_SCBRA)
1128       {
1129       number = GET2(prev, 1+LINK_SIZE);
1130       offset = number << 1;
1131
1132 #ifdef DEBUG
1133       printf("end bracket %d", number);
1134       printf("\n");
1135 #endif
1136
1137       md->capture_last = number;
1138       if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1139         {
1140         md->offset_vector[offset] =
1141           md->offset_vector[md->offset_end - number];
1142         md->offset_vector[offset+1] = eptr - md->start_subject;
1143         if (offset_top <= offset) offset_top = offset + 2;
1144         }
1145
1146       /* Handle a recursively called group. Restore the offsets
1147       appropriately and continue from after the call. */
1148
1149       if (md->recursive != NULL && md->recursive->group_num == number)
1150         {
1151         recursion_info *rec = md->recursive;
1152         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1153         md->recursive = rec->prevrec;
1154         md->start_match = rec->save_start;
1155         memcpy(md->offset_vector, rec->offset_save,
1156           rec->saved_max * sizeof(int));
1157         ecode = rec->after_call;
1158         ims = original_ims;
1159         break;
1160         }
1161       }
1162
1163     /* For both capturing and non-capturing groups, reset the value of the ims
1164     flags, in case they got changed during the group. */
1165
1166     ims = original_ims;
1167     DPRINTF(("ims reset to %02lx\n", ims));
1168
1169     /* For a non-repeating ket, just continue at this level. This also
1170     happens for a repeating ket if no characters were matched in the group.
1171     This is the forcible breaking of infinite loops as implemented in Perl
1172     5.005. If there is an options reset, it will get obeyed in the normal
1173     course of events. */
1174
1175     if (*ecode == OP_KET || eptr == saved_eptr)
1176       {
1177       ecode += 1 + LINK_SIZE;
1178       break;
1179       }
1180
1181     /* The repeating kets try the rest of the pattern or restart from the
1182     preceding bracket, in the appropriate order. In the second case, we can use
1183     tail recursion to avoid using another stack frame. */
1184
1185     flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1186
1187     if (*ecode == OP_KETRMIN)
1188       {
1189       RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1190       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1191       ecode = prev;
1192       flags |= match_tail_recursed;
1193       goto TAIL_RECURSE;
1194       }
1195     else  /* OP_KETRMAX */
1196       {
1197       RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1198       if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1199       ecode += 1 + LINK_SIZE;
1200       flags = match_tail_recursed;
1201       goto TAIL_RECURSE;
1202       }
1203     /* Control never gets here */
1204
1205     /* Start of subject unless notbol, or after internal newline if multiline */
1206
1207     case OP_CIRC:
1208     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1209     if ((ims & PCRE_MULTILINE) != 0)
1210       {
1211       if (eptr != md->start_subject &&
1212           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1213         RRETURN(MATCH_NOMATCH);
1214       ecode++;
1215       break;
1216       }
1217     /* ... else fall through */
1218
1219     /* Start of subject assertion */
1220
1221     case OP_SOD:
1222     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1223     ecode++;
1224     break;
1225
1226     /* Start of match assertion */
1227
1228     case OP_SOM:
1229     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1230     ecode++;
1231     break;
1232
1233     /* Assert before internal newline if multiline, or before a terminating
1234     newline unless endonly is set, else end of subject unless noteol is set. */
1235
1236     case OP_DOLL:
1237     if ((ims & PCRE_MULTILINE) != 0)
1238       {
1239       if (eptr < md->end_subject)
1240         { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1241       else
1242         { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1243       ecode++;
1244       break;
1245       }
1246     else
1247       {
1248       if (md->noteol) RRETURN(MATCH_NOMATCH);
1249       if (!md->endonly)
1250         {
1251         if (eptr != md->end_subject &&
1252             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1253           RRETURN(MATCH_NOMATCH);
1254         ecode++;
1255         break;
1256         }
1257       }
1258     /* ... else fall through for endonly */
1259
1260     /* End of subject assertion (\z) */
1261
1262     case OP_EOD:
1263     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1264     ecode++;
1265     break;
1266
1267     /* End of subject or ending \n assertion (\Z) */
1268
1269     case OP_EODN:
1270     if (eptr != md->end_subject &&
1271         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1272       RRETURN(MATCH_NOMATCH);
1273     ecode++;
1274     break;
1275
1276     /* Word boundary assertions */
1277
1278     case OP_NOT_WORD_BOUNDARY:
1279     case OP_WORD_BOUNDARY:
1280       {
1281
1282       /* Find out if the previous and current characters are "word" characters.
1283       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1284       be "non-word" characters. */
1285
1286 #ifdef SUPPORT_UTF8
1287       if (utf8)
1288         {
1289         if (eptr == md->start_subject) prev_is_word = FALSE; else
1290           {
1291           const uschar *lastptr = eptr - 1;
1292           while((*lastptr & 0xc0) == 0x80) lastptr--;
1293           GETCHAR(c, lastptr);
1294           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1295           }
1296         if (eptr >= md->end_subject) cur_is_word = FALSE; else
1297           {
1298           GETCHAR(c, eptr);
1299           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1300           }
1301         }
1302       else
1303 #endif
1304
1305       /* More streamlined when not in UTF-8 mode */
1306
1307         {
1308         prev_is_word = (eptr != md->start_subject) &&
1309           ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1310         cur_is_word = (eptr < md->end_subject) &&
1311           ((md->ctypes[*eptr] & ctype_word) != 0);
1312         }
1313
1314       /* Now see if the situation is what we want */
1315
1316       if ((*ecode++ == OP_WORD_BOUNDARY)?
1317            cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1318         RRETURN(MATCH_NOMATCH);
1319       }
1320     break;
1321
1322     /* Match a single character type; inline for speed */
1323
1324     case OP_ANY:
1325     if ((ims & PCRE_DOTALL) == 0)
1326       {
1327       if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1328       }
1329     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1330     if (utf8)
1331       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1332     ecode++;
1333     break;
1334
1335     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1336     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1337
1338     case OP_ANYBYTE:
1339     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1340     ecode++;
1341     break;
1342
1343     case OP_NOT_DIGIT:
1344     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1345     GETCHARINCTEST(c, eptr);
1346     if (
1347 #ifdef SUPPORT_UTF8
1348        c < 256 &&
1349 #endif
1350        (md->ctypes[c] & ctype_digit) != 0
1351        )
1352       RRETURN(MATCH_NOMATCH);
1353     ecode++;
1354     break;
1355
1356     case OP_DIGIT:
1357     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1358     GETCHARINCTEST(c, eptr);
1359     if (
1360 #ifdef SUPPORT_UTF8
1361        c >= 256 ||
1362 #endif
1363        (md->ctypes[c] & ctype_digit) == 0
1364        )
1365       RRETURN(MATCH_NOMATCH);
1366     ecode++;
1367     break;
1368
1369     case OP_NOT_WHITESPACE:
1370     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1371     GETCHARINCTEST(c, eptr);
1372     if (
1373 #ifdef SUPPORT_UTF8
1374        c < 256 &&
1375 #endif
1376        (md->ctypes[c] & ctype_space) != 0
1377        )
1378       RRETURN(MATCH_NOMATCH);
1379     ecode++;
1380     break;
1381
1382     case OP_WHITESPACE:
1383     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1384     GETCHARINCTEST(c, eptr);
1385     if (
1386 #ifdef SUPPORT_UTF8
1387        c >= 256 ||
1388 #endif
1389        (md->ctypes[c] & ctype_space) == 0
1390        )
1391       RRETURN(MATCH_NOMATCH);
1392     ecode++;
1393     break;
1394
1395     case OP_NOT_WORDCHAR:
1396     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1397     GETCHARINCTEST(c, eptr);
1398     if (
1399 #ifdef SUPPORT_UTF8
1400        c < 256 &&
1401 #endif
1402        (md->ctypes[c] & ctype_word) != 0
1403        )
1404       RRETURN(MATCH_NOMATCH);
1405     ecode++;
1406     break;
1407
1408     case OP_WORDCHAR:
1409     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1410     GETCHARINCTEST(c, eptr);
1411     if (
1412 #ifdef SUPPORT_UTF8
1413        c >= 256 ||
1414 #endif
1415        (md->ctypes[c] & ctype_word) == 0
1416        )
1417       RRETURN(MATCH_NOMATCH);
1418     ecode++;
1419     break;
1420
1421     case OP_ANYNL:
1422     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1423     GETCHARINCTEST(c, eptr);
1424     switch(c)
1425       {
1426       default: RRETURN(MATCH_NOMATCH);
1427       case 0x000d:
1428       if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1429       break;
1430       case 0x000a:
1431       case 0x000b:
1432       case 0x000c:
1433       case 0x0085:
1434       case 0x2028:
1435       case 0x2029:
1436       break;
1437       }
1438     ecode++;
1439     break;
1440
1441 #ifdef SUPPORT_UCP
1442     /* Check the next character by Unicode property. We will get here only
1443     if the support is in the binary; otherwise a compile-time error occurs. */
1444
1445     case OP_PROP:
1446     case OP_NOTPROP:
1447     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1448     GETCHARINCTEST(c, eptr);
1449       {
1450       int chartype, script;
1451       int category = _pcre_ucp_findprop(c, &chartype, &script);
1452
1453       switch(ecode[1])
1454         {
1455         case PT_ANY:
1456         if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1457         break;
1458
1459         case PT_LAMP:
1460         if ((chartype == ucp_Lu ||
1461              chartype == ucp_Ll ||
1462              chartype == ucp_Lt) == (op == OP_NOTPROP))
1463           RRETURN(MATCH_NOMATCH);
1464          break;
1465
1466         case PT_GC:
1467         if ((ecode[2] != category) == (op == OP_PROP))
1468           RRETURN(MATCH_NOMATCH);
1469         break;
1470
1471         case PT_PC:
1472         if ((ecode[2] != chartype) == (op == OP_PROP))
1473           RRETURN(MATCH_NOMATCH);
1474         break;
1475
1476         case PT_SC:
1477         if ((ecode[2] != script) == (op == OP_PROP))
1478           RRETURN(MATCH_NOMATCH);
1479         break;
1480
1481         default:
1482         RRETURN(PCRE_ERROR_INTERNAL);
1483         }
1484
1485       ecode += 3;
1486       }
1487     break;
1488
1489     /* Match an extended Unicode sequence. We will get here only if the support
1490     is in the binary; otherwise a compile-time error occurs. */
1491
1492     case OP_EXTUNI:
1493     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1494     GETCHARINCTEST(c, eptr);
1495       {
1496       int chartype, script;
1497       int category = _pcre_ucp_findprop(c, &chartype, &script);
1498       if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1499       while (eptr < md->end_subject)
1500         {
1501         int len = 1;
1502         if (!utf8) c = *eptr; else
1503           {
1504           GETCHARLEN(c, eptr, len);
1505           }
1506         category = _pcre_ucp_findprop(c, &chartype, &script);
1507         if (category != ucp_M) break;
1508         eptr += len;
1509         }
1510       }
1511     ecode++;
1512     break;
1513 #endif
1514
1515
1516     /* Match a back reference, possibly repeatedly. Look past the end of the
1517     item to see if there is repeat information following. The code is similar
1518     to that for character classes, but repeated for efficiency. Then obey
1519     similar code to character type repeats - written out again for speed.
1520     However, if the referenced string is the empty string, always treat
1521     it as matched, any number of times (otherwise there could be infinite
1522     loops). */
1523
1524     case OP_REF:
1525       {
1526       offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1527       ecode += 3;                                 /* Advance past item */
1528
1529       /* If the reference is unset, set the length to be longer than the amount
1530       of subject left; this ensures that every attempt at a match fails. We
1531       can't just fail here, because of the possibility of quantifiers with zero
1532       minima. */
1533
1534       length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1535         md->end_subject - eptr + 1 :
1536         md->offset_vector[offset+1] - md->offset_vector[offset];
1537
1538       /* Set up for repetition, or handle the non-repeated case */
1539
1540       switch (*ecode)
1541         {
1542         case OP_CRSTAR:
1543         case OP_CRMINSTAR:
1544         case OP_CRPLUS:
1545         case OP_CRMINPLUS:
1546         case OP_CRQUERY:
1547         case OP_CRMINQUERY:
1548         c = *ecode++ - OP_CRSTAR;
1549         minimize = (c & 1) != 0;
1550         min = rep_min[c];                 /* Pick up values from tables; */
1551         max = rep_max[c];                 /* zero for max => infinity */
1552         if (max == 0) max = INT_MAX;
1553         break;
1554
1555         case OP_CRRANGE:
1556         case OP_CRMINRANGE:
1557         minimize = (*ecode == OP_CRMINRANGE);
1558         min = GET2(ecode, 1);
1559         max = GET2(ecode, 3);
1560         if (max == 0) max = INT_MAX;
1561         ecode += 5;
1562         break;
1563
1564         default:               /* No repeat follows */
1565         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1566         eptr += length;
1567         continue;              /* With the main loop */
1568         }
1569
1570       /* If the length of the reference is zero, just continue with the
1571       main loop. */
1572
1573       if (length == 0) continue;
1574
1575       /* First, ensure the minimum number of matches are present. We get back
1576       the length of the reference string explicitly rather than passing the
1577       address of eptr, so that eptr can be a register variable. */
1578
1579       for (i = 1; i <= min; i++)
1580         {
1581         if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1582         eptr += length;
1583         }
1584
1585       /* If min = max, continue at the same level without recursion.
1586       They are not both allowed to be zero. */
1587
1588       if (min == max) continue;
1589
1590       /* If minimizing, keep trying and advancing the pointer */
1591
1592       if (minimize)
1593         {
1594         for (fi = min;; fi++)
1595           {
1596           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1597           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1598           if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1599             RRETURN(MATCH_NOMATCH);
1600           eptr += length;
1601           }
1602         /* Control never gets here */
1603         }
1604
1605       /* If maximizing, find the longest string and work backwards */
1606
1607       else
1608         {
1609         pp = eptr;
1610         for (i = min; i < max; i++)
1611           {
1612           if (!match_ref(offset, eptr, length, md, ims)) break;
1613           eptr += length;
1614           }
1615         while (eptr >= pp)
1616           {
1617           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1618           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1619           eptr -= length;
1620           }
1621         RRETURN(MATCH_NOMATCH);
1622         }
1623       }
1624     /* Control never gets here */
1625
1626
1627
1628     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1629     used when all the characters in the class have values in the range 0-255,
1630     and either the matching is caseful, or the characters are in the range
1631     0-127 when UTF-8 processing is enabled. The only difference between
1632     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1633     encountered.
1634
1635     First, look past the end of the item to see if there is repeat information
1636     following. Then obey similar code to character type repeats - written out
1637     again for speed. */
1638
1639     case OP_NCLASS:
1640     case OP_CLASS:
1641       {
1642       data = ecode + 1;                /* Save for matching */
1643       ecode += 33;                     /* Advance past the item */
1644
1645       switch (*ecode)
1646         {
1647         case OP_CRSTAR:
1648         case OP_CRMINSTAR:
1649         case OP_CRPLUS:
1650         case OP_CRMINPLUS:
1651         case OP_CRQUERY:
1652         case OP_CRMINQUERY:
1653         c = *ecode++ - OP_CRSTAR;
1654         minimize = (c & 1) != 0;
1655         min = rep_min[c];                 /* Pick up values from tables; */
1656         max = rep_max[c];                 /* zero for max => infinity */
1657         if (max == 0) max = INT_MAX;
1658         break;
1659
1660         case OP_CRRANGE:
1661         case OP_CRMINRANGE:
1662         minimize = (*ecode == OP_CRMINRANGE);
1663         min = GET2(ecode, 1);
1664         max = GET2(ecode, 3);
1665         if (max == 0) max = INT_MAX;
1666         ecode += 5;
1667         break;
1668
1669         default:               /* No repeat follows */
1670         min = max = 1;
1671         break;
1672         }
1673
1674       /* First, ensure the minimum number of matches are present. */
1675
1676 #ifdef SUPPORT_UTF8
1677       /* UTF-8 mode */
1678       if (utf8)
1679         {
1680         for (i = 1; i <= min; i++)
1681           {
1682           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1683           GETCHARINC(c, eptr);
1684           if (c > 255)
1685             {
1686             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1687             }
1688           else
1689             {
1690             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1691             }
1692           }
1693         }
1694       else
1695 #endif
1696       /* Not UTF-8 mode */
1697         {
1698         for (i = 1; i <= min; i++)
1699           {
1700           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1701           c = *eptr++;
1702           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1703           }
1704         }
1705
1706       /* If max == min we can continue with the main loop without the
1707       need to recurse. */
1708
1709       if (min == max) continue;
1710
1711       /* If minimizing, keep testing the rest of the expression and advancing
1712       the pointer while it matches the class. */
1713
1714       if (minimize)
1715         {
1716 #ifdef SUPPORT_UTF8
1717         /* UTF-8 mode */
1718         if (utf8)
1719           {
1720           for (fi = min;; fi++)
1721             {
1722             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1723             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1724             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1725             GETCHARINC(c, eptr);
1726             if (c > 255)
1727               {
1728               if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1729               }
1730             else
1731               {
1732               if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1733               }
1734             }
1735           }
1736         else
1737 #endif
1738         /* Not UTF-8 mode */
1739           {
1740           for (fi = min;; fi++)
1741             {
1742             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1743             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1744             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1745             c = *eptr++;
1746             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1747             }
1748           }
1749         /* Control never gets here */
1750         }
1751
1752       /* If maximizing, find the longest possible run, then work backwards. */
1753
1754       else
1755         {
1756         pp = eptr;
1757
1758 #ifdef SUPPORT_UTF8
1759         /* UTF-8 mode */
1760         if (utf8)
1761           {
1762           for (i = min; i < max; i++)
1763             {
1764             int len = 1;
1765             if (eptr >= md->end_subject) break;
1766             GETCHARLEN(c, eptr, len);
1767             if (c > 255)
1768               {
1769               if (op == OP_CLASS) break;
1770               }
1771             else
1772               {
1773               if ((data[c/8] & (1 << (c&7))) == 0) break;
1774               }
1775             eptr += len;
1776             }
1777           for (;;)
1778             {
1779             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1780             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1781             if (eptr-- == pp) break;        /* Stop if tried at original pos */
1782             BACKCHAR(eptr);
1783             }
1784           }
1785         else
1786 #endif
1787           /* Not UTF-8 mode */
1788           {
1789           for (i = min; i < max; i++)
1790             {
1791             if (eptr >= md->end_subject) break;
1792             c = *eptr;
1793             if ((data[c/8] & (1 << (c&7))) == 0) break;
1794             eptr++;
1795             }
1796           while (eptr >= pp)
1797             {
1798             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1799             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1800             eptr--;
1801             }
1802           }
1803
1804         RRETURN(MATCH_NOMATCH);
1805         }
1806       }
1807     /* Control never gets here */
1808
1809
1810     /* Match an extended character class, optionally followed by a repeat
1811     item. This opcode is met only in UTF-8 mode, the only time it is compiled. */
1812
1813 #ifdef SUPPORT_UTF8
1814     case OP_XCLASS:
1815       {
1816       data = ecode + 1 + LINK_SIZE;                /* Save for matching */
1817       ecode += GET(ecode, 1);                      /* Advance past the item */
1818       /* Decode any repeat opcode that follows into min, max and minimize. */
1819       switch (*ecode)
1820         {
1821         case OP_CRSTAR:
1822         case OP_CRMINSTAR:
1823         case OP_CRPLUS:
1824         case OP_CRMINPLUS:
1825         case OP_CRQUERY:
1826         case OP_CRMINQUERY:
1827         c = *ecode++ - OP_CRSTAR;         /* Offset used to index the tables */
1828         minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
1829         min = rep_min[c];                 /* Pick up values from tables; */
1830         max = rep_max[c];                 /* zero for max => infinity */
1831         if (max == 0) max = INT_MAX;
1832         break;
1833
1834         case OP_CRRANGE:
1835         case OP_CRMINRANGE:
1836         minimize = (*ecode == OP_CRMINRANGE);
1837         min = GET2(ecode, 1);
1838         max = GET2(ecode, 3);
1839         if (max == 0) max = INT_MAX;      /* Zero again stands for no limit */
1840         ecode += 5;
1841         break;
1842
1843         default:               /* No repeat follows */
1844         min = max = 1;
1845         break;
1846         }
1847
1848       /* First, ensure the minimum number of matches are present. */
1849
1850       for (i = 1; i <= min; i++)
1851         {
1852         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1853         GETCHARINC(c, eptr);
1854         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1855         }
1856
1857       /* When min == max nothing is optional, so carry on in the main loop
1858       with no need to recurse. */
1859
1860       if (min == max) continue;
1861
1862       /* If minimizing, try the rest of the expression first, and take one
1863       more class character only after each failure, up to max. */
1864
1865       if (minimize)
1866         {
1867         for (fi = min;; fi++)
1868           {
1869           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1870           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1871           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1872           GETCHARINC(c, eptr);
1873           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1874           }
1875         /* Control never gets here */
1876         }
1877
1878       /* If maximizing, find the longest possible run, then work backwards. */
1879
1880       else
1881         {
1882         pp = eptr;                        /* Where the optional part starts */
1883         for (i = min; i < max; i++)
1884           {
1885           int len = 1;
1886           if (eptr >= md->end_subject) break;
1887           GETCHARLEN(c, eptr, len);
1888           if (!_pcre_xclass(c, data)) break;
1889           eptr += len;
1890           }
1891         for(;;)
1892           {
1893           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1894           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1895           if (eptr-- == pp) break;        /* Stop if tried at original pos */
1896           BACKCHAR(eptr)                  /* NOTE(review): no ';' here - presumably the macro supplies it; confirm */
1897           }
1898         RRETURN(MATCH_NOMATCH);
1899         }
1900
1901       /* Control never gets here */
1902       }
1903 #endif    /* End of XCLASS */
1904
1905     /* Match a single literal character, casefully (bytes are compared as is) */
1906
1907     case OP_CHAR:
1908 #ifdef SUPPORT_UTF8
1909     if (utf8)
1910       {
1911       length = 1;
1912       ecode++;                            /* Move on to the pattern character */
1913       GETCHARLEN(fc, ecode, length);
1914       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1915       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1916       }
1917     else
1918 #endif
1919
1920     /* Non-UTF-8 mode: the character is the single byte after the opcode */
1921       {
1922       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1923       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1924       ecode += 2;
1925       }
1926     break;
1927
1928     /* Match a single literal character, caselessly */
1929
1930     case OP_CHARNC:
1931 #ifdef SUPPORT_UTF8
1932     if (utf8)
1933       {
1934       length = 1;
1935       ecode++;                            /* Move on to the pattern character */
1936       GETCHARLEN(fc, ecode, length);
1937
1938       if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1939
1940       /* If the pattern character's value is < 128, we have only one byte, and
1941       can compare both sides through the md->lcc lookup table. */
1942
1943       if (fc < 128)
1944         {
1945         if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1946         }
1947
1948       /* Otherwise we must pick up the subject character */
1949
1950       else
1951         {
1952         unsigned int dc;
1953         GETCHARINC(dc, eptr);
1954         ecode += length;
1955
1956         /* With Unicode property support, a differing character still matches
1957         if it is the other case of fc; without it, any difference fails. */
1958
1959         if (fc != dc)
1960           {
1961 #ifdef SUPPORT_UCP
1962           if (dc != _pcre_ucp_othercase(fc))
1963 #endif
1964             RRETURN(MATCH_NOMATCH);
1965           }
1966         }
1967       }
1968     else
1969 #endif   /* SUPPORT_UTF8 */
1970
1971     /* Non-UTF-8 mode: compare the two bytes through the md->lcc table */
1972       {
1973       if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1974       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1975       ecode += 2;
1976       }
1977     break;
1978
1979     /* Match a single character repeatedly. Each opcode sets min and max, then joins REPEATCHAR. */
1980
1981     case OP_EXACT:
1982     min = max = GET2(ecode, 1);
1983     ecode += 3;
1984     goto REPEATCHAR;
1985
1986     case OP_POSUPTO:
1987     possessive = TRUE;
1988     /* Fall through */
1989
1990     case OP_UPTO:
1991     case OP_MINUPTO:
1992     min = 0;
1993     max = GET2(ecode, 1);
1994     minimize = *ecode == OP_MINUPTO;
1995     ecode += 3;
1996     goto REPEATCHAR;
1997
1998     case OP_POSSTAR:
1999     possessive = TRUE;
2000     min = 0;
2001     max = INT_MAX;
2002     ecode++;
2003     goto REPEATCHAR;
2004
2005     case OP_POSPLUS:
2006     possessive = TRUE;
2007     min = 1;
2008     max = INT_MAX;
2009     ecode++;
2010     goto REPEATCHAR;
2011
2012     case OP_POSQUERY:
2013     possessive = TRUE;
2014     min = 0;
2015     max = 1;
2016     ecode++;
2017     goto REPEATCHAR;
2018
2019     case OP_STAR:
2020     case OP_MINSTAR:
2021     case OP_PLUS:
2022     case OP_MINPLUS:
2023     case OP_QUERY:
2024     case OP_MINQUERY:
2025     c = *ecode++ - OP_STAR;           /* Offset used to index the tables */
2026     minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
2027     min = rep_min[c];                 /* Pick up values from tables; */
2028     max = rep_max[c];                 /* zero for max => infinity */
2029     if (max == 0) max = INT_MAX;
2030     /* These six opcodes run straight on into REPEATCHAR below. */
2031     /* Common code for all repeated single-character matches. We can give
2032     up quickly if there are fewer than the minimum number of characters left in
2033     the subject. */
2034
2035     REPEATCHAR:
2036 #ifdef SUPPORT_UTF8
2037     if (utf8)
2038       {
2039       length = 1;
2040       charptr = ecode;                    /* Start of the pattern character's bytes */
2041       GETCHARLEN(fc, ecode, length);
2042       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2043       ecode += length;
2044
2045       /* Handle multibyte character matching specially here. There is
2046       support for caseless matching if UCP support is present. */
2047
2048       if (length > 1)
2049         {
2050         int oclength = 0;                 /* Stays 0 when there is no other case to try */
2051         uschar occhars[8];
2052
2053 #ifdef SUPPORT_UCP
2054         unsigned int othercase;
2055         if ((ims & PCRE_CASELESS) != 0 &&
2056             (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2057           oclength = _pcre_ord2utf8(othercase, occhars);
2058 #endif  /* SUPPORT_UCP */
2059         /* First, the mandatory min copies, each in either case. */
2060         for (i = 1; i <= min; i++)
2061           {
2062           if (memcmp(eptr, charptr, length) == 0) eptr += length;
2063           /* Need braces because of following else */
2064           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2065           else
2066             {
2067             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2068             eptr += oclength;
2069             }
2070           }
2071
2072         if (min == max) continue;
2073
2074         if (minimize)
2075           {
2076           for (fi = min;; fi++)
2077             {
2078             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2079             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2080             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2081             if (memcmp(eptr, charptr, length) == 0) eptr += length;  /* NOTE(review): only eptr < end is known here, yet length bytes are compared - confirm */
2082             /* Need braces because of following else */
2083             else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2084             else
2085               {
2086               if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2087               eptr += oclength;
2088               }
2089             }
2090           /* Control never gets here */
2091           }
2092
2093         else  /* Maximize */
2094           {
2095           pp = eptr;
2096           for (i = min; i < max; i++)
2097             {
2098             if (eptr > md->end_subject - length) break;
2099             if (memcmp(eptr, charptr, length) == 0) eptr += length;
2100             else if (oclength == 0) break;
2101             else
2102               {
2103               if (memcmp(eptr, occhars, oclength) != 0) break;
2104               eptr += oclength;
2105               }
2106             }
2107
2108           if (possessive) continue;
2109           while (eptr >= pp)
2110            {
2111            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2112            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2113            eptr -= length;                /* NOTE(review): assumes each match took length bytes; an other-case match advanced by oclength - confirm they are equal */
2114            }
2115           RRETURN(MATCH_NOMATCH);
2116           }
2117         /* Control never gets here */
2118         }
2119
2120       /* If the length of a UTF-8 character is 1, we fall through here, and
2121       obey the code as for non-UTF-8 characters below, though in this case the
2122       value of fc will always be < 128. */
2123       }
2124     else
2125 #endif  /* SUPPORT_UTF8 */
2126
2127     /* When not in UTF-8 mode, load a single-byte character. */
2128       {
2129       if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2130       fc = *ecode++;
2131       }
2132
2133     /* The value of fc at this point is always less than 256, though we may or
2134     may not be in UTF-8 mode. The code is duplicated for the caseless and
2135     caseful cases, for speed, since matching characters is likely to be quite
2136     common. First, ensure the minimum number of matches are present. If min =
2137     max, continue at the same level without recursing. Otherwise, if
2138     minimizing, keep trying the rest of the expression and advancing one
2139     matching character if failing, up to the maximum. Alternatively, if
2140     maximizing, find the maximum number of characters and work backwards. */
2141
2142     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2143       max, eptr));
2144
2145     if ((ims & PCRE_CASELESS) != 0)
2146       {
2147       fc = md->lcc[fc];                   /* Map once; subject bytes are mapped as they are read */
2148       for (i = 1; i <= min; i++)
2149         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2150       if (min == max) continue;
2151       if (minimize)
2152         {
2153         for (fi = min;; fi++)
2154           {
2155           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2156           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2157           if (fi >= max || eptr >= md->end_subject ||
2158               fc != md->lcc[*eptr++])
2159             RRETURN(MATCH_NOMATCH);
2160           }
2161         /* Control never gets here */
2162         }
2163       else  /* Maximize */
2164         {
2165         pp = eptr;                        /* Backtracking stops at this position */
2166         for (i = min; i < max; i++)
2167           {
2168           if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2169           eptr++;
2170           }
2171         if (possessive) continue;         /* Possessive: keep the whole run, never give any back */
2172         while (eptr >= pp)
2173           {
2174           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2175           eptr--;
2176           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2177           }
2178         RRETURN(MATCH_NOMATCH);
2179         }
2180       /* Control never gets here */
2181       }
2182
2183     /* Caseful comparisons of the single-byte character in fc */
2184
2185     else
2186       {
2187       for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2188       if (min == max) continue;
2189       if (minimize)
2190         {
2191         for (fi = min;; fi++)
2192           {
2193           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2194           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2195           if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2196             RRETURN(MATCH_NOMATCH);
2197           }
2198         /* Control never gets here */
2199         }
2200       else  /* Maximize */
2201         {
2202         pp = eptr;                        /* Backtracking stops at this position */
2203         for (i = min; i < max; i++)
2204           {
2205           if (eptr >= md->end_subject || fc != *eptr) break;
2206           eptr++;
2207           }
2208         if (possessive) continue;         /* Possessive: keep the whole run, never give any back */
2209         while (eptr >= pp)
2210           {
2211           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2212           eptr--;
2213           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2214           }
2215         RRETURN(MATCH_NOMATCH);
2216         }
2217       }
2218     /* Control never gets here */
2219
2220     /* Match a negated single one-byte character. The pattern character is one
2221     byte, but the subject character we are checking can be multibyte. */
2222
2223     case OP_NOT:
2224     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2225     ecode++;
2226     GETCHARINCTEST(c, eptr);
2227     if ((ims & PCRE_CASELESS) != 0)
2228       {
2229 #ifdef SUPPORT_UTF8
2230       if (c < 256)                        /* Guards the md->lcc lookup on the next line */
2231 #endif
2232       c = md->lcc[c];
2233       if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2234       }
2235     else
2236       {
2237       if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2238       }
2239     break;
2240
2241     /* Match a negated single one-byte character repeatedly. This is almost a
2242     repeat of the code for a repeated single character, but I haven't found a
2243     nice way of commoning these up that doesn't require a test of the
2244     positive/negative option for each character match. Maybe that wouldn't add
2245     very much to the time taken, but character matching *is* what this is all
2246     about... Each opcode below sets min and max, then joins REPEATNOTCHAR. */
2247
2248     case OP_NOTEXACT:
2249     min = max = GET2(ecode, 1);
2250     ecode += 3;
2251     goto REPEATNOTCHAR;
2252
2253     case OP_NOTUPTO:
2254     case OP_NOTMINUPTO:
2255     min = 0;
2256     max = GET2(ecode, 1);
2257     minimize = *ecode == OP_NOTMINUPTO;
2258     ecode += 3;
2259     goto REPEATNOTCHAR;
2260
2261     case OP_NOTPOSSTAR:
2262     possessive = TRUE;
2263     min = 0;
2264     max = INT_MAX;
2265     ecode++;
2266     goto REPEATNOTCHAR;
2267
2268     case OP_NOTPOSPLUS:
2269     possessive = TRUE;
2270     min = 1;
2271     max = INT_MAX;
2272     ecode++;
2273     goto REPEATNOTCHAR;
2274
2275     case OP_NOTPOSQUERY:
2276     possessive = TRUE;
2277     min = 0;
2278     max = 1;
2279     ecode++;
2280     goto REPEATNOTCHAR;
2281
2282     case OP_NOTPOSUPTO:
2283     possessive = TRUE;
2284     min = 0;
2285     max = GET2(ecode, 1);
2286     ecode += 3;
2287     goto REPEATNOTCHAR;
2288
2289     case OP_NOTSTAR:
2290     case OP_NOTMINSTAR:
2291     case OP_NOTPLUS:
2292     case OP_NOTMINPLUS:
2293     case OP_NOTQUERY:
2294     case OP_NOTMINQUERY:
2295     c = *ecode++ - OP_NOTSTAR;        /* Offset used to index the tables */
2296     minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
2297     min = rep_min[c];                 /* Pick up values from tables; */
2298     max = rep_max[c];                 /* zero for max => infinity */
2299     if (max == 0) max = INT_MAX;
2300     /* These six opcodes run straight on into REPEATNOTCHAR below. */
2301     /* Common code for all repeated single-byte matches. We can give up quickly
2302     if there are fewer than the minimum number of bytes left in the
2303     subject. */
2304
2305     REPEATNOTCHAR:
2306     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2307     fc = *ecode++;
2308
2309     /* The code is duplicated for the caseless and caseful cases, for speed,
2310     since matching characters is likely to be quite common. First, ensure the
2311     minimum number of matches are present. If min = max, continue at the same
2312     level without recursing. Otherwise, if minimizing, keep trying the rest of
2313     the expression and advancing one matching character if failing, up to the
2314     maximum. Alternatively, if maximizing, find the maximum number of
2315     characters and work backwards. */
2316
2317     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2318       max, eptr));
2319
2320     if ((ims & PCRE_CASELESS) != 0)
2321       {
2322       fc = md->lcc[fc];
2323
2324 #ifdef SUPPORT_UTF8
2325       /* UTF-8 mode */
2326       if (utf8)
2327         {
2328         register unsigned int d;
2329         for (i = 1; i <= min; i++)
2330           {
2331           GETCHARINC(d, eptr);
2332           if (d < 256) d = md->lcc[d];
2333           if (fc == d) RRETURN(MATCH_NOMATCH);
2334           }
2335         }
2336       else
2337 #endif
2338
2339       /* Not UTF-8 mode */
2340         {
2341         for (i = 1; i <= min; i++)
2342           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2343         }
2344
2345       if (min == max) continue;
2346
2347       if (minimize)
2348         {
2349 #ifdef SUPPORT_UTF8
2350         /* UTF-8 mode */
2351         if (utf8)
2352           {
2353           register unsigned int d;
2354           for (fi = min;; fi++)
2355             {
2356             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2357             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2358             GETCHARINC(d, eptr);
2359             if (d < 256) d = md->lcc[d];
2360             if (fi >= max || eptr >= md->end_subject || fc == d)
2361               RRETURN(MATCH_NOMATCH);
2362             }
2363           }
2364         else
2365 #endif
2366         /* Not UTF-8 mode */
2367           {
2368           for (fi = min;; fi++)
2369             {
2370             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2371             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2372             if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2373               RRETURN(MATCH_NOMATCH);
2374             }
2375           }
2376         /* Control never gets here */
2377         }
2378
2379       /* Maximize case */
2380
2381       else
2382         {
2383         pp = eptr;
2384
2385 #ifdef SUPPORT_UTF8
2386         /* UTF-8 mode */
2387         if (utf8)
2388           {
2389           register unsigned int d;
2390           for (i = min; i < max; i++)
2391             {
2392             int len = 1;
2393             if (eptr >= md->end_subject) break;
2394             GETCHARLEN(d, eptr, len);
2395             if (d < 256) d = md->lcc[d];
2396             if (fc == d) break;
2397             eptr += len;
2398             }
2399         if (possessive) continue;
2400         for(;;)
2401             {
2402             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2403             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2404             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2405             BACKCHAR(eptr);
2406             }
2407           }
2408         else
2409 #endif
2410         /* Not UTF-8 mode */
2411           {
2412           for (i = min; i < max; i++)
2413             {
2414             if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2415             eptr++;
2416             }
2417           if (possessive) continue;
2418           while (eptr >= pp)
2419             {
2420             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2421             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422             eptr--;
2423             }
2424           }
2425
2426         RRETURN(MATCH_NOMATCH);
2427         }
2428       /* Control never gets here */
2429       }
2430
2431     /* Caseful comparisons */
2432
2433     else
2434       {
2435 #ifdef SUPPORT_UTF8
2436       /* UTF-8 mode */
2437       if (utf8)
2438         {
2439         register unsigned int d;
2440         for (i = 1; i <= min; i++)
2441           {
2442           GETCHARINC(d, eptr);
2443           if (fc == d) RRETURN(MATCH_NOMATCH);
2444           }
2445         }
2446       else
2447 #endif
2448       /* Not UTF-8 mode */
2449         {
2450         for (i = 1; i <= min; i++)
2451           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2452         }
2453
2454       if (min == max) continue;
2455
2456       if (minimize)
2457         {
2458 #ifdef SUPPORT_UTF8
2459         /* UTF-8 mode */
2460         if (utf8)
2461           {
2462           register unsigned int d;
2463           for (fi = min;; fi++)
2464             {
2465             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2466             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2467             GETCHARINC(d, eptr);
2468             if (fi >= max || eptr >= md->end_subject || fc == d)
2469               RRETURN(MATCH_NOMATCH);
2470             }
2471           }
2472         else
2473 #endif
2474         /* Not UTF-8 mode */
2475           {
2476           for (fi = min;; fi++)
2477             {
2478             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2479             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2480             if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2481               RRETURN(MATCH_NOMATCH);
2482             }
2483           }
2484         /* Control never gets here */
2485         }
2486
2487       /* Maximize case */
2488
2489       else
2490         {
2491         pp = eptr;
2492
2493 #ifdef SUPPORT_UTF8
2494         /* UTF-8 mode */
2495         if (utf8)
2496           {
2497           register unsigned int d;
2498           for (i = min; i < max; i++)
2499             {
2500             int len = 1;
2501             if (eptr >= md->end_subject) break;
2502             GETCHARLEN(d, eptr, len);
2503             if (fc == d) break;
2504             eptr += len;
2505             }
2506           if (possessive) continue;
2507           for(;;)
2508             {
2509             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2510             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2511             if (eptr-- == pp) break;        /* Stop if tried at original pos */
2512             BACKCHAR(eptr);
2513             }
2514           }
2515         else
2516 #endif
2517         /* Not UTF-8 mode */
2518           {
2519           for (i = min; i < max; i++)
2520             {
2521             if (eptr >= md->end_subject || fc == *eptr) break;
2522             eptr++;
2523             }
2524           if (possessive) continue;
2525           while (eptr >= pp)
2526             {
2527             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2528             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2529             eptr--;
2530             }
2531           }
2532
2533         RRETURN(MATCH_NOMATCH);
2534         }
2535       }
2536     /* Control never gets here */
2537
2538     /* Match a single character type repeatedly; several different opcodes
2539     share code. This is very similar to the code for single characters, but we
2540     repeat it in the interests of efficiency. Each opcode joins REPEATTYPE. */
2541
2542     case OP_TYPEEXACT:
2543     min = max = GET2(ecode, 1);
2544     minimize = TRUE;
2545     ecode += 3;
2546     goto REPEATTYPE;
2547
2548     case OP_TYPEUPTO:
2549     case OP_TYPEMINUPTO:
2550     min = 0;
2551     max = GET2(ecode, 1);
2552     minimize = *ecode == OP_TYPEMINUPTO;
2553     ecode += 3;
2554     goto REPEATTYPE;
2555
2556     case OP_TYPEPOSSTAR:
2557     possessive = TRUE;
2558     min = 0;
2559     max = INT_MAX;
2560     ecode++;
2561     goto REPEATTYPE;
2562
2563     case OP_TYPEPOSPLUS:
2564     possessive = TRUE;
2565     min = 1;
2566     max = INT_MAX;
2567     ecode++;
2568     goto REPEATTYPE;
2569
2570     case OP_TYPEPOSQUERY:
2571     possessive = TRUE;
2572     min = 0;
2573     max = 1;
2574     ecode++;
2575     goto REPEATTYPE;
2576
2577     case OP_TYPEPOSUPTO:
2578     possessive = TRUE;
2579     min = 0;
2580     max = GET2(ecode, 1);
2581     ecode += 3;
2582     goto REPEATTYPE;
2583
2584     case OP_TYPESTAR:
2585     case OP_TYPEMINSTAR:
2586     case OP_TYPEPLUS:
2587     case OP_TYPEMINPLUS:
2588     case OP_TYPEQUERY:
2589     case OP_TYPEMINQUERY:
2590     c = *ecode++ - OP_TYPESTAR;       /* Offset used to index the tables */
2591     minimize = (c & 1) != 0;          /* Odd offsets select minimizing */
2592     min = rep_min[c];                 /* Pick up values from tables; */
2593     max = rep_max[c];                 /* zero for max => infinity */
2594     if (max == 0) max = INT_MAX;
2595     /* These six opcodes run straight on into REPEATTYPE below. */
2596     /* Common code for all repeated single character type matches. Note that
2597     in UTF-8 mode, '.' matches a character of any length, but for the other
2598     character types, the valid characters are all one-byte long. */
2599
2600     REPEATTYPE:
2601     ctype = *ecode++;      /* Code for the character type */
2602
2603 #ifdef SUPPORT_UCP
2604     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2605       {
2606       prop_fail_result = ctype == OP_NOTPROP;
2607       prop_type = *ecode++;
2608       prop_value = *ecode++;
2609       }
2610     else prop_type = -1;
2611 #endif
2612
2613     /* First, ensure the minimum number of matches are present. Use inline
2614     code for maximizing the speed, and do the type test once at the start
2615     (i.e. keep it out of the loop). Also we can test that there are at least
2616     the minimum number of bytes before we start. This isn't as effective in
2617     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2618     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2619     and single-bytes. */
2620
2621     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2622     if (min > 0)
2623       {
2624 #ifdef SUPPORT_UCP
2625       if (prop_type >= 0)
2626         {
2627         switch(prop_type)
2628           {
2629           case PT_ANY:
2630           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2631           for (i = 1; i <= min; i++)
2632             {
2633             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2634             GETCHARINC(c, eptr);
2635             }
2636           break;
2637
2638           case PT_LAMP:
2639           for (i = 1; i <= min; i++)
2640             {
2641             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2642             GETCHARINC(c, eptr);
2643             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2644             if ((prop_chartype == ucp_Lu ||
2645                  prop_chartype == ucp_Ll ||
2646                  prop_chartype == ucp_Lt) == prop_fail_result)
2647               RRETURN(MATCH_NOMATCH);
2648             }
2649           break;
2650
2651           case PT_GC:
2652           for (i = 1; i <= min; i++)
2653             {
2654             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2655             GETCHARINC(c, eptr);
2656             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2657             if ((prop_category == prop_value) == prop_fail_result)
2658               RRETURN(MATCH_NOMATCH);
2659             }
2660           break;
2661
2662           case PT_PC:
2663           for (i = 1; i <= min; i++)
2664             {
2665             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2666             GETCHARINC(c, eptr);
2667             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2668             if ((prop_chartype == prop_value) == prop_fail_result)
2669               RRETURN(MATCH_NOMATCH);
2670             }
2671           break;
2672
2673           case PT_SC:
2674           for (i = 1; i <= min; i++)
2675             {
2676             if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2677             GETCHARINC(c, eptr);
2678             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2679             if ((prop_script == prop_value) == prop_fail_result)
2680               RRETURN(MATCH_NOMATCH);
2681             }
2682           break;
2683
2684           default:
2685           RRETURN(PCRE_ERROR_INTERNAL);
2686           }
2687         }
2688
2689       /* Match extended Unicode sequences. We will get here only if the
2690       support is in the binary; otherwise a compile-time error occurs. */
2691
2692       else if (ctype == OP_EXTUNI)
2693         {
2694         for (i = 1; i <= min; i++)
2695           {
2696           GETCHARINCTEST(c, eptr);
2697           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2698           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2699           while (eptr < md->end_subject)
2700             {
2701             int len = 1;
2702             if (!utf8) c = *eptr; else
2703               {
2704               GETCHARLEN(c, eptr, len);
2705               }
2706             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2707             if (prop_category != ucp_M) break;
2708             eptr += len;
2709             }
2710           }
2711         }
2712
2713       else
2714 #endif     /* SUPPORT_UCP */
2715
2716 /* Handle all other cases when the coding is UTF-8 */
2717
2718 #ifdef SUPPORT_UTF8
2719       if (utf8) switch(ctype)
2720         {
2721         case OP_ANY:
2722         for (i = 1; i <= min; i++)
2723           {
2724           if (eptr >= md->end_subject ||
2725                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2726             RRETURN(MATCH_NOMATCH);
2727           eptr++;
2728           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2729           }
2730         break;
2731
2732         case OP_ANYBYTE:
2733         eptr += min;
2734         break;
2735
2736         case OP_ANYNL:
2737         for (i = 1; i <= min; i++)
2738           {
2739           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2740           GETCHARINC(c, eptr);
2741           switch(c)
2742             {
2743             default: RRETURN(MATCH_NOMATCH);
2744             case 0x000d:
2745             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2746             break;
2747             case 0x000a:
2748             case 0x000b:
2749             case 0x000c:
2750             case 0x0085:
2751             case 0x2028:
2752             case 0x2029:
2753             break;
2754             }
2755           }
2756         break;
2757
2758         case OP_NOT_DIGIT:
2759         for (i = 1; i <= min; i++)
2760           {
2761           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2762           GETCHARINC(c, eptr);
2763           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2764             RRETURN(MATCH_NOMATCH);
2765           }
2766         break;
2767
2768         case OP_DIGIT:
2769         for (i = 1; i <= min; i++)
2770           {
2771           if (eptr >= md->end_subject ||
2772              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2773             RRETURN(MATCH_NOMATCH);
2774           /* No need to skip more bytes - we know it's a 1-byte character */
2775           }
2776         break;
2777
2778         case OP_NOT_WHITESPACE:
2779         for (i = 1; i <= min; i++)
2780           {
2781           if (eptr >= md->end_subject ||
2782              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
2783             RRETURN(MATCH_NOMATCH);
2784           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2785           }
2786         break;
2787
2788         case OP_WHITESPACE:
2789         for (i = 1; i <= min; i++)
2790           {
2791           if (eptr >= md->end_subject ||
2792              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2793             RRETURN(MATCH_NOMATCH);
2794           /* No need to skip more bytes - we know it's a 1-byte character */
2795           }
2796         break;
2797
2798         case OP_NOT_WORDCHAR:
2799         for (i = 1; i <= min; i++)
2800           {
2801           if (eptr >= md->end_subject ||
2802              (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
2803             RRETURN(MATCH_NOMATCH);
2804           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2805           }
2806         break;
2807
2808         case OP_WORDCHAR:
2809         for (i = 1; i <= min; i++)
2810           {
2811           if (eptr >= md->end_subject ||
2812              *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2813             RRETURN(MATCH_NOMATCH);
2814           /* No need to skip more bytes - we know it's a 1-byte character */
2815           }
2816         break;
2817
2818         default:
2819         RRETURN(PCRE_ERROR_INTERNAL);
2820         }  /* End switch(ctype) */
2821
2822       else
2823 #endif     /* SUPPORT_UTF8 */
2824
2825       /* Code for the non-UTF-8 case for minimum matching of operators other
2826       than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2827       number of bytes present, as this was tested above. */
2828
2829       switch(ctype)
2830         {
2831         case OP_ANY:
2832         if ((ims & PCRE_DOTALL) == 0)
2833           {
2834           for (i = 1; i <= min; i++)
2835             {
2836             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2837             eptr++;
2838             }
2839           }
2840         else eptr += min;
2841         break;
2842
2843         case OP_ANYBYTE:
2844         eptr += min;
2845         break;
2846
2847         /* Because of the CRLF case, we can't assume the minimum number of
2848         bytes are present in this case. */
2849
2850         case OP_ANYNL:
2851         for (i = 1; i <= min; i++)
2852           {
2853           if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2854           switch(*eptr++)
2855             {
2856             default: RRETURN(MATCH_NOMATCH);
2857             case 0x000d:
2858             if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2859             break;
2860             case 0x000a:
2861             case 0x000b:
2862             case 0x000c:
2863             case 0x0085:
2864             break;
2865             }
2866           }
2867         break;
2868
2869         case OP_NOT_DIGIT:
2870         for (i = 1; i <= min; i++)
2871           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2872         break;
2873
2874         case OP_DIGIT:
2875         for (i = 1; i <= min; i++)
2876           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2877         break;
2878
2879         case OP_NOT_WHITESPACE:
2880         for (i = 1; i <= min; i++)
2881           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2882         break;
2883
2884         case OP_WHITESPACE:
2885         for (i = 1; i <= min; i++)
2886           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2887         break;
2888
2889         case OP_NOT_WORDCHAR:
2890         for (i = 1; i <= min; i++)
2891           if ((md->ctypes[*eptr++] & ctype_word) != 0)
2892             RRETURN(MATCH_NOMATCH);
2893         break;
2894
2895         case OP_WORDCHAR:
2896         for (i = 1; i <= min; i++)
2897           if ((md->ctypes[*eptr++] & ctype_word) == 0)
2898             RRETURN(MATCH_NOMATCH);
2899         break;
2900
2901         default:
2902         RRETURN(PCRE_ERROR_INTERNAL);
2903         }
2904       }
2905
2906     /* If min = max, continue at the same level without recursing */
2907
2908     if (min == max) continue;
2909
2910     /* If minimizing, we have to test the rest of the pattern before each
2911     subsequent match. Again, separate the UTF-8 case for speed, and also
2912     separate the UCP cases. */
2913
2914     if (minimize)
2915       {
2916 #ifdef SUPPORT_UCP
2917       if (prop_type >= 0)
2918         {
2919         switch(prop_type)
2920           {
2921           case PT_ANY:
2922           for (fi = min;; fi++)
2923             {
2924             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2925             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2926             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2927             GETCHARINC(c, eptr);
2928             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2929             }
2930           /* Control never gets here */
2931
2932           case PT_LAMP:
2933           for (fi = min;; fi++)
2934             {
2935             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2936             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2937             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938             GETCHARINC(c, eptr);
2939             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2940             if ((prop_chartype == ucp_Lu ||
2941                  prop_chartype == ucp_Ll ||
2942                  prop_chartype == ucp_Lt) == prop_fail_result)
2943               RRETURN(MATCH_NOMATCH);
2944             }
2945           /* Control never gets here */
2946
2947           case PT_GC:
2948           for (fi = min;; fi++)
2949             {
2950             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2951             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2952             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2953             GETCHARINC(c, eptr);
2954             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2955             if ((prop_category == prop_value) == prop_fail_result)
2956               RRETURN(MATCH_NOMATCH);
2957             }
2958           /* Control never gets here */
2959
2960           case PT_PC:
2961           for (fi = min;; fi++)
2962             {
2963             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2964             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2965             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2966             GETCHARINC(c, eptr);
2967             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2968             if ((prop_chartype == prop_value) == prop_fail_result)
2969               RRETURN(MATCH_NOMATCH);
2970             }
2971           /* Control never gets here */
2972
2973           case PT_SC:
2974           for (fi = min;; fi++)
2975             {
2976             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2977             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2978             if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2979             GETCHARINC(c, eptr);
2980             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2981             if ((prop_script == prop_value) == prop_fail_result)
2982               RRETURN(MATCH_NOMATCH);
2983             }
2984           /* Control never gets here */
2985
2986           default:
2987           RRETURN(PCRE_ERROR_INTERNAL);
2988           }
2989         }
2990
2991       /* Match extended Unicode sequences. We will get here only if the
2992       support is in the binary; otherwise a compile-time error occurs. */
2993
2994       else if (ctype == OP_EXTUNI)
2995         {
2996         for (fi = min;; fi++)
2997           {
2998           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2999           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3000           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001           GETCHARINCTEST(c, eptr);
3002           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3003           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3004           while (eptr < md->end_subject)
3005             {
3006             int len = 1;
3007             if (!utf8) c = *eptr; else
3008               {
3009               GETCHARLEN(c, eptr, len);
3010               }
3011             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3012             if (prop_category != ucp_M) break;
3013             eptr += len;
3014             }
3015           }
3016         }
3017
3018       else
3019 #endif     /* SUPPORT_UCP */
3020
3021 #ifdef SUPPORT_UTF8
3022       /* UTF-8 mode */
3023       if (utf8)
3024         {
3025         for (fi = min;; fi++)
3026           {
3027           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3028           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3029           if (fi >= max || eptr >= md->end_subject ||
3030                (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3031                 IS_NEWLINE(eptr)))
3032             RRETURN(MATCH_NOMATCH);
3033
3034           GETCHARINC(c, eptr);
3035           switch(ctype)
3036             {
3037             case OP_ANY:        /* This is the DOTALL case */
3038             break;
3039
3040             case OP_ANYBYTE:
3041             break;
3042
3043             case OP_ANYNL:
3044             switch(c)
3045               {
3046               default: RRETURN(MATCH_NOMATCH);
3047               case 0x000d:
3048               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3049               break;
3050               case 0x000a:
3051               case 0x000b:
3052               case 0x000c:
3053               case 0x0085:
3054               case 0x2028:
3055               case 0x2029:
3056               break;
3057               }
3058             break;
3059
3060             case OP_NOT_DIGIT:
3061             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3062               RRETURN(MATCH_NOMATCH);
3063             break;
3064
3065             case OP_DIGIT:
3066             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3067               RRETURN(MATCH_NOMATCH);
3068             break;
3069
3070             case OP_NOT_WHITESPACE:
3071             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3072               RRETURN(MATCH_NOMATCH);
3073             break;
3074
3075             case OP_WHITESPACE:
3076             if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3077               RRETURN(MATCH_NOMATCH);
3078             break;
3079
3080             case OP_NOT_WORDCHAR:
3081             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3082               RRETURN(MATCH_NOMATCH);
3083             break;
3084
3085             case OP_WORDCHAR:
3086             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3087               RRETURN(MATCH_NOMATCH);
3088             break;
3089
3090             default:
3091             RRETURN(PCRE_ERROR_INTERNAL);
3092             }
3093           }
3094         }
3095       else
3096 #endif
3097       /* Not UTF-8 mode */
3098         {
3099         for (fi = min;; fi++)
3100           {
3101           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3102           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3103           if (fi >= max || eptr >= md->end_subject ||
3104                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3105             RRETURN(MATCH_NOMATCH);
3106
3107           c = *eptr++;
3108           switch(ctype)
3109             {
3110             case OP_ANY:   /* This is the DOTALL case */
3111             break;
3112
3113             case OP_ANYBYTE:
3114             break;
3115
3116             case OP_ANYNL:
3117             switch(c)
3118               {
3119               default: RRETURN(MATCH_NOMATCH);
3120               case 0x000d:
3121               if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3122               break;
3123               case 0x000a:
3124               case 0x000b:
3125               case 0x000c:
3126               case 0x0085:
3127               break;
3128               }
3129             break;
3130
3131             case OP_NOT_DIGIT:
3132             if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3133             break;
3134
3135             case OP_DIGIT:
3136             if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3137             break;
3138
3139             case OP_NOT_WHITESPACE:
3140             if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3141             break;
3142
3143             case OP_WHITESPACE:
3144             if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3145             break;
3146
3147             case OP_NOT_WORDCHAR:
3148             if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3149             break;
3150
3151             case OP_WORDCHAR:
3152             if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3153             break;
3154
3155             default:
3156             RRETURN(PCRE_ERROR_INTERNAL);
3157             }
3158           }
3159         }
3160       /* Control never gets here */
3161       }
3162
3163     /* If maximizing, it is worth using inline code for speed, doing the type
3164     test once at the start (i.e. keep it out of the loop). Again, keep the
3165     UTF-8 and UCP stuff separate. */
3166
3167     else
3168       {
3169       pp = eptr;  /* Remember where we started */
3170
3171 #ifdef SUPPORT_UCP
3172       if (prop_type >= 0)
3173         {
3174         switch(prop_type)
3175           {
3176           case PT_ANY:
3177           for (i = min; i < max; i++)
3178             {
3179             int len = 1;
3180             if (eptr >= md->end_subject) break;
3181             GETCHARLEN(c, eptr, len);
3182             if (prop_fail_result) break;
3183             eptr+= len;
3184             }
3185           break;
3186
3187           case PT_LAMP:
3188           for (i = min; i < max; i++)
3189             {
3190             int len = 1;
3191             if (eptr >= md->end_subject) break;
3192             GETCHARLEN(c, eptr, len);
3193             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3194             if ((prop_chartype == ucp_Lu ||
3195                  prop_chartype == ucp_Ll ||
3196                  prop_chartype == ucp_Lt) == prop_fail_result)
3197               break;
3198             eptr+= len;
3199             }
3200           break;
3201
3202           case PT_GC:
3203           for (i = min; i < max; i++)
3204             {
3205             int len = 1;
3206             if (eptr >= md->end_subject) break;
3207             GETCHARLEN(c, eptr, len);
3208             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3209             if ((prop_category == prop_value) == prop_fail_result)
3210               break;
3211             eptr+= len;
3212             }
3213           break;
3214
3215           case PT_PC:
3216           for (i = min; i < max; i++)
3217             {
3218             int len = 1;
3219             if (eptr >= md->end_subject) break;
3220             GETCHARLEN(c, eptr, len);
3221             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3222             if ((prop_chartype == prop_value) == prop_fail_result)
3223               break;
3224             eptr+= len;
3225             }
3226           break;
3227
3228           case PT_SC:
3229           for (i = min; i < max; i++)
3230             {
3231             int len = 1;
3232             if (eptr >= md->end_subject) break;
3233             GETCHARLEN(c, eptr, len);
3234             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3235             if ((prop_script == prop_value) == prop_fail_result)
3236               break;
3237             eptr+= len;
3238             }
3239           break;
3240           }
3241
3242         /* eptr is now past the end of the maximum run */
3243
3244         if (possessive) continue;
3245         for(;;)
3246           {
3247           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3248           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3250           BACKCHAR(eptr);
3251           }
3252         }
3253
3254       /* Match extended Unicode sequences. We will get here only if the
3255       support is in the binary; otherwise a compile-time error occurs. */
3256
3257       else if (ctype == OP_EXTUNI)
3258         {
3259         for (i = min; i < max; i++)
3260           {
3261           if (eptr >= md->end_subject) break;
3262           GETCHARINCTEST(c, eptr);
3263           prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3264           if (prop_category == ucp_M) break;
3265           while (eptr < md->end_subject)
3266             {
3267             int len = 1;
3268             if (!utf8) c = *eptr; else
3269               {
3270               GETCHARLEN(c, eptr, len);
3271               }
3272             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3273             if (prop_category != ucp_M) break;
3274             eptr += len;
3275             }
3276           }
3277
3278         /* eptr is now past the end of the maximum run */
3279
3280         if (possessive) continue;
3281         for(;;)
3282           {
3283           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3284           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3285           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3286           for (;;)                        /* Move back over one extended */
3287             {
3288             int len = 1;
3289             BACKCHAR(eptr);
3290             if (!utf8) c = *eptr; else
3291               {
3292               GETCHARLEN(c, eptr, len);
3293               }
3294             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3295             if (prop_category != ucp_M) break;
3296             eptr--;
3297             }
3298           }
3299         }
3300
3301       else
3302 #endif   /* SUPPORT_UCP */
3303
3304 #ifdef SUPPORT_UTF8
3305       /* UTF-8 mode */
3306
3307       if (utf8)
3308         {
3309         switch(ctype)
3310           {
3311           case OP_ANY:
3312
3313           /* Special code is required for UTF8, but when the maximum is
3314           unlimited we don't need it, so we repeat the non-UTF8 code. This is
3315           probably worth it, because .* is quite a common idiom. */
3316
3317           if (max < INT_MAX)
3318             {
3319             if ((ims & PCRE_DOTALL) == 0)
3320               {
3321               for (i = min; i < max; i++)
3322                 {
3323                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3324                 eptr++;
3325                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3326                 }
3327               }
3328             else
3329               {
3330               for (i = min; i < max; i++)
3331                 {
3332                 if (eptr >= md->end_subject) break;
3333                 eptr++;
3334                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3335                 }
3336               }
3337             }
3338
3339           /* Handle unlimited UTF-8 repeat */
3340
3341           else
3342             {
3343             if ((ims & PCRE_DOTALL) == 0)
3344               {
3345               for (i = min; i < max; i++)
3346                 {
3347                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3348                 eptr++;
3349                 }
3350               break;
3351               }
3352             else
3353               {
3354               c = max - min;
3355               if (c > (unsigned int)(md->end_subject - eptr))
3356                 c = md->end_subject - eptr;
3357               eptr += c;
3358               }
3359             }
3360           break;
3361
3362           /* The byte case is the same as non-UTF8 */
3363
3364           case OP_ANYBYTE:
3365           c = max - min;
3366           if (c > (unsigned int)(md->end_subject - eptr))
3367             c = md->end_subject - eptr;
3368           eptr += c;
3369           break;
3370
3371           case OP_ANYNL:
3372           for (i = min; i < max; i++)
3373             {
3374             int len = 1;
3375             if (eptr >= md->end_subject) break;
3376             GETCHARLEN(c, eptr, len);
3377             if (c == 0x000d)
3378               {
3379               if (++eptr >= md->end_subject) break;
3380               if (*eptr == 0x000a) eptr++;
3381               }
3382             else
3383               {
3384               if (c != 0x000a && c != 0x000b && c != 0x000c &&
3385                   c != 0x0085 && c != 0x2028 && c != 0x2029)
3386                 break;
3387               eptr += len;
3388               }
3389             }
3390           break;
3391
3392           case OP_NOT_DIGIT:
3393           for (i = min; i < max; i++)
3394             {
3395             int len = 1;
3396             if (eptr >= md->end_subject) break;
3397             GETCHARLEN(c, eptr, len);
3398             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3399             eptr+= len;
3400             }
3401           break;
3402
3403           case OP_DIGIT:
3404           for (i = min; i < max; i++)
3405             {
3406             int len = 1;
3407             if (eptr >= md->end_subject) break;
3408             GETCHARLEN(c, eptr, len);
3409             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3410             eptr+= len;
3411             }
3412           break;
3413
3414           case OP_NOT_WHITESPACE:
3415           for (i = min; i < max; i++)
3416             {
3417             int len = 1;
3418             if (eptr >= md->end_subject) break;
3419             GETCHARLEN(c, eptr, len);
3420             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3421             eptr+= len;
3422             }
3423           break;
3424
3425           case OP_WHITESPACE:
3426           for (i = min; i < max; i++)
3427             {
3428             int len = 1;
3429             if (eptr >= md->end_subject) break;
3430             GETCHARLEN(c, eptr, len);
3431             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3432             eptr+= len;
3433             }
3434           break;
3435
3436           case OP_NOT_WORDCHAR:
3437           for (i = min; i < max; i++)
3438             {
3439             int len = 1;
3440             if (eptr >= md->end_subject) break;
3441             GETCHARLEN(c, eptr, len);
3442             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3443             eptr+= len;
3444             }
3445           break;
3446
3447           case OP_WORDCHAR:
3448           for (i = min; i < max; i++)
3449             {
3450             int len = 1;
3451             if (eptr >= md->end_subject) break;
3452             GETCHARLEN(c, eptr, len);
3453             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3454             eptr+= len;
3455             }
3456           break;
3457
3458           default:
3459           RRETURN(PCRE_ERROR_INTERNAL);
3460           }
3461
3462         /* eptr is now past the end of the maximum run */
3463
3464         if (possessive) continue;
3465         for(;;)
3466           {
3467           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3468           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469           if (eptr-- == pp) break;        /* Stop if tried at original pos */
3470           BACKCHAR(eptr);
3471           }
3472         }
3473       else
3474 #endif
3475
3476       /* Not UTF-8 mode */
3477         {
3478         switch(ctype)
3479           {
3480           case OP_ANY:
3481           if ((ims & PCRE_DOTALL) == 0)
3482             {
3483             for (i = min; i < max; i++)
3484               {
3485               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3486               eptr++;
3487               }
3488             break;
3489             }
3490           /* For DOTALL case, fall through and treat as \C */
3491
3492           case OP_ANYBYTE:
3493           c = max - min;
3494           if (c > (unsigned int)(md->end_subject - eptr))
3495             c = md->end_subject - eptr;
3496           eptr += c;
3497           break;
3498
3499           case OP_ANYNL:
3500           for (i = min; i < max; i++)
3501             {
3502             if (eptr >= md->end_subject) break;
3503             c = *eptr;
3504             if (c == 0x000d)
3505               {
3506               if (++eptr >= md->end_subject) break;
3507               if (*eptr == 0x000a) eptr++;
3508               }
3509             else
3510               {
3511               if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3512                 break;
3513               eptr++;
3514               }
3515             }
3516           break;
3517
3518           case OP_NOT_DIGIT:
3519           for (i = min; i < max; i++)
3520             {
3521             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3522               break;
3523             eptr++;
3524             }
3525           break;
3526
3527           case OP_DIGIT:
3528           for (i = min; i < max; i++)
3529             {
3530             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3531               break;
3532             eptr++;
3533             }
3534           break;
3535
3536           case OP_NOT_WHITESPACE:
3537           for (i = min; i < max; i++)
3538             {
3539             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3540               break;
3541             eptr++;
3542             }
3543           break;
3544
3545           case OP_WHITESPACE:
3546           for (i = min; i < max; i++)
3547             {
3548             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3549               break;
3550             eptr++;
3551             }
3552           break;
3553
3554           case OP_NOT_WORDCHAR:
3555           for (i = min; i < max; i++)
3556             {
3557             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3558               break;
3559             eptr++;
3560             }
3561           break;
3562
3563           case OP_WORDCHAR:
3564           for (i = min; i < max; i++)
3565             {
3566             if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3567               break;
3568             eptr++;
3569             }
3570           break;
3571
3572           default:
3573           RRETURN(PCRE_ERROR_INTERNAL);
3574           }
3575
3576         /* eptr is now past the end of the maximum run */
3577
3578         if (possessive) continue;
3579         while (eptr >= pp)
3580           {
3581           RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3582           eptr--;
3583           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3584           }
3585         }
3586
3587       /* Get here if we can't make it match with any permitted repetitions */
3588
3589       RRETURN(MATCH_NOMATCH);
3590       }
3591     /* Control never gets here */
3592
3593     /* There's been some horrible disaster. Arrival here can only mean there is
3594     something seriously wrong in the code above or the OP_xxx definitions. */
3595
3596     default:
3597     DPRINTF(("Unknown opcode %d\n", *ecode));
3598     RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3599     }
3600
3601   /* Do not stick any code in here without much thought; it is assumed
3602   that "continue" in the code above comes out to here to repeat the main
3603   loop. */
3604
3605   }             /* End of main loop */
3606 /* Control never reaches here */
3607 }
3608
3609
3610 /***************************************************************************
3611 ****************************************************************************
3612                    RECURSION IN THE match() FUNCTION
3613
3614 Undefine all the macros that were defined above to handle this. */
3615
3616 #ifdef NO_RECURSE   /* the names below are undefined only when NO_RECURSE is set */
3617 #undef eptr         /* eptr, ecode, offset_top, ims and eptrb are the names */
3618 #undef ecode        /* handed to RMATCH() in the matching code above */
3619 #undef offset_top
3620 #undef ims
3621 #undef eptrb
3622 #undef flags
3623 /* Presumably working variables of match(); pp, ctype, min, max appear above */
3624 #undef callpat
3625 #undef charptr
3626 #undef data
3627 #undef next
3628 #undef pp
3629 #undef prev
3630 #undef saved_eptr
3631
3632 #undef new_recursive
3633
3634 #undef cur_is_word
3635 #undef condition
3636 #undef prev_is_word
3637
3638 #undef original_ims
3639
3640 #undef ctype
3641 #undef length
3642 #undef max
3643 #undef min
3644 #undef number
3645 #undef offset
3646 #undef op
3647 #undef save_capture_last
3648 #undef save_offset1
3649 #undef save_offset2
3650 #undef save_offset3
3651 #undef stacksave
3652
3653 #undef newptrb
3654
3655 #endif              /* NO_RECURSE */
3656
3657 /* fc and fi are macros whether or not NO_RECURSE is set, so always undefine */
3658
3659 #undef fc
3660 #undef fi
3661
3662 /***************************************************************************
3663 ***************************************************************************/
3664
3665
3666
3667 /*************************************************
3668 *         Execute a Regular Expression           *
3669 *************************************************/
3670
3671 /* This function applies a compiled re to a subject string and picks out
3672 portions of the string if it matches. Two elements in the vector are set for
3673 each substring: the offsets to the start and end of the substring.
3674
3675 Arguments:
3676   argument_re     points to the compiled expression
3677   extra_data      points to extra data or is NULL
3678   subject         points to the subject string
3679   length          length of subject string (may contain binary zeros)
3680   start_offset    where to start in the subject string
3681   options         option bits
3682   offsets         points to a vector of ints to be filled in with offsets
3683   offsetcount     the number of elements in the vector
3684
3685 Returns:          > 0 => success; value is the number of elements filled in
3686                   = 0 => success, but offsets is not big enough
3687                    -1 => failed to match
3688                  < -1 => some kind of unexpected problem
3689 */
3690
3691 PCRE_DATA_SCOPE int
3692 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3693   PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3694   int offsetcount)
3695 {
3696 int rc, resetcount, ocount;
3697 int first_byte = -1;
3698 int req_byte = -1;
3699 int req_byte2 = -1;
3700 int newline;
3701 unsigned long int ims;
3702 BOOL using_temporary_offsets = FALSE;
3703 BOOL anchored;
3704 BOOL startline;
3705 BOOL firstline;
3706 BOOL first_byte_caseless = FALSE;
3707 BOOL req_byte_caseless = FALSE;
3708 BOOL utf8;
3709 match_data match_block;
3710 match_data *md = &match_block;
3711 const uschar *tables;
3712 const uschar *start_bits = NULL;
3713 USPTR start_match = (USPTR)subject + start_offset;
3714 USPTR end_subject;
3715 USPTR req_byte_ptr = start_match - 1;
3716 eptrblock eptrchain[EPTR_WORK_SIZE];
3717
3718 pcre_study_data internal_study;
3719 const pcre_study_data *study;
3720
3721 real_pcre internal_re;
3722 const real_pcre *external_re = (const real_pcre *)argument_re;
3723 const real_pcre *re = external_re;
3724
3725 /* Plausibility checks */
3726
3727 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3728 if (re == NULL || subject == NULL ||
3729    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3730 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3731
3732 /* Fish out the optional data from the extra_data structure, first setting
3733 the default values. */
3734
3735 study = NULL;
3736 md->match_limit = MATCH_LIMIT;
3737 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3738 md->callout_data = NULL;
3739
3740 /* The table pointer is always in native byte order. */
3741
3742 tables = external_re->tables;
3743
3744 if (extra_data != NULL)
3745   {
3746   register unsigned int flags = extra_data->flags;
3747   if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3748     study = (const pcre_study_data *)extra_data->study_data;
3749   if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3750     md->match_limit = extra_data->match_limit;
3751   if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3752     md->match_limit_recursion = extra_data->match_limit_recursion;
3753   if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3754     md->callout_data = extra_data->callout_data;
3755   if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3756   }
3757
3758 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3759 is a feature that makes it possible to save compiled regex and re-use them
3760 in other programs later. */
3761
3762 if (tables == NULL) tables = _pcre_default_tables;
3763
3764 /* Check that the first field in the block is the magic number. If it is not,
3765 test for a regex that was compiled on a host of opposite endianness. If this is
3766 the case, flipped values are put in internal_re and internal_study if there was
3767 study data too. */
3768
3769 if (re->magic_number != MAGIC_NUMBER)
3770   {
3771   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3772   if (re == NULL) return PCRE_ERROR_BADMAGIC;
3773   if (study != NULL) study = &internal_study;
3774   }
3775
3776 /* Set up other data */
3777
3778 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3779 startline = (re->options & PCRE_STARTLINE) != 0;
3780 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3781
3782 /* The code starts after the real_pcre block and the capture name table. */
3783
3784 md->start_code = (const uschar *)external_re + re->name_table_offset +
3785   re->name_count * re->name_entry_size;
3786
3787 md->start_subject = (USPTR)subject;
3788 md->start_offset = start_offset;
3789 md->end_subject = md->start_subject + length;
3790 end_subject = md->end_subject;
3791
3792 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3793 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3794
3795 md->notbol = (options & PCRE_NOTBOL) != 0;
3796 md->noteol = (options & PCRE_NOTEOL) != 0;
3797 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3798 md->partial = (options & PCRE_PARTIAL) != 0;
3799 md->hitend = FALSE;
3800
3801 md->recursive = NULL;                   /* No recursion at top level */
3802 md->eptrchain = eptrchain;              /* Make workspace generally available */
3803
3804 md->lcc = tables + lcc_offset;
3805 md->ctypes = tables + ctypes_offset;
3806
3807 /* Handle different types of newline. The two bits give four cases. If nothing
3808 is set at run time, whatever was used at compile time applies. */
3809
3810 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3811        PCRE_NEWLINE_BITS)
3812   {
3813   case 0: newline = NEWLINE; break;   /* Compile-time default */
3814   case PCRE_NEWLINE_CR: newline = '\r'; break;
3815   case PCRE_NEWLINE_LF: newline = '\n'; break;
3816   case PCRE_NEWLINE_CR+
3817        PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3818   case PCRE_NEWLINE_ANY: newline = -1; break;
3819   default: return PCRE_ERROR_BADNEWLINE;
3820   }
3821
3822 if (newline < 0)
3823   {
3824   md->nltype = NLTYPE_ANY;
3825   }
3826 else
3827   {
3828   md->nltype = NLTYPE_FIXED;
3829   if (newline > 255)
3830     {
3831     md->nllen = 2;
3832     md->nl[0] = (newline >> 8) & 255;
3833     md->nl[1] = newline & 255;
3834     }
3835   else
3836     {
3837     md->nllen = 1;
3838     md->nl[0] = newline;
3839     }
3840   }
3841
3842 /* Partial matching is supported only for a restricted set of regexes at the
3843 moment. */
3844
3845 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3846   return PCRE_ERROR_BADPARTIAL;
3847
3848 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3849 back the character offset. */
3850
3851 #ifdef SUPPORT_UTF8
3852 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3853   {
3854   if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3855     return PCRE_ERROR_BADUTF8;
3856   if (start_offset > 0 && start_offset < length)
3857     {
3858     int tb = ((uschar *)subject)[start_offset];
3859     if (tb > 127)
3860       {
3861       tb &= 0xc0;
3862       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3863       }
3864     }
3865   }
3866 #endif
3867
3868 /* The ims options can vary during the matching as a result of the presence
3869 of (?ims) items in the pattern. They are kept in a local variable so that
3870 restoring at the exit of a group is easy. */
3871
3872 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3873
3874 /* If the expression has got more back references than the offsets supplied can
3875 hold, we get a temporary chunk of working store to use during the matching.
3876 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3877 of 3. */
3878
3879 ocount = offsetcount - (offsetcount % 3);
3880
3881 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3882   {
3883   ocount = re->top_backref * 3 + 3;
3884   md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3885   if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3886   using_temporary_offsets = TRUE;
3887   DPRINTF(("Got memory to hold back references\n"));
3888   }
3889 else md->offset_vector = offsets;
3890
3891 md->offset_end = ocount;
3892 md->offset_max = (2*ocount)/3;
3893 md->offset_overflow = FALSE;
3894 md->capture_last = -1;
3895
3896 /* Compute the minimum number of offsets that we need to reset each time. Doing
3897 this makes a huge difference to execution time when there aren't many brackets
3898 in the pattern. */
3899
3900 resetcount = 2 + re->top_bracket * 2;
3901 if (resetcount > offsetcount) resetcount = ocount;
3902
3903 /* Reset the working variable associated with each extraction. These should
3904 never be used unless previously set, but they get saved and restored, and so we
3905 initialize them to avoid reading uninitialized locations. */
3906
3907 if (md->offset_vector != NULL)
3908   {
3909   register int *iptr = md->offset_vector + ocount;
3910   register int *iend = iptr - resetcount/2 + 1;
3911   while (--iptr >= iend) *iptr = -1;
3912   }
3913
3914 /* Set up the first character to match, if available. The first_byte value is
3915 never set for an anchored regular expression, but the anchoring may be forced
3916 at run time, so we have to test for anchoring. The first char may be unset for
3917 an unanchored pattern, of course. If there's no first char and the pattern was
3918 studied, there may be a bitmap of possible first characters. */
3919
3920 if (!anchored)
3921   {
3922   if ((re->options & PCRE_FIRSTSET) != 0)
3923     {
3924     first_byte = re->first_byte & 255;
3925     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3926       first_byte = md->lcc[first_byte];
3927     }
3928   else
3929     if (!startline && study != NULL &&
3930       (study->options & PCRE_STUDY_MAPPED) != 0)
3931         start_bits = study->start_bits;
3932   }
3933
3934 /* For anchored or unanchored matches, there may be a "last known required
3935 character" set. */
3936
3937 if ((re->options & PCRE_REQCHSET) != 0)
3938   {
3939   req_byte = re->req_byte & 255;
3940   req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3941   req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3942   }
3943
3944
3945 /* ==========================================================================*/
3946
3947 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3948 the loop runs just once. */
3949
3950 for(;;)
3951   {
3952   USPTR save_end_subject = end_subject;
3953
3954   /* Reset the maximum number of extractions we might see. */
3955
3956   if (md->offset_vector != NULL)
3957     {
3958     register int *iptr = md->offset_vector;
3959     register int *iend = iptr + resetcount;
3960     while (iptr < iend) *iptr++ = -1;
3961     }
3962
3963   /* Advance to a unique first char if possible. If firstline is TRUE, the
3964   start of the match is constrained to the first line of a multiline string.
3965   That is, the match must be before or at the first newline. Implement this by
3966   temporarily adjusting end_subject so that we stop scanning at a newline. If
3967   the match fails at the newline, later code breaks this loop. */
3968
3969   if (firstline)
3970     {
3971     USPTR t = start_match;
3972     while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3973     end_subject = t;
3974     }
3975
3976   /* Now test for a unique first byte */
3977
3978   if (first_byte >= 0)
3979     {
3980     if (first_byte_caseless)
3981       while (start_match < end_subject &&
3982              md->lcc[*start_match] != first_byte)
3983         start_match++;
3984     else
3985       while (start_match < end_subject && *start_match != first_byte)
3986         start_match++;
3987     }
3988
3989   /* Or to just after a linebreak for a multiline match if possible */
3990
3991   else if (startline)
3992     {
3993     if (start_match > md->start_subject + start_offset)
3994       {
3995       while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3996         start_match++;
3997       }
3998     }
3999
4000   /* Or to a non-unique first char after study */
4001
4002   else if (start_bits != NULL)
4003     {
4004     while (start_match < end_subject)
4005       {
4006       register unsigned int c = *start_match;
4007       if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4008       }
4009     }
4010
4011   /* Restore fudged end_subject */
4012
4013   end_subject = save_end_subject;
4014
4015 #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4016   printf(">>>> Match against: ");
4017   pchars(start_match, end_subject - start_match, TRUE, md);
4018   printf("\n");
4019 #endif
4020
4021   /* If req_byte is set, we know that that character must appear in the subject
4022   for the match to succeed. If the first character is set, req_byte must be
4023   later in the subject; otherwise the test starts at the match point. This
4024   optimization can save a huge amount of backtracking in patterns with nested
4025   unlimited repeats that aren't going to match. Writing separate code for
4026   cased/caseless versions makes it go faster, as does using an autoincrement
4027   and backing off on a match.
4028
4029   HOWEVER: when the subject string is very, very long, searching to its end can
4030   take a long time, and give bad performance on quite ordinary patterns. This
4031   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4032   string... so we don't do this when the string is sufficiently long.
4033
4034   ALSO: this processing is disabled when partial matching is requested.
4035   */
4036
4037   if (req_byte >= 0 &&
4038       end_subject - start_match < REQ_BYTE_MAX &&
4039       !md->partial)
4040     {
4041     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4042
4043     /* We don't need to repeat the search if we haven't yet reached the
4044     place we found it at last time. */
4045
4046     if (p > req_byte_ptr)
4047       {
4048       if (req_byte_caseless)
4049         {
4050         while (p < end_subject)
4051           {
4052           register int pp = *p++;
4053           if (pp == req_byte || pp == req_byte2) { p--; break; }
4054           }
4055         }
4056       else
4057         {
4058         while (p < end_subject)
4059           {
4060           if (*p++ == req_byte) { p--; break; }
4061           }
4062         }
4063
4064       /* If we can't find the required character, break the matching loop,
4065       forcing a match failure. */
4066
4067       if (p >= end_subject)
4068         {
4069         rc = MATCH_NOMATCH;
4070         break;
4071         }
4072
4073       /* If we have found the required character, save the point where we
4074       found it, so that we don't search again next time round the loop if
4075       the start hasn't passed this character yet. */
4076
4077       req_byte_ptr = p;
4078       }
4079     }
4080
4081   /* OK, we can now run the match. */
4082
4083   md->start_match = start_match;
4084   md->match_call_count = 0;
4085   md->eptrn = 0;                          /* Next free eptrchain slot */
4086   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4087
4088   /* Any return other than MATCH_NOMATCH breaks the loop. */
4089
4090   if (rc != MATCH_NOMATCH) break;
4091
4092   /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4093   newline in the subject (though it may continue over the newline). Therefore,
4094   if we have just failed to match, starting at a newline, do not continue. */
4095
4096   if (firstline && IS_NEWLINE(start_match)) break;
4097
4098   /* Advance the match position by one character. */
4099
4100   start_match++;
4101 #ifdef SUPPORT_UTF8
4102   if (utf8)
4103     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4104       start_match++;
4105 #endif
4106
4107   /* Break the loop if the pattern is anchored or if we have passed the end of
4108   the subject. */
4109
4110   if (anchored || start_match > end_subject) break;
4111
4112   /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4113   are now at a LF, advance the match position by one more character. */
4114
4115   if (start_match[-1] == '\r' &&
4116        (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4117        start_match < end_subject &&
4118        *start_match == '\n')
4119     start_match++;
4120
4121   }   /* End of for(;;) "bumpalong" loop */
4122
4123 /* ==========================================================================*/
4124
4125 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4126 conditions is true:
4127
4128 (1) The pattern is anchored;
4129
4130 (2) We are past the end of the subject;
4131
4132 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4133     this option requests that a match occur at or before the first newline in
4134     the subject.
4135
4136 When we have a match and the offset vector is big enough to deal with any
4137 backreferences, captured substring offsets will already be set up. In the case
4138 where we had to get some local store to hold offsets for backreference
4139 processing, copy those that we can. In this case there need not be overflow if
4140 certain parts of the pattern were not used, even though there are more
4141 capturing parentheses than vector slots. */
4142
4143 if (rc == MATCH_MATCH)
4144   {
4145   if (using_temporary_offsets)
4146     {
4147     if (offsetcount >= 4)
4148       {
4149       memcpy(offsets + 2, md->offset_vector + 2,
4150         (offsetcount - 2) * sizeof(int));
4151       DPRINTF(("Copied offsets from temporary memory\n"));
4152       }
4153     if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4154     DPRINTF(("Freeing temporary memory\n"));
4155     (pcre_free)(md->offset_vector);
4156     }
4157
4158   /* Set the return code to the number of captured strings, or 0 if there are
4159   too many to fit into the vector. */
4160
4161   rc = md->offset_overflow? 0 : md->end_offset_top/2;
4162
4163   /* If there is space, set up the whole thing as substring 0. */
4164
4165   if (offsetcount < 2) rc = 0; else
4166     {
4167     offsets[0] = start_match - md->start_subject;
4168     offsets[1] = md->end_match_ptr - md->start_subject;
4169     }
4170
4171   DPRINTF((">>>> returning %d\n", rc));
4172   return rc;
4173   }
4174
4175 /* Control gets here if there has been an error, or if the overall match
4176 attempt has failed at all permitted starting positions. */
4177
4178 if (using_temporary_offsets)
4179   {
4180   DPRINTF(("Freeing temporary memory\n"));
4181   (pcre_free)(md->offset_vector);
4182   }
4183
4184 if (rc != MATCH_NOMATCH)
4185   {
4186   DPRINTF((">>>> error: returning %d\n", rc));
4187   return rc;
4188   }
4189 else if (md->partial && md->hitend)
4190   {
4191   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4192   return PCRE_ERROR_PARTIAL;
4193   }
4194 else
4195   {
4196   DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4197   return PCRE_ERROR_NOMATCH;
4198   }
4199 }
4200
4201 /* End of pcre_exec.c */