April 2024 | ||||||
Mo | Tu | We | Th | Fr | Sa | Su |
1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 23 | 24 | 25 | 26 | 27 | 28 |
29 | 30 | 1 | 2 | 3 | 4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 |
001: /* Definitions for data structures and routines for the regular 002: expression library. 003: Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008,2011 004: Free Software Foundation, Inc. 005: This file is part of the GNU C Library. 006: 007: The GNU C Library is free software; you can redistribute it and/or 008: modify it under the terms of the GNU Lesser General Public 009: License as published by the Free Software Foundation; either 010: version 2.1 of the License, or (at your option) any later version. 011: 012: The GNU C Library is distributed in the hope that it will be useful, 013: but WITHOUT ANY WARRANTY; without even the implied warranty of 014: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 015: Lesser General Public License for more details. 016: 017: You should have received a copy of the GNU Lesser General Public 018: License along with the GNU C Library; if not, write to the Free 019: Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 020: 02111-1307 USA. */ 021: 022: #ifndef _REGEX_H 023: #define _REGEX_H 1 024: 025: #include <sys/types.h> 026: #include <gnu/option-groups.h> 027: 028: /* Allow the use in C++ code. */ 029: #ifdef __cplusplus 030: extern "C" { 031: #endif 032: 033: /* The following two types have to be signed and unsigned integer type 034: wide enough to hold a value of a pointer. For most ANSI compilers 035: ptrdiff_t and size_t should be likely OK. Still size of these two 036: types is 2 for Microsoft C. Ugh... */ 037: typedef long int s_reg_t; 038: typedef unsigned long int active_reg_t; 039: 040: /* The following bits are used to determine the regexp syntax we 041: recognize. The set/not-set meanings are chosen so that Emacs syntax 042: remains the value 0. The bits are given in alphabetical order, and 043: the definitions shifted by one from the previous bit; thus, when we 044: add or remove a bit, only one other definition need change. 
*/ 045: typedef unsigned long int reg_syntax_t; 046: 047: #ifdef __USE_GNU 048: /* If this bit is not set, then \ inside a bracket expression is literal. 049: If set, then such a \ quotes the following character. */ 050: # define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) 051: 052: /* If this bit is not set, then + and ? are operators, and \+ and \? are 053: literals. 054: If set, then \+ and \? are operators and + and ? are literals. */ 055: # define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) 056: 057: /* If this bit is set, then character classes are supported. They are: 058: [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], 059: [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. 060: If not set, then character classes are not supported. */ 061: # define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) 062: 063: /* If this bit is set, then ^ and $ are always anchors (outside bracket 064: expressions, of course). 065: If this bit is not set, then it depends: 066: ^ is an anchor if it is at the beginning of a regular 067: expression or after an open-group or an alternation operator; 068: $ is an anchor if it is at the end of a regular expression, or 069: before a close-group or an alternation operator. 070: 071: This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because 072: POSIX draft 11.2 says that * etc. in leading positions is undefined. 073: We already implemented a previous draft which made those constructs 074: invalid, though, so we haven't changed the code back. */ 075: # define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) 076: 077: /* If this bit is set, then special characters are always special 078: regardless of where they are in the pattern. 079: If this bit is not set, then special characters are special only in 080: some contexts; otherwise they are ordinary. Specifically, 081: * + ? and intervals are only special when not after the beginning, 082: open-group, or alternation operator. 
*/ 083: # define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) 084: 085: /* If this bit is set, then *, +, ?, and { cannot be first in an re or 086: immediately after an alternation or begin-group operator. */ 087: # define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) 088: 089: /* If this bit is set, then . matches newline. 090: If not set, then it doesn't. */ 091: # define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) 092: 093: /* If this bit is set, then . doesn't match NUL. 094: If not set, then it does. */ 095: # define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) 096: 097: /* If this bit is set, nonmatching lists [^...] do not match newline. 098: If not set, they do. */ 099: # define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) 100: 101: /* If this bit is set, either \{...\} or {...} defines an 102: interval, depending on RE_NO_BK_BRACES. 103: If not set, \{, \}, {, and } are literals. */ 104: # define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) 105: 106: /* If this bit is set, +, ? and | aren't recognized as operators. 107: If not set, they are. */ 108: # define RE_LIMITED_OPS (RE_INTERVALS << 1) 109: 110: /* If this bit is set, newline is an alternation operator. 111: If not set, newline is literal. */ 112: # define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) 113: 114: /* If this bit is set, then `{...}' defines an interval, and \{ and \} 115: are literals. 116: If not set, then `\{...\}' defines an interval. */ 117: # define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) 118: 119: /* If this bit is set, (...) defines a group, and \( and \) are literals. 120: If not set, \(...\) defines a group, and ( and ) are literals. */ 121: # define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) 122: 123: /* If this bit is set, then \<digit> matches <digit>. 124: If not set, then \<digit> is a back-reference. */ 125: # define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) 126: 127: /* If this bit is set, then | is an alternation operator, and \| is literal. 
128: If not set, then \| is an alternation operator, and | is literal. */ 129: # define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) 130: 131: /* If this bit is set, then an ending range point collating higher 132: than the starting range point, as in [z-a], is invalid. 133: If not set, then when ending range point collates higher than the 134: starting range point, the range is ignored. */ 135: # define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) 136: 137: /* If this bit is set, then an unmatched ) is ordinary. 138: If not set, then an unmatched ) is invalid. */ 139: # define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) 140: 141: /* If this bit is set, succeed as soon as we match the whole pattern, 142: without further backtracking. */ 143: # define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) 144: 145: /* If this bit is set, do not process the GNU regex operators. 146: If not set, then the GNU regex operators are recognized. */ 147: # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) 148: 149: /* If this bit is set, turn on internal regex debugging. 150: If not set, and debugging was on, turn it off. 151: This only works if regex.c is compiled -DDEBUG. 152: We define this bit always, so that all that's needed to turn on 153: debugging is to recompile regex.c; the calling code can always have 154: this bit set, and it won't affect anything in the normal case. */ 155: # define RE_DEBUG (RE_NO_GNU_OPS << 1) 156: 157: /* If this bit is set, a syntactically invalid interval is treated as 158: a string of ordinary characters. For example, the ERE 'a{1' is 159: treated as 'a\{1'. */ 160: # define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) 161: 162: /* EGLIBC: Old regex implementation does not support these. */ 163: # ifdef __OPTION_POSIX_REGEXP_GLIBC 164: /* If this bit is set, then ignore case when matching. 165: If not set, then case is significant. 
*/ 166: # define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) 167: 168: /* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only 169: for ^, because it is difficult to scan the regex backwards to find 170: whether ^ should be special. */ 171: # define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) 172: 173: /* If this bit is set, then \{ cannot be first in a BRE or 174: immediately after an alternation or begin-group operator. */ 175: # define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) 176: 177: /* If this bit is set, then no_sub will be set to 1 during 178: re_compile_pattern. */ 179: # define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) 180: # endif /* __OPTION_POSIX_REGEXP_GLIBC */ 181: #endif 182: 183: /* This global variable defines the particular regexp syntax to use (for 184: some interfaces). When a regexp is compiled, the syntax used is 185: stored in the pattern buffer, so changing this does not affect 186: already-compiled regexps. */ 187: extern reg_syntax_t re_syntax_options; 188: 189: #ifdef __USE_GNU 190: /* Define combinations of the above bits for the standard possibilities. 191: (The [[[ comments delimit what gets put into the Texinfo file, so 192: don't delete them!) 
*/ 193: /* [[[begin syntaxes]]] */ 194: #define RE_SYNTAX_EMACS 0 195: 196: #define RE_SYNTAX_AWK \ 197: (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ 198: | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 199: | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ 200: | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ 201: | RE_CHAR_CLASSES \ 202: | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) 203: 204: #define RE_SYNTAX_GNU_AWK \ 205: ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ 206: | RE_INVALID_INTERVAL_ORD) \ 207: & ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS \ 208: | RE_CONTEXT_INVALID_OPS )) 209: 210: #define RE_SYNTAX_POSIX_AWK \ 211: (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ 212: | RE_INTERVALS | RE_NO_GNU_OPS \ 213: | RE_INVALID_INTERVAL_ORD) 214: 215: #define RE_SYNTAX_GREP \ 216: (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ 217: | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ 218: | RE_NEWLINE_ALT) 219: 220: #define RE_SYNTAX_EGREP \ 221: (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ 222: | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ 223: | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ 224: | RE_NO_BK_VBAR) 225: 226: #define RE_SYNTAX_POSIX_EGREP \ 227: (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ 228: | RE_INVALID_INTERVAL_ORD) 229: 230: /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ 231: #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC 232: 233: #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC 234: 235: /* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ 236: #define _RE_SYNTAX_POSIX_COMMON \ 237: (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ 238: | RE_INTERVALS | RE_NO_EMPTY_RANGES) 239: 240: #ifdef __OPTION_POSIX_REGEXP_GLIBC 241: #define RE_SYNTAX_POSIX_BASIC \ 242: (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) 243: #else 244: #define RE_SYNTAX_POSIX_BASIC \ 245: (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) 246: #endif 247: 248: /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes 249: RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this 250: isn't minimal, since other operators, such as \`, aren't disabled. */ 251: #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ 252: (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) 253: 254: #define RE_SYNTAX_POSIX_EXTENDED \ 255: (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 256: | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ 257: | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ 258: | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) 259: 260: /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is 261: removed and RE_NO_BK_REFS is added. */ 262: #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ 263: (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ 264: | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ 265: | RE_NO_BK_PARENS | RE_NO_BK_REFS \ 266: | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) 267: /* [[[end syntaxes]]] */ 268: 269: /* Maximum number of duplicates an interval can allow. Some systems 270: (erroneously) define this in other header files, but we want our 271: value, so remove any previous define. */ 272: # ifdef RE_DUP_MAX 273: # undef RE_DUP_MAX 274: # endif 275: /* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ 276: # define RE_DUP_MAX (0x7fff) 277: #endif 278: 279: 280: /* POSIX `cflags' bits (i.e., information for `regcomp'). */ 281: 282: /* If this bit is set, then use extended regular expression syntax. 283: If not set, then use basic regular expression syntax. 
*/ 284: #define REG_EXTENDED 1 285: 286: /* If this bit is set, then ignore case when matching. 287: If not set, then case is significant. */ 288: #define REG_ICASE (REG_EXTENDED << 1) 289: 290: /* If this bit is set, then anchors do not match at newline 291: characters in the string. 292: If not set, then anchors do match at newlines. */ 293: #define REG_NEWLINE (REG_ICASE << 1) 294: 295: /* If this bit is set, then report only success or fail in regexec. 296: If not set, then returns differ between not matching and errors. */ 297: #define REG_NOSUB (REG_NEWLINE << 1) 298: 299: 300: /* POSIX `eflags' bits (i.e., information for regexec). */ 301: 302: /* If this bit is set, then the beginning-of-line operator doesn't match 303: the beginning of the string (presumably because it's not the 304: beginning of a line). 305: If not set, then the beginning-of-line operator does match the 306: beginning of the string. */ 307: #define REG_NOTBOL 1 308: 309: /* Like REG_NOTBOL, except for the end-of-line. */ 310: #define REG_NOTEOL (1 << 1) 311: 312: #ifdef __OPTION_POSIX_REGEXP_GLIBC 313: /* Use PMATCH[0] to delimit the start and end of the search in the 314: buffer. */ 315: #define REG_STARTEND (1 << 2) 316: #endif 317: 318: 319: /* If any error codes are removed, changed, or added, update the 320: `re_error_msg' table in regex.c. */ 321: typedef enum 322: { 323: #if defined _XOPEN_SOURCE || defined __USE_XOPEN2K 324: REG_ENOSYS = -1, /* This will never happen for this implementation. */ 325: #endif 326: 327: REG_NOERROR = 0, /* Success. */ 328: REG_NOMATCH, /* Didn't find a match (for regexec). */ 329: 330: /* POSIX regcomp return error codes. (In the order listed in the 331: standard.) */ 332: REG_BADPAT, /* Invalid pattern. */ 333: REG_ECOLLATE, /* Invalid collating element. */ 334: REG_ECTYPE, /* Invalid character class name. */ 335: REG_EESCAPE, /* Trailing backslash. */ 336: REG_ESUBREG, /* Invalid back reference. */ 337: REG_EBRACK, /* Unmatched left bracket. 
*/ 338: REG_EPAREN, /* Parenthesis imbalance. */ 339: REG_EBRACE, /* Unmatched \{. */ 340: REG_BADBR, /* Invalid contents of \{\}. */ 341: REG_ERANGE, /* Invalid range end. */ 342: REG_ESPACE, /* Ran out of memory. */ 343: REG_BADRPT, /* No preceding re for repetition op. */ 344: 345: /* Error codes we've added. */ 346: REG_EEND, /* Premature end. */ 347: REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ 348: REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ 349: } reg_errcode_t; 350: 351: /* This data structure represents a compiled pattern. Before calling 352: the pattern compiler, the fields `buffer', `allocated', `fastmap', 353: and `translate' can be set. After the pattern has been compiled, 354: the fields `re_nsub', `not_bol' and `not_eol' are available. All 355: other fields are private to the regex routines. */ 356: 357: #ifndef RE_TRANSLATE_TYPE 358: # define __RE_TRANSLATE_TYPE unsigned char * 359: # ifdef __USE_GNU 360: # define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE 361: # endif 362: #endif 363: 364: #ifdef __USE_GNU 365: # define __REPB_PREFIX(name) name 366: #else 367: # define __REPB_PREFIX(name) __##name 368: #endif 369: 370: struct re_pattern_buffer 371: { 372: /* Space that holds the compiled pattern. It is declared as 373: `unsigned char *' because its elements are sometimes used as 374: array indexes. */ 375: unsigned char *__REPB_PREFIX(buffer); 376: 377: /* Number of bytes to which `buffer' points. */ 378: unsigned long int __REPB_PREFIX(allocated); 379: 380: /* Number of bytes actually used in `buffer'. */ 381: unsigned long int __REPB_PREFIX(used); 382: 383: /* Syntax setting with which the pattern was compiled. */ 384: reg_syntax_t __REPB_PREFIX(syntax); 385: 386: /* Pointer to a fastmap, if any, otherwise zero. re_search uses the 387: fastmap, if there is one, to skip over impossible starting points 388: for matches. 
*/ 389: char *__REPB_PREFIX(fastmap); 390: 391: /* Either a translate table to apply to all characters before 392: comparing them, or zero for no translation. The translation is 393: applied to a pattern when it is compiled and to a string when it 394: is matched. */ 395: __RE_TRANSLATE_TYPE __REPB_PREFIX(translate); 396: 397: /* Number of subexpressions found by the compiler. */ 398: size_t re_nsub; 399: 400: /* Zero if this pattern cannot match the empty string, one else. 401: Well, in truth it's used only in `re_search_2', to see whether or 402: not we should use the fastmap, so we don't set this absolutely 403: perfectly; see `re_compile_fastmap' (the `duplicate' case). */ 404: unsigned __REPB_PREFIX(can_be_null) : 1; 405: 406: /* If REGS_UNALLOCATED, allocate space in the `regs' structure 407: for `max (RE_NREGS, re_nsub + 1)' groups. 408: If REGS_REALLOCATE, reallocate space if necessary. 409: If REGS_FIXED, use what's there. */ 410: #ifdef __USE_GNU 411: # define REGS_UNALLOCATED 0 412: # define REGS_REALLOCATE 1 413: # define REGS_FIXED 2 414: #endif 415: unsigned __REPB_PREFIX(regs_allocated) : 2; 416: 417: /* Set to zero when `regex_compile' compiles a pattern; set to one 418: by `re_compile_fastmap' if it updates the fastmap. */ 419: unsigned __REPB_PREFIX(fastmap_accurate) : 1; 420: 421: /* If set, `re_match_2' does not return information about 422: subexpressions. */ 423: unsigned __REPB_PREFIX(no_sub) : 1; 424: 425: /* If set, a beginning-of-line anchor doesn't match at the beginning 426: of the string. */ 427: unsigned __REPB_PREFIX(not_bol) : 1; 428: 429: /* Similarly for an end-of-line anchor. */ 430: unsigned __REPB_PREFIX(not_eol) : 1; 431: 432: /* If true, an anchor at a newline matches. */ 433: unsigned __REPB_PREFIX(newline_anchor) : 1; 434: }; 435: 436: typedef struct re_pattern_buffer regex_t; 437: 438: /* Type for byte offsets within the string. POSIX mandates this. 
*/ 439: typedef int regoff_t; 440: 441: 442: #ifdef __USE_GNU 443: /* This is the structure we store register match data in. See 444: regex.texinfo for a full description of what registers match. */ 445: struct re_registers 446: { 447: unsigned num_regs; 448: regoff_t *start; 449: regoff_t *end; 450: }; 451: 452: 453: /* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, 454: `re_match_2' returns information about at least this many registers 455: the first time a `regs' structure is passed. */ 456: # ifndef RE_NREGS 457: # define RE_NREGS 30 458: # endif 459: #endif 460: 461: 462: /* POSIX specification for registers. Aside from the different names than 463: `re_registers', POSIX uses an array of structures, instead of a 464: structure of arrays. */ 465: typedef struct 466: { 467: regoff_t rm_so; /* Byte offset from string's start to substring's start. */ 468: regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ 469: } regmatch_t; 470: 471: /* Declarations for routines. */ 472: 473: #ifdef __USE_GNU 474: /* Sets the current default syntax to SYNTAX, and return the old syntax. 475: You can also simply assign to the `re_syntax_options' variable. */ 476: extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax); 477: 478: /* Compile the regular expression PATTERN, with length LENGTH 479: and syntax given by the global `re_syntax_options', into the buffer 480: BUFFER. Return NULL if successful, and an error string if not. 481: 482: To free the allocated storage, you must call `regfree' on BUFFER. 483: Note that the translate table must either have been initialised by 484: `regcomp', with a malloc'ed value, or set to NULL before calling 485: `regfree'. */ 486: extern const char *re_compile_pattern (const char *__pattern, size_t __length, 487: struct re_pattern_buffer *__buffer); 488: 489: 490: /* Compile a fastmap for the compiled pattern in BUFFER; used to 491: accelerate searches. 
Return 0 if successful and -2 if there was an 492: internal error. */ 493: extern int re_compile_fastmap (struct re_pattern_buffer *__buffer); 494: 495: 496: /* Search in the string STRING (with length LENGTH) for the pattern 497: compiled into BUFFER. Start searching at position START, for RANGE 498: characters. Return the starting position of the match, -1 for no 499: match, or -2 for an internal error. Also return register 500: information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */ 501: extern int re_search (struct re_pattern_buffer *__buffer, const char *__string, 502: int __length, int __start, int __range, 503: struct re_registers *__regs); 504: 505: 506: /* Like `re_search', but search in the concatenation of STRING1 and 507: STRING2. Also, stop searching at index START + STOP. */ 508: extern int re_search_2 (struct re_pattern_buffer *__buffer, 509: const char *__string1, int __length1, 510: const char *__string2, int __length2, int __start, 511: int __range, struct re_registers *__regs, int __stop); 512: 513: 514: /* Like `re_search', but return how many characters in STRING the regexp 515: in BUFFER matched, starting at position START. */ 516: extern int re_match (struct re_pattern_buffer *__buffer, const char *__string, 517: int __length, int __start, struct re_registers *__regs); 518: 519: 520: /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ 521: extern int re_match_2 (struct re_pattern_buffer *__buffer, 522: const char *__string1, int __length1, 523: const char *__string2, int __length2, int __start, 524: struct re_registers *__regs, int __stop); 525: 526: 527: /* Set REGS to hold NUM_REGS registers, storing them in STARTS and 528: ENDS. Subsequent matches using BUFFER and REGS will use this memory 529: for recording register information. STARTS and ENDS must be 530: allocated with malloc, and must each be at least `NUM_REGS * sizeof 531: (regoff_t)' bytes long. 
532: 533: If NUM_REGS == 0, then subsequent matches should allocate their own 534: register data. 535: 536: Unless this function is called, the first search or match using 537: PATTERN_BUFFER will allocate its own register data, without 538: freeing the old data. */ 539: extern void re_set_registers (struct re_pattern_buffer *__buffer, 540: struct re_registers *__regs, 541: unsigned int __num_regs, 542: regoff_t *__starts, regoff_t *__ends); 543: #endif /* Use GNU */ 544: 545: #if defined _REGEX_RE_COMP || (defined _LIBC && defined __USE_BSD) 546: # ifndef _CRAY 547: /* 4.2 bsd compatibility. */ 548: extern char *re_comp (const char *); 549: extern int re_exec (const char *); 550: # endif 551: #endif 552: 553: /* GCC 2.95 and later have "__restrict"; C99 compilers have 554: "restrict", and "configure" may have defined "restrict". */ 555: #ifndef __restrict 556: # if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) 557: # if defined restrict || 199901L <= __STDC_VERSION__ 558: # define __restrict restrict 559: # else 560: # define __restrict 561: # endif 562: # endif 563: #endif 564: /* gcc 3.1 and up support the [restrict] syntax. */ 565: #ifndef __restrict_arr 566: # if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) \ 567: && !defined __GNUG__ 568: # define __restrict_arr __restrict 569: # else 570: # define __restrict_arr 571: # endif 572: #endif 573: 574: /* POSIX compatibility. 
*/ 575: extern int regcomp (regex_t *__restrict __preg, 576: const char *__restrict __pattern, 577: int __cflags); 578: 579: extern int regexec (const regex_t *__restrict __preg, 580: const char *__restrict __string, size_t __nmatch, 581: regmatch_t __pmatch[__restrict_arr], 582: int __eflags); 583: 584: extern size_t regerror (int __errcode, const regex_t *__restrict __preg, 585: char *__restrict __errbuf, size_t __errbuf_size); 586: 587: extern void regfree (regex_t *__preg); 588: 589: 590: #ifdef __cplusplus 591: } 592: #endif /* C++ */ 593: 594: #endif /* regex.h */ 595: