2 * Copyright 2000, International Business Machines Corporation and others.
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
15 * Revision 1.1.7.2 1994/08/25 17:34:40
17 * [1994/08/25 17:23:12]
19 * Revision 1.1.7.1 1994/06/09 13:50:35
20 * fixed copyright in src/file
21 * [1994/06/08 21:23:56]
23 * Revision 1.1.5.3 1993/01/18 19:51:30
24 * Embedded copyright notice
25 * [1993/01/18 19:26:02
27 * Revision 1.1.5.2 1992/11/24 15:37:53
28 * Change include file install directory from .../afs to .../dcedfs.
29 * [1992/11/22 16:05:26]
31 * Revision 1.1.3.3 1992/01/24 01:48:02
33 * [1992/01/24 00:02:28]
35 * Revision 1.1.3.2 1992/01/23 19:03:28
36 * Moving files onto the branch for dfs6.3 merge
37 * [1992/01/23 18:19:02]
38 * Revision 1.1.1.2 1992/01/22 19:29:25
39 * Bring in dfs6.3 sources
41 * Revision 1.1 1992/01/19 02:56:43
47 * regex.c -- regular expression patter matching functions
51 #include <afs/param.h>
53 * routines to do regular expression matching
59 * ... returns 0 if the string s was compiled successfully,
60 * a pointer to an error message otherwise.
61 * If passed 0 or a null string returns without changing
62 * the currently compiled re (see note 11 below).
66 * ... returns 1 if the string s matches the last compiled regular
68 * 0 if the string s failed to match the last compiled
69 * regular expression, and
70 * -1 if the compiled regular expression was invalid
71 * (indicating an internal error).
73 * The strings passed to both re_comp and re_exec may have trailing or
74 * embedded newline characters; they are terminated by nulls.
76 * The identity of the author of these routines is lost in antiquity;
77 * this is essentially the same as the re code in the original V6 ed.
79 * The regular expressions recognized are described below. This description
80 * is essentially the same as that for ed.
82 * A regular expression specifies a set of strings of characters.
83 * A member of this set of strings is said to be matched by
84 * the regular expression. In the following specification for
85 * regular expressions the word `character' means any character but NUL.
87 * 1. Any character except a special character matches itself.
88 * Special characters are the regular expression delimiter plus
89 * \ [ . and sometimes ^ * $.
90 * 2. A . matches any character.
91 * 3. A \ followed by any character except a digit or ( )
92 * matches that character.
93 * 4. A nonempty string s bracketed [s] (or [^s]) matches any
94 * character in (or not in) s. In s, \ has no special meaning,
95 * and ] may only appear as the first letter. A substring
96 * a-b, with a and b in ascending ASCII order, stands for
97 * the inclusive range of ASCII characters.
98 * 5. A regular expression of form 1-4 followed by * matches a
99 * sequence of 0 or more matches of the regular expression.
100 * 6. A regular expression, x, of form 1-8, bracketed \(x\)
101 * matches what x matches.
102 * 7. A \ followed by a digit n matches a copy of the string that the
103 * bracketed regular expression beginning with the nth \( matched.
104 * 8. A regular expression of form 1-8, x, followed by a regular
105 * expression of form 1-7, y matches a match for x followed by
106 * a match for y, with the x match being as long as possible
107 * while still permitting a y match.
108 * 9. A regular expression of form 1-8 preceded by ^ (or followed
109 * by $), is constrained to matches that begin at the left
110 * (or end at the right) end of a line.
111 * 10. A regular expression of form 1-9 picks out the longest among
112 * the leftmost matches in a line.
113 * 11. An empty regular expression stands for a copy of the last
114 * regular expression encountered.
135 static char expbuf[ESIZE], *braslist[NBRA], *braelist[NBRA];
141 static int advance();
142 static int backref();
147 * compile the regular expression argument into a dfa
149 char * re_comp(register char *sp)
152 register char *ep = expbuf;
153 int cclcnt, numbra = 0;
156 char *bracketp = &bracket[0];
157 static char *retoolong = "Regular expression too long";
159 #define comperr(msg) {expbuf[0] = 0; numbra = 0; return(msg); }
161 if (sp == 0 || *sp == '\0') {
163 return("No previous regular expression");
173 if (ep >= &expbuf[ESIZE])
175 if ((c = *sp++) == '\0') {
176 if (bracketp != bracket)
177 comperr("unmatched \\(");
191 if (lastep == 0 || *lastep == CBRA || *lastep == CKET)
206 if ((c = *sp++) == '^') {
212 comperr("missing ]");
213 if (c == '-' && ep [-1] != 0) {
214 if ((c = *sp++) == ']') {
223 if (ep >= &expbuf[ESIZE])
229 if (ep >= &expbuf[ESIZE])
231 } while ((c = *sp++) != ']');
236 if ((c = *sp++) == '(') {
238 comperr("too many \\(\\) pairs");
239 *bracketp++ = numbra;
245 if (bracketp <= bracket)
246 comperr("unmatched \\)");
251 if (c >= '1' && c < ('1' + NBRA)) {
269 * match the argument string against the compiled re
271 int re_exec(register char *p1)
273 register char *p2 = expbuf;
277 for (c = 0; c < NBRA; c++) {
282 return((advance(p1, p2)));
284 * fast check for first character
291 if (rv = advance(p1, p2))
300 if (rv = advance(p1, p2))
307 * try to match the next thing in the dfa
311 register char *lp, *ep;
313 register char *curlp;
339 if (cclass(ep, *lp++, 1)) {
346 if (cclass(ep, *lp++, 0)) {
353 braslist[*ep++] = lp;
357 braelist[*ep++] = lp;
361 if (braelist[i = *ep++] == 0)
363 if (backref(i, lp)) {
364 lp += braelist[i] - braslist[i];
370 if (braelist[i = *ep++] == 0)
373 ct = braelist[i] - braslist[i];
374 while (backref(i, lp))
376 while (lp >= curlp) {
377 if (rv = advance(lp, ep))
399 while (cclass(ep, *lp++, ep[-1] == (CCL|CSTAR)))
407 if (rv = advance(lp, ep))
409 } while (lp > curlp);
425 while (*bp++ == *lp++)
426 if (bp >= braelist[i])
433 register char *set, c;