ALT Linux Bugzilla
– Attachment 1209 Details for
Bug 8314
mawk's regexps don't conform to POSIX
New bug
|
Search
|
[?]
|
Help
Register
|
Log In
[x]
|
Forgot Password
Login:
[x]
|
EN
|
RU
[patch]
Patch by Aleksey Cheusov
mawk_external_regexp.patch (text/plain), 10.11 KB, created by
Kirill A. Shutemov
on 2005-10-21 16:37:56 MSD
(
hide
)
Description:
Patch by Aleksey Cheusov
Filename: mawk_external_regexp.patch
MIME Type: text/plain
Creator:
Kirill A. Shutemov
Created:
2005-10-21 16:37:56 MSD
Size:
10.11 KB
patch
obsolete
>diff -Nur ../mawk-1.3.3-orig/configure.in ./configure.in >--- ../mawk-1.3.3-orig/configure.in 1995-10-16 14:25:00.000000000 +0200 >+++ ./configure.in 2005-06-09 20:35:40.000000000 +0300 >@@ -32,10 +32,20 @@ > dnl > AC_INIT(mawk.h) > builtin(include,mawk.ac.m4) >+ > GET_USER_DEFAULTS > PROG_CC_NO_MINUS_G_NONSENSE > AC_PROG_CPP > NOTSET_THEN_DEFAULT(CFLAGS,-O) >+ >+AC_ARG_WITH(local-regexp, >+[ --with-local-regexp use the mawk's regexp engine], >+[ >+if test "x${withval}" = xyes; then >+ CFLAGS="$CFLAGS -DLOCAL_REGEXP" >+fi >+]) >+ > LOOK_FOR_MATH_LIBRARY > WHICH_YACC > COMPILER_ATTRIBUTES >@@ -45,5 +55,6 @@ > FPRINTF_IN_STDIO > FIND_OR_COMPUTE_MAX__INT > DREADED_FPE_TESTS >+ > DO_CONFIG_H > AC_OUTPUT(Makefile) >diff -Nur ../mawk-1.3.3-orig/main.c ./main.c >--- ../mawk-1.3.3-orig/main.c 1995-06-10 01:57:19.000000000 +0300 >+++ ./main.c 2005-06-09 20:35:40.000000000 +0300 >@@ -44,6 +44,10 @@ > > /* main.c */ > >+#ifndef LOCAL_REGEXP >+# include <locale.h> >+#endif >+ > #include "mawk.h" > #include "init.h" > #include "code.h" >@@ -53,11 +57,22 @@ > short mawk_state ; /* 0 is compiling */ > int exit_code ; > >+static void initialize_locale (void) >+{ >+#ifndef LOCAL_REGEXP >+ setlocale(LC_CTYPE, ""); >+ setlocale(LC_COLLATE, ""); >+ setlocale(LC_MESSAGES, ""); >+ setlocale(LC_NUMERIC, "C"); >+ setlocale(LC_TIME, ""); >+#endif >+} >+ > int > main(argc, argv) > int argc ; char **argv ; > { >- >+ initialize_locale (); > initialize(argc, argv) ; > > parse() ; >diff -Nur ../mawk-1.3.3-orig/Makefile.in ./Makefile.in >--- ../mawk-1.3.3-orig/Makefile.in 1996-02-01 07:05:40.000000000 +0200 >+++ ./Makefile.in 2005-06-09 20:35:40.000000000 +0300 >@@ -6,7 +6,7 @@ > CC = @CC@ > > CFLAGS = @CFLAGS@ >- >+LDFLAGS = @LDFLAGS@ > MATHLIB = @MATHLIB@ > > YACC = @YACC@ >@@ -25,17 +25,13 @@ > fin.o files.o scancode.o matherr.o fcall.o version.o\ > missing.o > >-REXP_O=rexp/rexp.o rexp/rexp0.o rexp/rexp1.o rexp/rexp2.o\ >- rexp/rexp3.o >- >-REXP_C=rexp/rexp.c rexp/rexp0.c rexp/rexp1.c 
rexp/rexp2.c\ >- rexp/rexp3.c >- >+REXP_O=rexp/regexp.o >+REXP_C=rexp/regexp.c > > mawk_and_test : mawk mawk_test fpe_test > > mawk : $(O) rexp/.done >- $(CC) $(CFLAGS) -o mawk $(O) $(REXP_O) $(MATHLIB) >+ $(CC) $(LDFLAGS) -o mawk $(O) $(REXP_O) $(MATHLIB) > > mawk_test : mawk # test that we have a sane mawk > @cp mawk test/mawk >diff -Nur ../mawk-1.3.3-orig/re_cmpl.c ./re_cmpl.c >--- ../mawk-1.3.3-orig/re_cmpl.c 2005-06-09 12:06:39.000000000 +0300 >+++ ./re_cmpl.c 2005-06-09 20:35:40.000000000 +0300 >@@ -102,10 +102,10 @@ > if (!(p->re = REcompile(s))) > { > if (mawk_state == EXECUTION) >- rt_error(efmt, REerrlist[REerrno], s) ; >+ rt_error(efmt, REerror (), s) ; > else /* compiling */ > { >- compile_error(efmt, REerrlist[REerrno], s) ; >+ compile_error(efmt, REerror(), s) ; > return (PTR) 0 ; > } > } >diff -Nur ../mawk-1.3.3-orig/regexp.h ./regexp.h >--- ../mawk-1.3.3-orig/regexp.h 1993-07-03 21:58:19.000000000 +0300 >+++ ./regexp.h 2005-06-09 20:35:40.000000000 +0300 >@@ -25,8 +25,4 @@ > int PROTO( REtest, (char *, PTR) ) ; > char *PROTO( REmatch, (char *, PTR, unsigned *) ) ; > void PROTO( REmprint, (PTR , FILE*) ) ; >- >-extern int REerrno ; >-extern char *REerrlist[] ; >- >- >+char *PROTO( REerror, (void)) ; >diff -Nur ../mawk-1.3.3-orig/rexp/Makefile ./rexp/Makefile >--- ../mawk-1.3.3-orig/rexp/Makefile 1993-07-24 20:55:07.000000000 +0300 >+++ ./rexp/Makefile 2005-06-09 20:35:40.000000000 +0300 >@@ -9,8 +9,8 @@ > CC = cc > CFLAGS = -O -DMAWK -I.. 
> >-O=rexp.o rexp0.o rexp1.o rexp2.o rexp3.o >-DB=rexpdb.o >+O=regexp.o >+DB= > > all : $(O) > @cat </dev/null > .done >diff -Nur ../mawk-1.3.3-orig/rexp/regexp.c ./rexp/regexp.c >--- ../mawk-1.3.3-orig/rexp/regexp.c 1970-01-01 03:00:00.000000000 +0300 >+++ ./rexp/regexp.c 2005-06-09 20:35:40.000000000 +0300 >@@ -0,0 +1,12 @@ >+#ifdef LOCAL_REGEXP >+# include "rexp.c" >+# include "rexp0.c" >+# include "rexp1.c" >+# include "rexp2.c" >+# include "rexp3.c" >+# include "rexp4.c" >+# include "rexpdb.c" >+#else >+# include "rexp4.c" >+# include "regexp_system.c" >+#endif >diff -Nur ../mawk-1.3.3-orig/rexp/regexp_system.c ./rexp/regexp_system.c >--- ../mawk-1.3.3-orig/rexp/regexp_system.c 1970-01-01 03:00:00.000000000 +0300 >+++ ./rexp/regexp_system.c 2005-06-09 20:52:32.000000000 +0300 >@@ -0,0 +1,140 @@ >+#include <sys/types.h> >+#include <stdio.h> >+#include <regex.h> >+#include <string.h> >+#include <stdlib.h> >+#include <errno.h> >+ >+typedef struct { >+ regex_t re; >+ char *regexp; >+} mawk_re_t; >+ >+static mawk_re_t *last_used_regexp = NULL; >+static int err_code = 0; >+ >+void prepare_regexp (regexp) >+ char *regexp; >+{ >+ int bs = 0; >+ char *tail = regexp; >+ char ch; >+ >+ while ((ch = *regexp++) != 0){ >+ if (bs){ >+ switch (ch){ >+ case 'n': *tail++ = '\n'; break; >+ case 't': *tail++ = '\t'; break; >+ case 'f': *tail++ = '\f'; break; >+ case 'b': *tail++ = '\b'; break; >+ case 'r': *tail++ = '\r'; break; >+ case 'a': *tail++ = '\07'; break; >+ case 'v': *tail++ = '\013'; break; >+ default: *tail++ = '\\'; *tail++ = ch; >+ } >+ >+ bs = 0; >+ }else{ >+ if (ch == '\\'){ >+ bs = 1; >+ }else{ >+ *tail++ = ch; >+ } >+ } >+ } >+ >+ *tail = 0; >+} >+ >+void * REcompile(regexp) >+ char *regexp; >+{ >+ mawk_re_t *re = (mawk_re_t*) malloc (sizeof (mawk_re_t)); >+ size_t len = strlen (regexp); >+ char *new_regexp = (char *) malloc (len + 3); >+/* fprintf (stderr, "REcompile: %s\n", regexp); */ >+ >+ if (!re || !new_regexp) >+ return NULL; >+ >+ new_regexp [0] = '('; 
>+ memcpy (new_regexp + 1, regexp, len); >+ new_regexp [len+1] = ')'; >+ new_regexp [len+2] = 0; >+ >+ prepare_regexp (new_regexp); >+ >+ last_used_regexp = re; >+ >+ memset (re, 0, sizeof (mawk_re_t)); >+ re -> regexp = strdup (new_regexp); >+ err_code = regcomp (&re->re, new_regexp, REG_EXTENDED | REG_NEWLINE); >+ >+ free (new_regexp); >+ >+ if (err_code) >+ return NULL; >+ >+ return re; >+} >+ >+int >+REtest(str, re) >+ char *str ; >+ mawk_re_t* re ; >+{ >+/* fprintf (stderr, "REtest: \"%s\" ~ /%s/", str, re -> regexp); */ >+ >+ last_used_regexp = re; >+ >+ if (regexec (&re->re, str, 0, NULL, 0)){ >+/* fprintf (stderr, "=1\n"); */ >+ return 0; >+ }else{ >+/* fprintf (stderr, "=0\n"); */ >+ return 1; >+ } >+} >+ >+char *REmatch(str, re, lenp) >+ char *str ; >+ mawk_re_t* re ; >+ unsigned *lenp ; >+{ >+ regmatch_t match [100]; >+/* fprintf (stderr, "REmatch: \"%s\" ~ /%s/", str, re -> regexp); */ >+ >+ last_used_regexp = re; >+ >+ if (!regexec (&re->re, str, 100, match, 0)){ >+ *lenp = match [0].rm_eo - match[0].rm_so; >+/* fprintf (stderr, "=%i/%i\n", match [0].rm_so, *lenp); */ >+ return str + match [0].rm_so; >+ }else{ >+/* fprintf (stderr, "=0\n"); */ >+ return NULL; >+ } >+} >+ >+void REmprint(m, f) >+ void * m ; >+ FILE *f ; >+{ >+ /* no debugging code available */ >+ abort (); >+} >+ >+static char error_buffer [2048]; >+ >+char *REerror () >+{ >+ size_t len; >+ if (last_used_regexp){ >+ len = regerror (err_code, &last_used_regexp -> re, >+ error_buffer, sizeof (error_buffer)); >+ return error_buffer; >+ }else{ >+ snprintf (error_buffer, sizeof (error_buffer), "malloc failed: %s", >+ strerror (errno)); >+ } >+} >diff -Nur ../mawk-1.3.3-orig/rexp/rexp2.c ./rexp/rexp2.c >--- ../mawk-1.3.3-orig/rexp/rexp2.c 1995-06-10 01:45:34.000000000 +0300 >+++ ./rexp/rexp2.c 2005-06-09 20:35:40.000000000 +0300 >@@ -318,18 +318,6 @@ > > #ifdef MAWK > >-char * >-is_string_split(p, lenp) >- register STATE *p ; >- unsigned *lenp ; >-{ >- if (p[0].type == M_STR && p[1].type == 
M_ACCEPT) >- { >- *lenp = p->len ; >- return p->data.str ; >- } >- else return (char *) 0 ; >-} > #else /* mawk provides its own str_str */ > > char * >diff -Nur ../mawk-1.3.3-orig/rexp/rexp4.c ./rexp/rexp4.c >--- ../mawk-1.3.3-orig/rexp/rexp4.c 1970-01-01 03:00:00.000000000 +0300 >+++ ./rexp/rexp4.c 2005-06-09 20:35:40.000000000 +0300 >@@ -0,0 +1,14 @@ >+#include "rexp.h" >+ >+char * >+is_string_split(p, lenp) >+ register STATE *p ; >+ unsigned *lenp ; >+{ >+ if (p[0].type == M_STR && p[1].type == M_ACCEPT) >+ { >+ *lenp = p->len ; >+ return p->data.str ; >+ } >+ else return (char *) 0 ; >+} >diff -Nur ../mawk-1.3.3-orig/rexp/rexp.c ./rexp/rexp.c >--- ../mawk-1.3.3-orig/rexp/rexp.c 2005-06-09 12:06:39.000000000 +0300 >+++ ./rexp/rexp.c 2005-06-09 20:35:40.000000000 +0300 >@@ -236,3 +236,9 @@ > fprintf(stderr, "REcompile() - panic: %s\n", s) ; > exit(100) ; > } >+ >+/* getting regexp error message */ >+char *REerror (void) >+{ >+ return REerrlist [REerrno]; >+} >diff -Nur ../mawk-1.3.3-orig/test/mawktest ./test/mawktest >--- ../mawk-1.3.3-orig/test/mawktest 2005-06-09 12:06:39.000000000 +0300 >+++ ./test/mawktest 2005-06-09 20:35:40.000000000 +0300 >@@ -22,16 +22,16 @@ > echo > echo testing input and field splitting > >-mawk -f wc.awk $dat | cmp -s - wc-awk.out || exit >+LC_ALL=C mawk -f wc.awk $dat | cmp -s - wc-awk.out || exit > > echo input and field splitting OK > ##################################### > > echo > echo testing regular expression matching >-mawk -f reg0.awk $dat > temp$$ >-mawk -f reg1.awk $dat >> temp$$ >-mawk -f reg2.awk $dat >> temp$$ >+LC_ALL=C mawk -f reg0.awk $dat > temp$$ >+LC_ALL=C mawk -f reg1.awk $dat >> temp$$ >+LC_ALL=C mawk -f reg2.awk $dat >> temp$$ > > cmp -s reg-awk.out temp$$ || exit > >@@ -42,10 +42,10 @@ > if [ -c /dev/full ]; then > echo testing checking for write errors > # Check for write errors noticed when closing the file >- mawk '{print}' <full-awk.dat >/dev/full 2>/dev/null && exit >+ LC_ALL=C mawk '{print}' 
<full-awk.dat >/dev/full 2>/dev/null && exit > # Check for write errors noticed on writing > # The file has to be bigger than the buffer size of the libc >- mawk '{print}' <../scan.c >/dev/full 2>/dev/null && exit >+ LC_ALL=C mawk '{print}' <../scan.c >/dev/full 2>/dev/null && exit > > echo checking for write errors OK > else >@@ -57,7 +57,7 @@ > echo > echo testing arrays and flow of control > >-mawk -f wfrq0.awk $dat | cmp -s - wfrq-awk.out || exit >+LC_ALL=C mawk -f wfrq0.awk $dat | cmp -s - wfrq-awk.out || exit > > echo array test OK > ################################# >@@ -65,7 +65,7 @@ > echo > echo testing function calls and general stress test > >-mawk -f ../examples/decl.awk $dat | cmp -s - decl-awk.out || exit >+LC_ALL=C mawk -f ../examples/decl.awk $dat | cmp -s - decl-awk.out || exit > > echo general stress test passed >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 8314
: 1209