Build mawk 1.3.3 against the system POSIX regexp engine; the bundled
engine is kept and selected with "configure --with-local-regexp".

NOTE(review): indentation and the position of blank context lines were
reconstructed after this patch lost its line structure.  If a hunk is
rejected, retry with "patch -l" (ignore whitespace).

--- ../mawk-1.3.3-orig/configure.in	1995-10-16 14:25:00.000000000 +0200
+++ ../mawk-1.3.3-orig/configure.in	2005-06-09 20:35:40.000000000 +0300
@@ -32,10 +32,20 @@
 dnl
 AC_INIT(mawk.h)
 builtin(include,mawk.ac.m4)
+
 GET_USER_DEFAULTS
 PROG_CC_NO_MINUS_G_NONSENSE
 AC_PROG_CPP
 NOTSET_THEN_DEFAULT(CFLAGS,-O)
+
+AC_ARG_WITH(local-regexp,
+[  --with-local-regexp     use the mawk's regexp engine],
+[
+if test "x${withval}" = xyes; then
+    CFLAGS="$CFLAGS -DLOCAL_REGEXP"
+fi
+])
+
 LOOK_FOR_MATH_LIBRARY
 WHICH_YACC
 COMPILER_ATTRIBUTES
@@ -45,5 +55,6 @@
 FPRINTF_IN_STDIO
 FIND_OR_COMPUTE_MAX__INT
 DREADED_FPE_TESTS
+
 DO_CONFIG_H
 AC_OUTPUT(Makefile)
--- ../mawk-1.3.3-orig/main.c	1995-06-10 01:57:19.000000000 +0300
+++ ../mawk-1.3.3-orig/main.c	2005-06-09 20:35:40.000000000 +0300
@@ -44,6 +44,10 @@
 
 /*  main.c  */
 
+#ifndef LOCAL_REGEXP
+#  include <locale.h>
+#endif
+
 #include "mawk.h"
 #include "init.h"
 #include "code.h"
@@ -53,11 +57,24 @@
 short mawk_state ;	 /* 0 is compiling */
 int exit_code ;
 
+/* With the system regexp engine, adopt the environment's locale for
+   every category set here except LC_NUMERIC, which is pinned to "C". */
+static void initialize_locale (void)
+{
+#ifndef LOCAL_REGEXP
+   setlocale(LC_CTYPE, "");
+   setlocale(LC_COLLATE, "");
+   setlocale(LC_MESSAGES, "");
+   setlocale(LC_NUMERIC, "C");
+   setlocale(LC_TIME, "");
+#endif
+}
+
 int
 main(argc, argv)
    int argc ; char **argv ;
 {
-
+   initialize_locale ();
    initialize(argc, argv) ;
 
    parse() ;
--- ../mawk-1.3.3-orig/Makefile.in	1996-02-01 07:05:40.000000000 +0200
+++ ../mawk-1.3.3-orig/Makefile.in	2005-06-09 20:35:40.000000000 +0300
@@ -6,7 +6,7 @@
 
 CC = @CC@
 CFLAGS = @CFLAGS@
-
+LDFLAGS = @LDFLAGS@
 MATHLIB = @MATHLIB@
 YACC = @YACC@
 
@@ -25,17 +25,13 @@
    fin.o files.o scancode.o matherr.o fcall.o version.o\
    missing.o
 
-REXP_O=rexp/rexp.o rexp/rexp0.o rexp/rexp1.o rexp/rexp2.o\
-   rexp/rexp3.o
-
-REXP_C=rexp/rexp.c rexp/rexp0.c rexp/rexp1.c rexp/rexp2.c\
-   rexp/rexp3.c
-
+REXP_O=rexp/regexp.o
+REXP_C=rexp/regexp.c
 
 mawk_and_test : mawk mawk_test fpe_test
 
 mawk : $(O) rexp/.done
-	$(CC) $(CFLAGS) -o mawk $(O) $(REXP_O) $(MATHLIB)
+	$(CC) $(LDFLAGS) -o mawk $(O) $(REXP_O) $(MATHLIB)
 
 mawk_test : mawk # test that we have a sane mawk
 	@cp mawk test/mawk
--- ../mawk-1.3.3-orig/re_cmpl.c	2005-06-09 12:06:39.000000000 +0300
+++ ../mawk-1.3.3-orig/re_cmpl.c	2005-06-09 20:35:40.000000000 +0300
@@ -102,10 +102,10 @@
    if (!(p->re = REcompile(s)))
    {
       if (mawk_state == EXECUTION)
-         rt_error(efmt, REerrlist[REerrno], s) ;
+         rt_error(efmt, REerror (), s) ;
       else /* compiling */
       {
-         compile_error(efmt, REerrlist[REerrno], s) ;
+         compile_error(efmt, REerror(), s) ;
          return (PTR) 0 ;
       }
    }
--- ../mawk-1.3.3-orig/regexp.h	1993-07-03 21:58:19.000000000 +0300
+++ ../mawk-1.3.3-orig/regexp.h	2005-06-09 20:35:40.000000000 +0300
@@ -25,8 +25,4 @@
 int PROTO( REtest, (char *, PTR) ) ;
 char *PROTO( REmatch, (char *, PTR, unsigned *) ) ;
 void PROTO( REmprint, (PTR , FILE*) ) ;
-
-extern int REerrno ;
-extern char *REerrlist[] ;
-
-
+char *PROTO( REerror, (void)) ;
--- ../mawk-1.3.3-orig/rexp/Makefile	1993-07-24 20:55:07.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/Makefile	2005-06-09 20:35:40.000000000 +0300
@@ -9,8 +9,8 @@
 CC = cc
 CFLAGS = -O -DMAWK -I..
 
-O=rexp.o rexp0.o rexp1.o rexp2.o rexp3.o
-DB=rexpdb.o
+O=regexp.o
+DB=
 
 all : $(O)
 	@cat .done
--- ../mawk-1.3.3-orig/rexp/regexp.c	1970-01-01 03:00:00.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/regexp.c	2005-06-09 20:35:40.000000000 +0300
@@ -0,0 +1,12 @@
+#ifdef LOCAL_REGEXP
+#  include "rexp.c"
+#  include "rexp0.c"
+#  include "rexp1.c"
+#  include "rexp2.c"
+#  include "rexp3.c"
+#  include "rexp4.c"
+#  include "rexpdb.c"
+#else
+#  include "rexp4.c"
+#  include "regexp_system.c"
+#endif
--- ../mawk-1.3.3-orig/rexp/regexp_system.c	1970-01-01 03:00:00.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/regexp_system.c	2005-06-09 20:52:32.000000000 +0300
@@ -0,0 +1,165 @@
+#include <sys/types.h>
+#include <regex.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+
+/* A compiled POSIX regexp plus a copy of the text handed to regcomp(). */
+typedef struct {
+   regex_t re;
+   char *regexp;
+} mawk_re_t;
+
+/* Consulted by REerror(): the regexp handled most recently (NULL after
+   an allocation failure) and the status of the last regcomp() call. */
+static mawk_re_t *last_used_regexp = NULL;
+static int err_code = 0;
+
+/* Rewrite REGEXP in place: the escapes \n \t \f \b \r \a \v become the
+   characters they denote; any other backslash pair is copied unchanged. */
+void prepare_regexp (regexp)
+   char *regexp;
+{
+   int bs = 0;
+   char *tail = regexp;
+   char ch;
+
+   while ((ch = *regexp++) != 0){
+      if (bs){
+         switch (ch){
+            case 'n': *tail++ = '\n'; break;
+            case 't': *tail++ = '\t'; break;
+            case 'f': *tail++ = '\f'; break;
+            case 'b': *tail++ = '\b'; break;
+            case 'r': *tail++ = '\r'; break;
+            case 'a': *tail++ = '\07'; break;
+            case 'v': *tail++ = '\013'; break;
+            default: *tail++ = '\\'; *tail++ = ch;
+         }
+
+         bs = 0;
+      }else{
+         if (ch == '\\'){
+            bs = 1;
+         }else{
+            *tail++ = ch;
+         }
+      }
+   }
+
+   *tail = 0;
+}
+
+/* Compile REGEXP, wrapped in parentheses, as an extended POSIX regexp.
+   Returns the new mawk_re_t, or NULL on failure (see REerror()). */
+void * REcompile(regexp)
+   char *regexp;
+{
+   mawk_re_t *re = (mawk_re_t*) malloc (sizeof (mawk_re_t));
+   size_t len = strlen (regexp);
+   char *new_regexp = (char *) malloc (len + 3);
+/* fprintf (stderr, "REcompile: %s\n", regexp); */
+
+   if (!re || !new_regexp){
+      /* release whichever allocation succeeded; REerror() reports
+         "malloc failed" only when last_used_regexp is NULL, and it
+         reads errno, which free() must not be allowed to disturb */
+      int saved_errno = errno;
+      free (re);
+      free (new_regexp);
+      errno = saved_errno;
+      last_used_regexp = NULL;
+      return NULL;
+   }
+
+   new_regexp [0] = '(';
+   memcpy (new_regexp + 1, regexp, len);
+   new_regexp [len+1] = ')';
+   new_regexp [len+2] = 0;
+
+   prepare_regexp (new_regexp);
+
+   last_used_regexp = re;
+
+   memset (re, 0, sizeof (mawk_re_t));
+   re -> regexp = strdup (new_regexp);
+   err_code = regcomp (&re->re, new_regexp, REG_EXTENDED | REG_NEWLINE);
+
+   free (new_regexp);
+
+   /* RE is not freed on failure: REerror() still passes it to regerror() */
+   if (err_code)
+      return NULL;
+
+   return re;
+}
+
+/* Return 1 if STR matches RE, 0 otherwise. */
+int
+REtest(str, re)
+   char *str ;
+   mawk_re_t* re ;
+{
+/* fprintf (stderr, "REtest: \"%s\" ~ /%s/", str, re -> regexp); */
+
+   last_used_regexp = re;
+
+   if (regexec (&re->re, str, 0, NULL, 0)){
+/* fprintf (stderr, "=1\n"); */
+      return 0;
+   }else{
+/* fprintf (stderr, "=0\n"); */
+      return 1;
+   }
+}
+
+/* Search STR for a match of RE.  On success return a pointer to the
+   start of the match and store its length in *LENP; else return NULL. */
+char *REmatch(str, re, lenp)
+   char *str ;
+   mawk_re_t* re ;
+   unsigned *lenp ;
+{
+   /* only the overall match is used, so one slot is enough */
+   regmatch_t match [1];
+/* fprintf (stderr, "REmatch: \"%s\" ~ /%s/", str, re -> regexp); */
+
+   last_used_regexp = re;
+
+   if (!regexec (&re->re, str, 1, match, 0)){
+      *lenp = match [0].rm_eo - match[0].rm_so;
+/* fprintf (stderr, "=%i/%i\n", match [0].rm_so, *lenp); */
+      return str + match [0].rm_so;
+   }else{
+/* fprintf (stderr, "=0\n"); */
+      return NULL;
+   }
+}
+
+/* Dumping a compiled regexp is not implemented for the system engine;
+   any call aborts the program. */
+void REmprint(m, f)
+   void * m ;
+   FILE *f ;
+{
+   /* no debugging code available */
+   abort ();
+}
+
+static char error_buffer [2048];
+
+/* Return a message for the most recent REcompile() failure.  The text
+   lives in a static buffer that the next call overwrites. */
+char *REerror ()
+{
+   if (last_used_regexp){
+      regerror (err_code, &last_used_regexp -> re,
+                error_buffer, sizeof (error_buffer));
+   }else{
+      snprintf (error_buffer, sizeof (error_buffer), "malloc failed: %s",
+                strerror (errno));
+   }
+
+   return error_buffer;
+}
--- ../mawk-1.3.3-orig/rexp/rexp2.c	1995-06-10 01:45:34.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/rexp2.c	2005-06-09 20:35:40.000000000 +0300
@@ -318,18 +318,6 @@
 
 #ifdef MAWK
 
-char *
-is_string_split(p, lenp)
-   register STATE *p ;
-   unsigned *lenp ;
-{
-   if (p[0].type == M_STR && p[1].type == M_ACCEPT)
-   {
-      *lenp = p->len ;
-      return p->data.str ;
-   }
-   else return (char *) 0 ;
-}
 #else /* mawk provides its own str_str */
 
 char *
--- ../mawk-1.3.3-orig/rexp/rexp4.c	1970-01-01 03:00:00.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/rexp4.c	2005-06-09 20:35:40.000000000 +0300
@@ -0,0 +1,14 @@
+#include "rexp.h"
+
+char *
+is_string_split(p, lenp)
+   register STATE *p ;
+   unsigned *lenp ;
+{
+   if (p[0].type == M_STR && p[1].type == M_ACCEPT)
+   {
+      *lenp = p->len ;
+      return p->data.str ;
+   }
+   else return (char *) 0 ;
+}
--- ../mawk-1.3.3-orig/rexp/rexp.c	2005-06-09 12:06:39.000000000 +0300
+++ ../mawk-1.3.3-orig/rexp/rexp.c	2005-06-09 20:35:40.000000000 +0300
@@ -236,3 +236,9 @@
    fprintf(stderr, "REcompile() - panic: %s\n", s) ;
    exit(100) ;
 }
+
+/* getting regexp error message */
+char *REerror (void)
+{
+   return REerrlist [REerrno];
+}
--- ../mawk-1.3.3-orig/test/mawktest	2005-06-09 12:06:39.000000000 +0300
+++ ../mawk-1.3.3-orig/test/mawktest	2005-06-09 20:35:40.000000000 +0300
@@ -22,16 +22,16 @@
 echo
 echo testing input and field splitting
 
-mawk -f wc.awk $dat | cmp -s - wc-awk.out || exit
+LC_ALL=C mawk -f wc.awk $dat | cmp -s - wc-awk.out || exit
 
 echo input and field splitting OK
 #####################################
 
 echo
 echo testing regular expression matching
-mawk -f reg0.awk $dat > temp$$
-mawk -f reg1.awk $dat >> temp$$
-mawk -f reg2.awk $dat >> temp$$
+LC_ALL=C mawk -f reg0.awk $dat > temp$$
+LC_ALL=C mawk -f reg1.awk $dat >> temp$$
+LC_ALL=C mawk -f reg2.awk $dat >> temp$$
 
 cmp -s reg-awk.out temp$$ || exit
 
@@ -42,10 +42,10 @@
 if [ -c /dev/full ]; then
     echo testing checking for write errors
     # Check for write errors noticed when closing the file
-    mawk '{print}' <full-awk.dat >/dev/full 2>/dev/null && exit
+    LC_ALL=C mawk '{print}' <full-awk.dat >/dev/full 2>/dev/null && exit
     # Check for write errors noticed on writing
     # The file has to be bigger than the buffer size of the libc
-    mawk '{print}' <../scan.c >/dev/full 2>/dev/null && exit
+    LC_ALL=C mawk '{print}' <../scan.c >/dev/full 2>/dev/null && exit
 
     echo checking for write errors OK
 else
@@ -57,7 +57,7 @@
 echo
 echo testing arrays and flow of control
 
-mawk -f wfrq0.awk $dat | cmp -s - wfrq-awk.out || exit
+LC_ALL=C mawk -f wfrq0.awk $dat | cmp -s - wfrq-awk.out || exit
 
 echo array test OK
 #################################
@@ -65,7 +65,7 @@
 echo
 echo testing function calls and general stress test
 
-mawk -f ../examples/decl.awk $dat | cmp -s - decl-awk.out || exit
+LC_ALL=C mawk -f ../examples/decl.awk $dat | cmp -s - decl-awk.out || exit
 
 echo general stress test passed
 