123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559 |
- /*
- * $Id$
- *
- * regexp and regexp substitutions implementations
- *
- * Copyright (C) 2001-2003 FhG Fokus
- *
- * This file is part of ser, a free SIP server.
- *
- * ser is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version
- *
- * For a license to use the ser software under conditions
- * other than those described here, or to purchase support for this
- * software, please contact iptel.org by e-mail at the following addresses:
- * info@iptel.org
- *
- * ser is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- *
- * History:
- * --------
- * 2003-08-04 created by andrei
- * 2004-11-12 minor api extension, added *count (andrei)
- */
- /*!
- * \file
- * \brief SIP-router core :: regexp and regexp substitutions implementations
- * \ingroup core
- * Module: \ref core
- */
- #include "dprint.h"
- #include "mem/mem.h"
- #include "re.h"
- #include <string.h>
- #define MAX_REPLACE_WITH 100
- #define REPLACE_BUFFER_SIZE 1024
- void subst_expr_free(struct subst_expr* se)
- {
- if (se->replacement.s) pkg_free(se->replacement.s);
- if (se->re) { regfree(se->re); pkg_free(se->re); };
- pkg_free(se);
- }
- /* frees the entire list, head (l) too */
- void replace_lst_free(struct replace_lst* l)
- {
- struct replace_lst* t;
-
- while (l){
- t=l;
- l=l->next;
- if (t->rpl.s) pkg_free(t->rpl.s);
- pkg_free(t);
- }
- }
- int parse_repl(struct replace_with * rw, char ** begin,
- char * end, int *max_token_nb, int with_sep)
- {
- char* p0;
- char * repl;
- str s;
- int token_nb;
- int escape;
- int max_pmatch;
- char *p, c;
- /* parse replacement */
- p = *begin;
- c = *p;
- if(with_sep)
- p++;
- repl= p;
- token_nb=0;
- max_pmatch=0;
- escape=0;
- for(;p<end; p++){
- if (escape){
- escape=0;
- switch (*p){
- /* special char escapes */
- case '\\':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c='\\';
- break;
- case 'n':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c='\n';
- break;
- case 'r':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c='\r';
- break;
- case 't':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c='\t';
- break;
- case PV_MARKER:
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c=PV_MARKER;
- break;
- /* special sip msg parts escapes */
- case 'u':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_URI;
- break;
- /* re matches */
- case '0': /* allow 0, too, reference to the whole match */
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_NMATCH;
- rw[token_nb].u.nmatch=(*p)-'0';
- /* 0 is the whole matched str*/
- if (max_pmatch<rw[token_nb].u.nmatch)
- max_pmatch=rw[token_nb].u.nmatch;
- break;
- default: /* just print current char */
- if (*p!=c){
- WARN("subst_parser:\\%c unknown escape in %s\n", *p, *begin);
- }
- rw[token_nb].size=2;
- rw[token_nb].offset=(p-1)-repl;
- rw[token_nb].type=REPLACE_CHAR;
- rw[token_nb].u.c=*p;
- break;
- }
- token_nb++;
- if (token_nb>=MAX_REPLACE_WITH){
- ERR("subst_parser: too many escapes in the replace part %s\n", *begin);
- goto error;
- }
- }else if (*p=='\\') {
- escape=1;
- }else if (*p==PV_MARKER) {
- s.s = p;
- s.len = end - s.s;
- p0 = pv_parse_spec(&s, &rw[token_nb].u.spec);
- if(p0==NULL)
- {
- ERR("subst_parser: bad specifier in replace part %s\n", *begin);
- goto error;
- }
- rw[token_nb].size=p0-p;
- rw[token_nb].offset=p-repl;
- rw[token_nb].type=REPLACE_SPEC;
- token_nb++;
- p=p0-1;
- }else if (*p==c && with_sep){
- goto found_repl;
- }
- }
- if(with_sep){
- ERR("subst_parser: missing separator: %s\n", *begin);
- goto error;
- }
- found_repl:
- *max_token_nb = max_pmatch;
- *begin = p;
- return token_nb;
- error:
- return -1;
- }
- /* parse a /regular expression/replacement/flags into a subst_expr structure */
- struct subst_expr* subst_parser(str* subst)
- {
- char c;
- char* end;
- char* p;
- char* re;
- char* re_end;
- char* repl;
- char* repl_end;
- struct replace_with rw[MAX_REPLACE_WITH];
- int rw_no;
- int cflags; /* regcomp flags */
- int replace_all;
- struct subst_expr* se;
- regex_t* regex;
- int max_pmatch;
- int r;
-
- /* init */
- se=0;
- regex=0;
- cflags=REG_EXTENDED | REG_NEWLINE; /* don't match newline */
- replace_all=0;
- if (subst->len<3){
- LM_ERR("expression is too short: %.*s\n", subst->len, subst->s);
- goto error;
- }
-
- p=subst->s;
- c=*p;
- if (c=='\\'){
- LM_ERR("invalid separator char <%c> in %.*s\n", c, subst->len, subst->s);
- goto error;
- }
- p++;
- end=subst->s+subst->len;
- /* find re */
- re=p;
- for (;p<end;p++){
- /* if unescaped sep. char */
- if ((*p==c) && (*(p-1)!='\\')) goto found_re;
- }
- LM_ERR("no separator found: %.*s\n", subst->len, subst->s);
- goto error;
- found_re:
- re_end=p;
- if (end < (p + 2)) {
- LM_ERR("String too short\n");
- goto error;
- }
- repl=p+1;
- if ((rw_no = parse_repl(rw, &p, end, &max_pmatch, WITH_SEP)) < 0)
- goto error;
- repl_end = p;
- p++;
-
- /* parse flags */
- for(;p<end; p++){
- switch(*p){
- case 'i':
- cflags|=REG_ICASE;
- break;
- case 's':
- cflags&=(~REG_NEWLINE);
- break;
- case 'g':
- replace_all=1;
- break;
- default:
- LM_ERR("unknown flag %c in %.*s\n", *p, subst->len, subst->s);
- goto error;
- }
- }
- /* compile the re */
- if ((regex=pkg_malloc(sizeof(regex_t)))==0){
- LM_ERR("out of memory\n");
- goto error;
- }
- c=*re_end; /* regcomp expects null terminated strings -- save */
- *re_end=0;
- if (regcomp(regex, re, cflags)!=0){
- pkg_free(regex);
- regex=0;
- *re_end=c; /* restore */
- LM_ERR("bad regular expression %.*s in %.*s\n",
- (int)(re_end-re), re, subst->len, subst->s);
- goto error;
- }
- *re_end=c; /* restore */
- /* construct the subst_expr structure */
- se=pkg_malloc(sizeof(struct subst_expr)+
- ((rw_no)?(rw_no-1)*sizeof(struct replace_with):0));
- /* 1 replace_with structure is already included in subst_expr */
- if (se==0){
- LM_ERR("out of memory\n");
- goto error;
- }
- memset((void*)se, 0, sizeof(struct subst_expr));
- se->re=regex;
- se->replacement.len=repl_end-repl;
- if (se->replacement.len > 0) {
- if ((se->replacement.s=pkg_malloc(se->replacement.len))==0){
- LM_ERR("out of memory\n");
- goto error;
- }
- /* start copying */
- memcpy(se->replacement.s, repl, se->replacement.len);
- } else {
- se->replacement.s = NULL;
- }
- se->replace_all=replace_all;
- se->n_escapes=rw_no;
- se->max_pmatch=max_pmatch;
- for (r=0; r<rw_no; r++) se->replace[r]=rw[r];
- DBG("subst_parser: ok, se is %p\n", se);
- return se;
-
- error:
- if (se) { subst_expr_free(se); regex=0; }
- if (regex) { regfree (regex); pkg_free(regex); }
- return 0;
- }
- /* rpl.s will be alloc'ed with the proper size & rpl.len set
- * returns 0 on success, <0 on error*/
- static int replace_build(const char* match, int nmatch, regmatch_t* pmatch,
- struct subst_expr* se, struct sip_msg* msg, str* rpl)
- {
- int r;
- str* uri;
- pv_value_t sv;
- char* p;
- char* dest;
- char* end;
- int size;
- static char rbuf[REPLACE_BUFFER_SIZE];
- #define RBUF_APPEND(dst, src, size) \
- if ((dst) - rbuf + (size) >= REPLACE_BUFFER_SIZE - 1) { \
- LM_ERR("Buffer too small\n"); \
- goto error; \
- } \
- memcpy((dst), (src), (size)); \
- (dst) += (size);
- p=se->replacement.s;
- end=p+se->replacement.len;
- dest=rbuf;
-
- for (r=0; r<se->n_escapes; r++){
- /* copy the unescaped parts */
- size=se->replacement.s+se->replace[r].offset-p;
- RBUF_APPEND(dest, p, size);
- p+=size+se->replace[r].size;
- switch(se->replace[r].type){
- case REPLACE_NMATCH:
- if ((se->replace[r].u.nmatch<nmatch)&&(
- pmatch[se->replace[r].u.nmatch].rm_so!=-1)){
- /* do the replace */
- size=pmatch[se->replace[r].u.nmatch].rm_eo-
- pmatch[se->replace[r].u.nmatch].rm_so;
- RBUF_APPEND(dest,
- match+pmatch[se->replace[r].u.nmatch].rm_so,
- size);
- };
- break;
- case REPLACE_CHAR:
- RBUF_APPEND(dest, &se->replace[r].u.c, 1);
- break;
- case REPLACE_URI:
- if (msg->first_line.type!=SIP_REQUEST){
- LM_CRIT("uri substitution on a reply\n");
- break; /* ignore, we can continue */
- }
- uri= (msg->new_uri.s)?(&msg->new_uri):
- (&msg->first_line.u.request.uri);
- RBUF_APPEND(dest, uri->s, uri->len);
- break;
- case REPLACE_SPEC:
- if(pv_get_spec_value(msg, &se->replace[r].u.spec, &sv)!=0) {
- LM_ERR("item substitution returned error\n");
- break; /* ignore, we can continue */
- }
- RBUF_APPEND(dest, sv.rs.s, sv.rs.len);
- break;
- default:
- LM_CRIT("unknown type %d\n", se->replace[r].type);
- /* ignore it */
- }
- }
- RBUF_APPEND(dest, p, end-p);
- rpl->len = dest - rbuf;
- if ((rpl->s = pkg_malloc(rpl->len)) == NULL) {
- LM_ERR("Out of pkg memory\n");
- goto error;
- }
- memcpy(rpl->s, rbuf, rpl->len);
- return 0;
- error:
- return -1;
- }
- /* WARNING: input must be 0 terminated! */
- /* returns: 0 if no match or error, or subst result; if count!=0
- * it will be set to 0 (no match), the number of matches
- * or -1 (error).
- */
- struct replace_lst* subst_run(struct subst_expr* se, const char* input,
- struct sip_msg* msg, int* count)
- {
- struct replace_lst *head;
- struct replace_lst **crt;
- const char *p;
- int r;
- regmatch_t* pmatch;
- int nmatch;
- int eflags;
- int cnt;
-
-
- /* init */
- head=0;
- cnt=0;
- crt=&head;
- p=input;
- nmatch=se->max_pmatch+1;
- /* no of () referenced + 1 for the whole string: pmatch[0] */
- pmatch=pkg_malloc(nmatch*sizeof(regmatch_t));
- if (pmatch==0){
- LM_ERR("out of mem\n");
- goto error;
- }
- eflags=0;
- do{
- r=regexec(se->re, p, nmatch, pmatch, eflags);
- DBG("subst_run: running. r=%d\n", r);
- /* subst */
- if (r==0){ /* != REG_NOMATCH */
- if (pmatch[0].rm_so==-1) {
- LM_ERR("Unknown offset?\n");
- goto error;
- }
- if (pmatch[0].rm_so==pmatch[0].rm_eo) {
- LM_ERR("Matched string is empty, invalid regexp?\n");
- goto error;
- }
- *crt=pkg_malloc(sizeof(struct replace_lst));
- if (*crt==0){
- LM_ERR("out of mem\n");
- goto error;
- }
- memset(*crt, 0, sizeof(struct replace_lst));
- (*crt)->offset=pmatch[0].rm_so+(int)(p-input);
- (*crt)->size=pmatch[0].rm_eo-pmatch[0].rm_so;
- DBG("subst_run: matched (%d, %d): [%.*s]\n",
- (*crt)->offset, (*crt)->size,
- (*crt)->size, input+(*crt)->offset);
- /* create subst. string */
- /* construct the string from replace[] */
- if (replace_build(p, nmatch, pmatch, se, msg, &((*crt)->rpl))<0){
- goto error;
- }
- crt=&((*crt)->next);
- p+=pmatch[0].rm_eo;
- if (*(p-1) == '\n' || *(p-1) == '\r') eflags&=~REG_NOTBOL;
- else eflags|=REG_NOTBOL;
- cnt++;
- }
- }while((r==0) && se->replace_all);
- pkg_free(pmatch);
- if (count)*count=cnt;
- return head;
- error:
- if (head) replace_lst_free(head);
- if (pmatch) pkg_free(pmatch);
- if (count) *count=-1;
- return 0;
- }
- /* returns the substitution result in a str, input must be 0 term
- * 0 on no match or malloc error
- * if count is non zero it will be set to the number of matches, or -1
- * if error
- */
- str* subst_str(const char *input, struct sip_msg* msg, struct subst_expr* se,
- int* count)
- {
- str* res;
- struct replace_lst *lst;
- struct replace_lst* l;
- int len;
- int size;
- const char* p;
- char* dest;
- const char* end;
-
-
- /* compute the len */
- len=strlen(input);
- end=input+len;
- lst=subst_run(se, input, msg, count);
- if (lst==0){
- DBG("subst_str: no match\n");
- return 0;
- }
- for (l=lst; l; l=l->next)
- len+=(int)(l->rpl.len)-l->size;
- res=pkg_malloc(sizeof(str));
- if (res==0){
- LM_ERR("mem. allocation error\n");
- goto error;
- }
- res->s=pkg_malloc(len+1); /* space for null termination */
- if (res->s==0){
- LM_ERR("mem. allocation error (res->s)\n");
- goto error;
- }
- res->s[len]=0;
- res->len=len;
-
- /* replace */
- dest=res->s;
- p=input;
- for(l=lst; l; l=l->next){
- size=l->offset+input-p;
- memcpy(dest, p, size); /* copy till offset */
- p+=size + l->size; /* skip l->size bytes */
- dest+=size;
- if (l->rpl.len){
- memcpy(dest, l->rpl.s, l->rpl.len);
- dest+=l->rpl.len;
- }
- }
- memcpy(dest, p, end-p);
- if(lst) replace_lst_free(lst);
- return res;
- error:
- if (lst) replace_lst_free(lst);
- if (res){
- if (res->s) pkg_free(res->s);
- pkg_free(res);
- }
- if (count) *count=-1;
- return 0;
- }
|