Преглед изворни кода

module regex: pcre_match_group() allows pseudo-variable in group parameter

- example: pcre_match_group("$rU", "$var(num)"),
  where $var(num) can be a pseudo-variable containing an integer
  pointing to any group in the regex text file.
- module documentation updated and improved.
Iñaki Baz Castillo пре 14 година
родитељ
комит
7ea366cd3d
3 измењених фајлова са 232 додато и 158 уклоњено
  1. 196 142
      modules_k/regex/README
  2. 23 9
      modules_k/regex/doc/regex_admin.xml
  3. 13 7
      modules_k/regex/regex_mod.c

+ 196 - 142
modules_k/regex/README

@@ -1,53 +1,52 @@
 Regex Module
 
-Iñaki Baz Castillo
+Iñaki Baz Castillo
 
    <[email protected]>
 
 Edited by
 
-Iñaki Baz Castillo
+Iñaki Baz Castillo
 
    <[email protected]>
 
-   Copyright © 2009 Iñaki Baz Castillo
+   Copyright © 2009 Iñaki Baz Castillo
    Revision History
-   Revision $Revision: 5462 $ $Date: 2009-01-14 17:05:51 +0100
-                              (Mi, 14 Jan 2009) $
-     __________________________________________________________
+   Revision $Revision$ $Date$
+     __________________________________________________________________
 
    Table of Contents
 
    1. Admin Guide
 
-        1.1. Overview
-        1.2. Dependencies
+        1. Overview
+        2. Dependencies
 
-              1.2.1. Kamailio Modules
-              1.2.2. External Libraries or Applications
+              2.1. Kamailio Modules
+              2.2. External Libraries or Applications
 
-        1.3. Exported Parameters
+        3. Exported Parameters
 
-              1.3.1. file (string)
-              1.3.2. max_groups (int)
-              1.3.3. group_max_size (int)
-              1.3.4. pcre_caseless (int)
-              1.3.5. pcre_multiline (int)
-              1.3.6. pcre_dotall (int)
-              1.3.7. pcre_extended (int)
+              3.1. file (string)
+              3.2. max_groups (int)
+              3.3. group_max_size (int)
+              3.4. pcre_caseless (int)
+              3.5. pcre_multiline (int)
+              3.6. pcre_dotall (int)
+              3.7. pcre_extended (int)
 
-        1.4. Exported Functions
+        4. Exported Functions
 
-              1.4.1. pcre_match (string, pcre_regex)
-              1.4.2. pcre_match_group (string [, group])
+              4.1. pcre_match (string, pcre_regex)
+              4.2. pcre_match_group (string [, group])
 
-        1.5. Exported MI Functions
+        5. Exported MI Functions
 
-              1.5.1. regex_reload
+              5.1. regex_reload
 
-        1.6. Installation and Running
+        6. Installation and Running
 
-              1.6.1. File format
+              6.1. File format
 
    List of Examples
 
@@ -61,165 +60,205 @@ I
    1.8. pcre_match usage (forcing case insensitive)
    1.9. pcre_match usage (using "end of line" symbol)
    1.10. pcre_match_group usage
-   1.11. regex file
-   1.12. Using with pua_usrloc
-   1.13. Incorrect groups file
+   1.11. pcre_match_group usage (using a pseudo-variable as group)
+   1.12. regex file
+   1.13. Using with pua_usrloc
+   1.14. Incorrect groups file
 
 Chapter 1. Admin Guide
 
-1.1. Overview
+   Table of Contents
+
+   1. Overview
+   2. Dependencies
+
+        2.1. Kamailio Modules
+        2.2. External Libraries or Applications
+
+   3. Exported Parameters
+
+        3.1. file (string)
+        3.2. max_groups (int)
+        3.3. group_max_size (int)
+        3.4. pcre_caseless (int)
+        3.5. pcre_multiline (int)
+        3.6. pcre_dotall (int)
+        3.7. pcre_extended (int)
+
+   4. Exported Functions
+
+        4.1. pcre_match (string, pcre_regex)
+        4.2. pcre_match_group (string [, group])
+
+   5. Exported MI Functions
+
+        5.1. regex_reload
+
+   6. Installation and Running
 
-   This module offers matching operations against regular
-   expressions using the powerful PCRE library.
+        6.1. File format
 
-   A text file containing regular expressions categorized in
-   groups is compiled when the module is loaded, storing the
-   compiled PCRE objects in an array. A function to match a string
-   or pseudo-variable against any of these groups is provided. The
-   text file can be modified and reloaded at any time via a MI
-   command. The module also offers a function to perform a PCRE
-   matching operation against a regular expression provided as
-   function parameter.
+1. Overview
 
-   For a detailed list of PCRE features read the man page of the
-   library.
+   This module offers matching operations against regular expressions
+   using the powerful PCRE library.
 
-1.2. Dependencies
+   A text file containing regular expressions categorized in groups is
+   compiled when the module is loaded, storing the compiled PCRE objects
+   in an array. A function to match a string or pseudo-variable against
+   any of these groups is provided. The text file can be modified and
+   reloaded at any time via a MI command. The module also offers a
+   function to perform a PCRE matching operation against a regular
+   expression provided as function parameter.
 
-1.2.1. Kamailio Modules
+   For a detailed list of PCRE features read the man page of the library.
+
+2. Dependencies
+
+   2.1. Kamailio Modules
+   2.2. External Libraries or Applications
+
+2.1. Kamailio Modules
 
    The following modules must be loaded before this module:
      * No dependencies on other Kamailio modules.
 
-1.2.2. External Libraries or Applications
+2.2. External Libraries or Applications
 
-   The following libraries or applications must be installed
-   before running Kamailio with this module loaded:
+   The following libraries or applications must be installed before
+   running Kamailio with this module loaded:
      * libpcre - the libraries of PCRE.
 
-1.3. Exported Parameters
+3. Exported Parameters
 
-1.3.1. file (string)
+   3.1. file (string)
+   3.2. max_groups (int)
+   3.3. group_max_size (int)
+   3.4. pcre_caseless (int)
+   3.5. pcre_multiline (int)
+   3.6. pcre_dotall (int)
+   3.7. pcre_extended (int)
 
-   Text file containing the regular expression groups. It must be
-   set in order to enable the group matching function.
+3.1. file (string)
 
-   Default value is "NULL".
+   Text file containing the regular expression groups. It must be set in
+   order to enable the group matching function.
+
+   Default value is “NULL”.
 
    Example 1.1. Set file parameter
 ...
 modparam("regex", "file", "/etc/kamailio/regex_groups")
 ...
 
-1.3.2. max_groups (int)
+3.2. max_groups (int)
 
    Max number of regular expression groups in the text file.
 
-   Default value is "20".
+   Default value is “20”.
 
    Example 1.2. Set max_groups parameter
 ...
 modparam("regex", "max_groups", 40)
 ...
 
-1.3.3. group_max_size (int)
+3.3. group_max_size (int)
 
    Max content size of a group in the text file.
 
-   Default value is "8192".
+   Default value is “8192”.
 
    Example 1.3. Set group_max_size parameter
 ...
 modparam("regex", "group_max_size", 16384)
 ...
 
-1.3.4. pcre_caseless (int)
+3.4. pcre_caseless (int)
 
-   If this options is set, matching is done caseless. It is
-   equivalent to Perl's /i option, and it can be changed within a
-   pattern by a (?i) or (?-i) option setting.
+   If this option is set, matching is done caseless. It is equivalent to
+   Perl's /i option, and it can be changed within a pattern by a (?i) or
+   (?-i) option setting.
 
-   Default value is "0".
+   Default value is “0”.
 
    Example 1.4. Set pcre_caseless parameter
 ...
 modparam("regex", "pcre_caseless", 1)
 ...
 
-1.3.5. pcre_multiline (int)
+3.5. pcre_multiline (int)
 
-   By default, PCRE treats the subject string as consisting of a
-   single line of characters (even if it actually contains
-   newlines). The "start of line" metacharacter (^) matches only
-   at the start of the string, while the "end of line"
-   metacharacter ($) matches only at the end of the string, or
-   before a terminating newline.
+   By default, PCRE treats the subject string as consisting of a single
+   line of characters (even if it actually contains newlines). The "start
+   of line" metacharacter (^) matches only at the start of the string,
+   while the "end of line" metacharacter ($) matches only at the end of
+   the string, or before a terminating newline.
 
    When this option is set, the "start of line" and "end of line"
-   constructs match immediately following or immediately before
-   internal newlines in the subject string, respectively, as well
-   as at the very start and end. This is equivalent to Perl's /m
-   option, and it can be changed within a pattern by a (?m) or
-   (?-m) option setting. If there are no newlines in a subject
-   string, or no occurrences of ^ or $ in a pattern, setting this
-   option has no effect.
+   constructs match immediately following or immediately before internal
+   newlines in the subject string, respectively, as well as at the very
+   start and end. This is equivalent to Perl's /m option, and it can be
+   changed within a pattern by a (?m) or (?-m) option setting. If there
+   are no newlines in a subject string, or no occurrences of ^ or $ in a
+   pattern, setting this option has no effect.
 
-   Default value is "0".
+   Default value is “0”.
 
    Example 1.5. Set pcre_multiline parameter
 ...
 modparam("regex", "pcre_multiline", 1)
 ...
 
-1.3.6. pcre_dotall (int)
+3.6. pcre_dotall (int)
 
-   If this option is set, a dot metacharater in the pattern
-   matches all characters, including those that indicate newline.
-   Without it, a dot does not match when the current position is
-   at a newline. This option is equivalent to Perl's /s option,
-   and it can be changed within a pattern by a (?s) or (?-s)
-   option setting.
+   If this option is set, a dot metacharacter in the pattern matches all
+   characters, including those that indicate newline. Without it, a dot
+   does not match when the current position is at a newline. This option
+   is equivalent to Perl's /s option, and it can be changed within a
+   pattern by a (?s) or (?-s) option setting.
 
-   Default value is "0".
+   Default value is “0”.
 
    Example 1.6. Set pcre_dotall parameter
 ...
 modparam("regex", "pcre_dotall", 1)
 ...
 
-1.3.7. pcre_extended (int)
+3.7. pcre_extended (int)
 
-   If this option is set, whitespace data characters in the
-   pattern are totally ignored except when escaped or inside a
-   character class. Whitespace does not include the VT character
-   (code 11). In addition, characters between an unescaped #
-   outside a character class and the next newline, inclusive, are
-   also ignored. This is equivalent to Perl's /x option, and it
-   can be changed within a pattern by a (?x) or (?-x) option
-   setting.
+   If this option is set, whitespace data characters in the pattern are
+   totally ignored except when escaped or inside a character class.
+   Whitespace does not include the VT character (code 11). In addition,
+   characters between an unescaped # outside a character class and the
+   next newline, inclusive, are also ignored. This is equivalent to Perl's
+   /x option, and it can be changed within a pattern by a (?x) or (?-x)
+   option setting.
 
-   Default value is "0".
+   Default value is “0”.
 
    Example 1.7. Set pcre_extended parameter
 ...
 modparam("regex", "pcre_extended", 1)
 ...
 
-1.4. Exported Functions
+4. Exported Functions
+
+   4.1. pcre_match (string, pcre_regex)
+   4.2. pcre_match_group (string [, group])
 
-1.4.1.  pcre_match (string, pcre_regex)
+4.1.  pcre_match (string, pcre_regex)
 
-   Matches the given string parameter against the regular
-   expression pcre_regex, which is compiled into a PCRE object.
-   Returns TRUE if it matches, FALSE otherwise.
+   Matches the given string parameter against the regular expression
+   pcre_regex, which is compiled at runtime into a PCRE object. Returns
+   TRUE if it matches, FALSE otherwise.
 
    Meaning of the parameters is as follows:
      * string - String or pseudo-variable to compare.
-     * pcre_regex - Regular expression to be compiled in a PCRE
-       object. It can be a string or pseudo-variable.
+     * pcre_regex - Regular expression to be compiled in a PCRE object. It
+       can be a string or pseudo-variable.
 
-   NOTE: To use the "end of line" symbol '$' in the pcre_regex
-   parameter use '$$'.
+   NOTE: To use the "end of line" symbol '$' in the pcre_regex parameter
+   use '$$'.
 
    This function can be used from REQUEST_ROUTE, FAILURE_ROUTE,
    ONREPLY_ROUTE, BRANCH_ROUTE and LOCAL_ROUTE.
@@ -233,41 +272,54 @@ if (pcre_match("$ua", "(?i)^twinkle")) {
 
    Example 1.9.  pcre_match usage (using "end of line" symbol)
 ...
-if (pcre_match("$rU", "^user[1234]$$")) {  # Will be converted to "^user
-[1234]$"
+if (pcre_match("$rU", "^user[1234]$$")) {  # Will be converted to "^user[1234]$"
     xlog("L_INFO", "RURI username matches\n");
 }
 ...
 
-1.4.2.  pcre_match_group (string [, group])
+4.2.  pcre_match_group (string [, group])
 
-   It uses the groups readed from the text file (see
-   Section 1.6.1, "File format") to match the given string
-   parameter against the compiled regular expression in group
-   number group. Returns TRUE if it matches, FALSE otherwise.
+   Tries to match the given string against a specific group in the text
+   file (see Section 6.1, “File format”). Returns TRUE if it matches,
+   FALSE otherwise.
 
    Meaning of the parameters is as follows:
      * string - String or pseudo-variable to compare.
-     * group - Number of group to use in the operation. If not
-       specified then 0 (the first group) is used.
+     * group - Number of group to use in the operation. If not specified
+       then 0 (the first group) is used. A pseudo-variable containing an
+       integer can also be used.
 
    This function can be used from REQUEST_ROUTE, FAILURE_ROUTE,
    ONREPLY_ROUTE, BRANCH_ROUTE and LOCAL_ROUTE.
 
    Example 1.10.  pcre_match_group usage
 ...
-if (pcre_match_group("$rU", 2)) {
+if (pcre_match_group("$rU", "2")) {
     xlog("L_INFO", "RURI username matches group 2\n");
 }
 ...
 
-1.5. Exported MI Functions
+   Example 1.11.  pcre_match_group usage (using a pseudo-variable as
+   group)
+...
+$avp(i:10) = 5;  # Maybe got from a DB query.
+if (pcre_match_group("$ua", "$avp(i:10)")) {
+    xlog("L_INFO", "User-Agent matches group 5\n");
+}
+...
+
+5. Exported MI Functions
 
-1.5.1.  regex_reload
+   5.1. regex_reload
+
+5.1.  regex_reload
 
    Causes regex module to re-read the content of the text file and
-   re-compile the regular expressions. The number of groups in the
-   file can be modified safely.
+   re-compile the regular expressions. The number of groups in the file
+   can be modified safely.
 
    Name: regex_reload
 
@@ -277,19 +329,21 @@ if (pcre_match_group("$rU", 2)) {
 :regex_reload:_reply_fifo_file_
 _empty_line_
 
-1.6. Installation and Running
+6. Installation and Running
+
+   6.1. File format
 
-1.6.1. File format
+6.1. File format
 
-   The file contains regular expressions categorized in groups.
-   Each group starts with "[number]" line. Lines starting by
-   space, tab, CR, LF or # (comments) are ignored. Each regular
-   expression must take up just one line, this means that a
-   regular expression can't be splitted in various lines.
+   The file contains regular expressions categorized in groups. Each group
+   starts with a "[number]" line. Lines starting with space, tab, CR, LF or #
+   (comments) are ignored. Each regular expression must take up just one
+   line; this means that a regular expression cannot be split across
+   several lines.
 
    An example of the file format would be the following:
 
-   Example 1.11. regex file
+   Example 1.12. regex file
 ### List of User-Agents publishing presence status
 [0]
 
@@ -330,12 +384,12 @@ group 0: ((^Twinkle/1)|(^X-Lite)|(^eyeBeam)|(^Bria)|(^SIP Communicator)|
 group 1: ((^190\.232\.250\.226$)|(^122\.5\.27\.125$)|(^86\.92\.112\.))
 group 2: ((^1\d{3}$)|(^((\+|00)34)?900\d{6}$))
 
-   The first group can be used to avoid auto-generated PUBLISH
-   (pua_usrloc module) for UA's already supporting presence:
+   The first group can be used to avoid auto-generated PUBLISH (pua_usrloc
+   module) for UA's already supporting presence:
 
-   Example 1.12. Using with pua_usrloc
+   Example 1.13. Using with pua_usrloc
 route[REGISTER] {
-    if (! pcre_match_group("$ua", 0)) {
+    if (! pcre_match_group("$ua", "0")) {
         xlog("L_INFO", "Auto-generated PUBLISH for $fu ($ua)\n");
         pua_set_publish();
     }
@@ -343,12 +397,12 @@ route[REGISTER] {
     exit;
 }
 
-   NOTE: It's important to understand that the numbers in each
-   group header ([number]) must start by 0. If not, the real group
-   number will not match the number appearing in the file. For
-   example, the following text file:
+   NOTE: It's important to understand that the numbers in each group
+   header ([number]) must start at 0. If not, the real group number will
+   not match the number appearing in the file. For example, the following
+   text file:
 
-   Example 1.13. Incorrect groups file
+   Example 1.14. Incorrect groups file
 [1]
 ^aaa
 ^bbb
@@ -361,16 +415,16 @@ route[REGISTER] {
 group 0: ((^aaa)|(^bbb))
 group 1: ((^ccc)|(^ddd))
 
-   Note that the real index doesn't match the group number in the
-   file. This is, compiled group 0 always points to the first
-   group in the file, regardless of its number in the file. In
-   fact, the group number appearing in the file is used for
-   nothing but for delimiting different groups.
+   Note that the real index doesn't match the group number in the file.
+   That is, compiled group 0 always points to the first group in the file,
+   regardless of its number in the file. In fact, the group number
+   appearing in the file is used for nothing but delimiting different
+   groups.
 
-   NOTE: A line containing a regular expression cannot start by
-   '[' since it would be treated as a new group. The same for
-   lines starting by space, tab, or '#' (they would be ignored by
-   the parser). As a workaround, using brackets would work:
+   NOTE: A line containing a regular expression cannot start with '[' since
+   it would be treated as a new group. The same goes for lines starting with
+   space, tab, or '#' (they would be ignored by the parser). As a
+   workaround, using parentheses would work:
 [0]
 ([0-9]{9})
 ( #abcde)

+ 23 - 9
modules_k/regex/doc/regex_admin.xml

@@ -237,8 +237,8 @@ modparam("regex", "pcre_extended", 1)
 
 			<para>
 				Matches the given string parameter against the regular expression pcre_regex,
-				which is compiled into a PCRE object. Returns TRUE if it matches, FALSE
-				otherwise.
+				which is compiled at runtime into a PCRE object. Returns TRUE if it matches,
+				FALSE otherwise.
 			</para>
 
 			<para>Meaning of the parameters is as follows:</para>
@@ -300,10 +300,9 @@ if (pcre_match("$rU", "^user[1234]$$")) {  # Will be converted to "^user[1234]$"
 			</title>
 
 			<para>
-				It uses the groups readed from the text file
-				(see <xref linkend="file-format-id"/>) to match the given string
-				parameter against the compiled regular expression in group number group.
-				Returns TRUE if it matches, FALSE otherwise.
+				Tries to match the given string against a specific group in the text
+				file (see <xref linkend="file-format-id"/>). Returns TRUE if it matches,
+				FALSE otherwise.
 			</para>
 
 			<para>Meaning of the parameters is as follows:</para>
@@ -317,7 +316,8 @@ if (pcre_match("$rU", "^user[1234]$$")) {  # Will be converted to "^user[1234]$"
 				<listitem>
 					<para>
 						<emphasis>group</emphasis> - Number of group to use in the operation.
-						If not specified then 0 (the first group) is used.
+						If not specified then 0 (the first group) is used. A pseudo-variable
+						containing an integer can also be used.
 					</para>
 				</listitem>
 			</itemizedlist>
@@ -333,13 +333,27 @@ if (pcre_match("$rU", "^user[1234]$$")) {  # Will be converted to "^user[1234]$"
 				</title>
 <programlisting format="linespecific">
 ...
-if (pcre_match_group("$rU", 2)) {
+if (pcre_match_group("$rU", "2")) {
     xlog("L_INFO", "RURI username matches group 2\n");
 }
 ...
 </programlisting>
 			</example>
 
+			<example>
+				<title>
+					<function>pcre_match_group</function> usage (using a pseudo-variable as group)
+				</title>
+<programlisting format="linespecific">
+...
+$avp(i:10) = 5;  # Maybe got from a DB query.
+if (pcre_match_group("$ua", "$avp(i:10)")) {
+    xlog("L_INFO", "User-Agent matches group 5\n");
+}
+...
+</programlisting>
+			</example>
+ 
 		</section>
 
 	</section>
@@ -455,7 +469,7 @@ group 2: ((^1\d{3}$)|(^((\+|00)34)?900\d{6}$))
 				<title>Using with pua_usrloc</title>
 <programlisting  format="linespecific">
 route[REGISTER] {
-    if (! pcre_match_group("$ua", 0)) {
+    if (! pcre_match_group("$ua", "0")) {
         xlog("L_INFO", "Auto-generated PUBLISH for $fu ($ua)\n");
         pua_set_publish();
     }

+ 13 - 7
modules_k/regex/regex_mod.c

@@ -23,7 +23,8 @@
  *
  * History:
  * --------
- *  2009-01-14  initial version (Iñaki Baz Castillo)
+ *  2011-02-22  pcre_match_group() now allows a pseudo-variable as group argument.
+ *  2009-01-14  initial version (Iñaki Baz Castillo).
  */
 
 
@@ -119,7 +120,7 @@ static cmd_export_t cmds[] =
 {
 	{ "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
 		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
-	{ "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_uint, 0,
+	{ "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_spve, 0,
 		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
 	{ "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
 		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
@@ -383,7 +384,7 @@ static int load_pcres(int action)
 	}
 	
 	/* Log the group patterns */
-	LM_NOTICE("num groups = %d\n\n", num_pcres_tmp);
+	LM_NOTICE("num groups = %d\n", num_pcres_tmp);
 	for (i=0; i < num_pcres_tmp; i++) {
 		LM_NOTICE("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
 	}
@@ -587,8 +588,8 @@ static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
 /*! \brief Return true if the string argument matches the pattern group parameter */
 static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
 {
-	str string;
-	int num_pcre;
+	str string, group;
+	unsigned int num_pcre;
 	int pcre_rc;
 	
 	/* Check if group matching feature is enabled */
@@ -605,7 +606,12 @@ static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
 	if (_s2 == NULL) {
 		num_pcre = 0;
 	} else {
-		num_pcre = (uint)(long)_s2;
+		if (fixup_get_svalue(_msg, (gparam_p)_s2, &group))
+		{
+			LM_ERR("cannot print the format for second param\n");
+			return -5;
+		}
+		str2int(&group, &num_pcre);
 	}
 	
 	if (num_pcre >= *num_pcres) {
@@ -615,7 +621,7 @@ static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
 	
 	if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
 	{
-		LM_ERR("cannot print the format\n");
+		LM_ERR("cannot print the format for first param\n");
 		return -5;
 	}