2
0

regex_mod.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /*
  2. * $Id$
  3. *
  4. * regex module - pcre operations
  5. *
  6. * Copyright (C) 2008 Iñaki Baz Castillo
  7. *
  8. * This file is part of Kamailio, a free SIP server.
  9. *
  10. * Kamailio is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version
  14. *
  15. * Kamailio is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23. *
  24. * History:
  25. * --------
  26. * 2011-02-22 pcre_match_group() allows now pseudo-variable as group argument.
  27. * 2009-01-14 initial version (Iñaki Baz Castillo).
  28. */
  29. /*!
  30. * \file
  31. * \brief REGEX :: Perl-compatible regular expressions using PCRE library
  32. * Copyright (C) 2008 Iñaki Baz Castillo
  33. * \ingroup regex
  34. */
  #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/stat.h>
  #include <pcre.h>
  #include "../../sr_module.h"
  #include "../../dprint.h"
  #include "../../pt.h"
  #include "../../mem/shm_mem.h"
  #include "../../str.h"
  #include "../../locking.h"
  #include "../../mod_fix.h"
  #include "../../lib/kmi/mi.h"
  48. MODULE_VERSION
  /* Actions accepted by load_pcres(): START is passed from mod_init(),
   * RELOAD is passed from the MI reload command. */
  49. #define START 0
  50. #define RELOAD 1
  /* Size of the stack buffer load_pcres() hands to fgets() for each file line. */
  51. #define FILE_MAX_LINE 500 /*!< Max line size in the file */
  /* Defaults for the "max_groups" and "group_max_size" module parameters. */
  52. #define MAX_GROUPS 20 /*!< Max number of groups */
  53. #define GROUP_MAX_SIZE 8192 /*!< Max size of a group */
  54. /*
  55. * Locking variables
  56. */
  /* Allocated in mod_init() only when 'file' is set; held by load_pcres() for
   * the whole (re)load and by w_pcre_match_group() around pcre_exec(). */
  57. gen_lock_t *reload_lock;
  58. /*
  59. * Module exported parameter variables
  60. */
  /* "file" parameter: path of the groups file; NULL disables group matching. */
  61. static char *file;
  /* Number of per-group pattern buffers load_pcres() allocates. */
  62. static int max_groups = MAX_GROUPS;
  /* Size in bytes of each per-group pattern buffer. */
  63. static int group_max_size = GROUP_MAX_SIZE;
  /* Non-zero values make mod_init() OR the matching PCRE_* flag into pcre_options. */
  64. static int pcre_caseless = 0;
  65. static int pcre_multiline = 0;
  66. static int pcre_dotall = 0;
  67. static int pcre_extended = 0;
  68. /*
  69. * Module internal parameter variables
  70. */
  /* Array of compiled group patterns, allocated with shm_malloc() in load_pcres(). */
  71. static pcre **pcres;
  /* shm cell holding the address of the current array; this is what
   * w_pcre_match_group() dereferences. */
  72. static pcre ***pcres_addr;
  /* shm cell holding the number of entries in the current array. */
  73. static int *num_pcres;
  /* Option bits passed to every pcre_compile() call in this file. */
  74. static int pcre_options = 0x00000000;
  75. /*
  76. * Module core functions
  77. */
  78. static int mod_init(void);
  79. static void destroy(void);
  80. /*
  81. * Module internal functions
  82. */
  83. static int load_pcres(int);
  84. static void free_shared_memory(void);
  85. /*
  86. * Script functions
  87. */
  88. static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2);
  89. static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2);
  90. /*
  91. * MI functions
  92. */
  93. static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param);
  94. /*
  95. * Exported functions
  96. */
  /* Script commands exported by the module. pcre_match_group is listed twice so
   * it can be called with one or two arguments (the numeric third field
   * presumably is the parameter count - confirm against cmd_export_t). With one
   * argument w_pcre_match_group() receives _s2 == NULL and uses group 0. */
  97. static cmd_export_t cmds[] =
  98. {
  99. { "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
  100. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  101. { "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_spve, 0,
  102. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  103. { "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
  104. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  /* All-zero entry terminates the table. */
  105. { 0, 0, 0, 0, 0, 0 }
  106. };
  107. /*
  108. * Exported parameters
  109. */
  /* Maps each configuration parameter name to the static variable that stores
   * its value. The pcre_* integers are only turned into pcre_options bits by
   * mod_init() when "file" is set. */
  110. static param_export_t params[] = {
  111. {"file", STR_PARAM, &file },
  112. {"max_groups", INT_PARAM, &max_groups },
  113. {"group_max_size", INT_PARAM, &group_max_size },
  114. {"pcre_caseless", INT_PARAM, &pcre_caseless },
  115. {"pcre_multiline", INT_PARAM, &pcre_multiline },
  116. {"pcre_dotall", INT_PARAM, &pcre_dotall },
  117. {"pcre_extended", INT_PARAM, &pcre_extended },
  /* All-zero entry terminates the table. */
  118. {0, 0, 0}
  119. };
  120. /*
  121. * Exported MI functions
  122. */
  /* MI commands: "regex_reload" is handled by mi_pcres_reload(), which re-reads
   * the groups file. The table is registered in mod_init() via register_mi_mod(). */
  123. static mi_export_t mi_cmds[] = {
  124. { "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
  /* All-zero entry terminates the table. */
  125. { 0, 0, 0, 0 ,0 }
  126. };
  127. /*
  128. * Module interface
  129. */
  /* Module descriptor tying together the command, parameter and MI tables above
   * with the init/destroy callbacks. Fields are positional; their meaning is
   * given by the per-line comments. */
  130. struct module_exports exports = {
  131. "regex", /*!< module name */
  132. DEFAULT_DLFLAGS, /*!< dlopen flags */
  133. cmds, /*!< exported functions */
  134. params, /*!< exported parameters */
  135. 0, /*!< exported statistics */
  136. mi_cmds, /*!< exported MI functions */
  137. 0, /*!< exported pseudo-variables */
  138. 0, /*!< extra processes */
  139. mod_init, /*!< module initialization function */
  140. (response_function) 0, /*!< response handling function */
  141. destroy, /*!< destroy function */
  142. 0 /*!< per-child init function */
  143. };
  144. /*! \brief
  145. * Init module function
  146. */
  147. static int mod_init(void)
  148. {
  149. LM_INFO("initializing module...\n");
  150. if(register_mi_mod(exports.name, mi_cmds)!=0)
  151. {
  152. LM_ERR("failed to register MI commands\n");
  153. return -1;
  154. }
  155. /* Group matching feature */
  156. if (file == NULL) {
  157. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  158. } else {
  159. /* Create and init the lock */
  160. reload_lock = lock_alloc();
  161. if (reload_lock == NULL) {
  162. LM_ERR("cannot allocate reload_lock\n");
  163. goto err;
  164. }
  165. if (lock_init(reload_lock) == NULL) {
  166. LM_ERR("cannot init the reload_lock\n");
  167. lock_dealloc(reload_lock);
  168. goto err;
  169. }
  170. /* PCRE options */
  171. if (pcre_caseless != 0) {
  172. LM_DBG("PCRE CASELESS enabled\n");
  173. pcre_options = pcre_options | PCRE_CASELESS;
  174. }
  175. if (pcre_multiline != 0) {
  176. LM_DBG("PCRE MULTILINE enabled\n");
  177. pcre_options = pcre_options | PCRE_MULTILINE;
  178. }
  179. if (pcre_dotall != 0) {
  180. LM_DBG("PCRE DOTALL enabled\n");
  181. pcre_options = pcre_options | PCRE_DOTALL;
  182. }
  183. if (pcre_extended != 0) {
  184. LM_DBG("PCRE EXTENDED enabled\n");
  185. pcre_options = pcre_options | PCRE_EXTENDED;
  186. }
  187. LM_DBG("PCRE options: %i\n", pcre_options);
  188. /* Pointer to pcres */
  189. if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
  190. LM_ERR("no memory for pcres_addr\n");
  191. goto err;
  192. }
  193. /* Integer containing the number of pcres */
  194. if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
  195. LM_ERR("no memory for num_pcres\n");
  196. goto err;
  197. }
  198. /* Load the pcres */
  199. LM_NOTICE("loading pcres...\n");
  200. if (load_pcres(START)) {
  201. LM_CRIT("failed to load pcres\n");
  202. goto err;
  203. }
  204. }
  205. return 0;
  206. err:
  207. free_shared_memory();
  208. return -1;
  209. }
  /*! \brief Module destroy callback: all cleanup is delegated to free_shared_memory() */
  static void destroy(void)
  {
      free_shared_memory();
      return;
  }
  214. /*! \brief Convert the file content into regular expresions and store them in pcres */
  215. static int load_pcres(int action)
  216. {
  217. int i, j;
  218. FILE *f;
  219. char line[FILE_MAX_LINE];
  220. char **patterns = NULL;
  221. pcre *pcre_tmp = NULL;
  222. int pcre_size;
  223. int pcre_rc;
  224. const char *pcre_error;
  225. int pcre_erroffset;
  226. int num_pcres_tmp = 0;
  227. pcre **pcres_tmp = NULL;
  228. /* Get the lock */
  229. lock_get(reload_lock);
  230. if (!(f = fopen(file, "r"))) {
  231. LM_ERR("could not open file '%s'\n", file);
  232. goto err;
  233. }
  234. /* Array containing each pattern in the file */
  235. if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
  236. LM_ERR("no more memory for patterns\n");
  237. fclose(f);
  238. goto err;
  239. }
  240. for (i=0; i<max_groups; i++) {
  241. patterns[i] = NULL;
  242. }
  243. for (i=0; i<max_groups; i++) {
  244. if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
  245. LM_ERR("no more memory for patterns[%d]\n", i);
  246. fclose(f);
  247. goto err;
  248. }
  249. memset(patterns[i], '\0', group_max_size);
  250. }
  251. /* Read the file and extract the patterns */
  252. memset(line, '\0', FILE_MAX_LINE);
  253. i = -1;
  254. while (fgets(line, FILE_MAX_LINE, f) != NULL) {
  255. /* Ignore comments and lines starting by space, tab, CR, LF */
  256. if(isspace(line[0]) || line[0]=='#') {
  257. memset(line, '\0', FILE_MAX_LINE);
  258. continue;
  259. }
  260. /* First group */
  261. if (i == -1 && line[0] != '[') {
  262. LM_ERR("first group must be initialized with [0] before any regular expression\n");
  263. fclose(f);
  264. goto err;
  265. }
  266. /* New group */
  267. if (line[0] == '[') {
  268. i++;
  269. /* Check if there are more patterns than the max value */
  270. if (i >= max_groups) {
  271. LM_ERR("max patterns exceded\n");
  272. fclose(f);
  273. goto err;
  274. }
  275. /* Start the regular expression with '(' */
  276. patterns[i][0] = '(';
  277. memset(line, '\0', FILE_MAX_LINE);
  278. continue;
  279. }
  280. /* Check if the patter size is too big (aprox) */
  281. if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
  282. LM_ERR("pattern max file exceded\n");
  283. fclose(f);
  284. goto err;
  285. }
  286. /* Append ')' at the end of the line */
  287. if (line[strlen(line) - 1] == '\n') {
  288. line[strlen(line)] = line[strlen(line) - 1];
  289. line[strlen(line) - 2] = ')';
  290. } else {
  291. /* This is the last char in the file and it's not \n */
  292. line[strlen(line)] = ')';
  293. }
  294. /* Append '(' at the beginning of the line */
  295. memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
  296. /* Append the line to the current pattern */
  297. memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
  298. memset(line, '\0', FILE_MAX_LINE);
  299. }
  300. num_pcres_tmp = i + 1;
  301. fclose(f);
  302. /* Fix the patterns */
  303. for (i=0; i < num_pcres_tmp; i++) {
  304. /* Convert empty groups in unmatcheable regular expression ^$ */
  305. if (strlen(patterns[i]) == 1) {
  306. patterns[i][0] = '^';
  307. patterns[i][1] = '$';
  308. patterns[i][2] = '\0';
  309. continue;
  310. }
  311. /* Delete possible '\n' at the end of the pattern */
  312. if (patterns[i][strlen(patterns[i])-1] == '\n') {
  313. patterns[i][strlen(patterns[i])-1] = '\0';
  314. }
  315. /* Replace '\n' with '|' (except at the end of the pattern) */
  316. for (j=0; j < strlen(patterns[i]); j++) {
  317. if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
  318. patterns[i][j] = '|';
  319. }
  320. }
  321. /* Add ')' at the end of the pattern */
  322. patterns[i][strlen(patterns[i])] = ')';
  323. }
  324. /* Log the group patterns */
  325. LM_NOTICE("num groups = %d\n", num_pcres_tmp);
  326. for (i=0; i < num_pcres_tmp; i++) {
  327. LM_NOTICE("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
  328. }
  329. /* Temporal pointer of pcres */
  330. if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  331. LM_ERR("no more memory for pcres_tmp\n");
  332. goto err;
  333. }
  334. for (i=0; i<num_pcres_tmp; i++) {
  335. pcres_tmp[i] = NULL;
  336. }
  337. /* Compile the patters */
  338. for (i=0; i<num_pcres_tmp; i++) {
  339. pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
  340. if (pcre_tmp == NULL) {
  341. LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
  342. goto err;
  343. }
  344. pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
  345. if (pcre_rc) {
  346. printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
  347. goto err;
  348. }
  349. if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
  350. LM_ERR("no more memory for pcres_tmp[%i]\n", i);
  351. goto err;
  352. }
  353. memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
  354. pcre_free(pcre_tmp);
  355. pkg_free(patterns[i]);
  356. }
  357. /* Copy to shared memory */
  358. if (action == RELOAD) {
  359. for(i=0; i<*num_pcres; i++) { /* Use the previous num_pcres value */
  360. if (pcres[i]) {
  361. shm_free(pcres[i]);
  362. }
  363. }
  364. shm_free(pcres);
  365. }
  366. if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  367. LM_ERR("no more memory for pcres\n");
  368. goto err;
  369. }
  370. for (i=0; i<num_pcres_tmp; i++) {
  371. pcres[i] = NULL;
  372. }
  373. for (i=0; i<num_pcres_tmp; i++) {
  374. pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
  375. if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
  376. LM_ERR("no more memory for pcres[%i]\n", i);
  377. goto err;
  378. }
  379. memcpy(pcres[i], pcres_tmp[i], pcre_size);
  380. }
  381. *num_pcres = num_pcres_tmp;
  382. *pcres_addr = pcres;
  383. /* Free used memory */
  384. for (i=0; i<num_pcres_tmp; i++) {
  385. pkg_free(pcres_tmp[i]);
  386. }
  387. pkg_free(pcres_tmp);
  388. pkg_free(patterns);
  389. lock_release(reload_lock);
  390. return 0;
  391. err:
  392. if (patterns) {
  393. for(i=0; i<max_groups; i++) {
  394. if (patterns[i]) {
  395. pkg_free(patterns[i]);
  396. }
  397. }
  398. pkg_free(patterns);
  399. }
  400. if (pcres_tmp) {
  401. for (i=0; i<num_pcres_tmp; i++) {
  402. if (pcres_tmp[i]) {
  403. pkg_free(pcres_tmp[i]);
  404. }
  405. }
  406. pkg_free(pcres_tmp);
  407. }
  408. if (reload_lock) {
  409. lock_release(reload_lock);
  410. }
  411. if (action == START) {
  412. free_shared_memory();
  413. }
  414. return -1;
  415. }
  416. static void free_shared_memory(void)
  417. {
  418. int i;
  419. if (pcres) {
  420. for(i=0; i<*num_pcres; i++) {
  421. if (pcres[i]) {
  422. shm_free(pcres[i]);
  423. }
  424. }
  425. shm_free(pcres);
  426. }
  427. if (num_pcres) {
  428. shm_free(num_pcres);
  429. }
  430. if (pcres_addr) {
  431. shm_free(pcres_addr);
  432. }
  433. if (reload_lock) {
  434. lock_destroy(reload_lock);
  435. lock_dealloc(reload_lock);
  436. }
  437. }
  438. /*
  439. * Script functions
  440. */
  441. /*! \brief Return true if the argument matches the regular expression parameter */
  442. static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
  443. {
  444. str string;
  445. str regex;
  446. pcre *pcre_re = NULL;
  447. int pcre_rc;
  448. const char *pcre_error;
  449. int pcre_erroffset;
  450. if (_s1 == NULL) {
  451. LM_ERR("bad parameters\n");
  452. return -2;
  453. }
  454. if (_s2 == NULL) {
  455. LM_ERR("bad parameters\n");
  456. return -2;
  457. }
  458. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  459. {
  460. LM_ERR("cannot print the format for string\n");
  461. return -3;
  462. }
  463. if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
  464. {
  465. LM_ERR("cannot print the format for regex\n");
  466. return -3;
  467. }
  468. pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
  469. if (pcre_re == NULL) {
  470. LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
  471. return -4;
  472. }
  473. pcre_rc = pcre_exec(
  474. pcre_re, /* the compiled pattern */
  475. NULL, /* no extra data - we didn't study the pattern */
  476. string.s, /* the matching string */
  477. (int)(string.len), /* the length of the subject */
  478. 0, /* start at offset 0 in the string */
  479. 0, /* default options */
  480. NULL, /* output vector for substring information */
  481. 0); /* number of elements in the output vector */
  482. /* Matching failed: handle error cases */
  483. if (pcre_rc < 0) {
  484. switch(pcre_rc) {
  485. case PCRE_ERROR_NOMATCH:
  486. LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
  487. break;
  488. default:
  489. LM_DBG("matching error '%d'\n", pcre_rc);
  490. break;
  491. }
  492. pcre_free(pcre_re);
  493. return -1;
  494. }
  495. pcre_free(pcre_re);
  496. LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
  497. return 1;
  498. }
  499. /*! \brief Return true if the string argument matches the pattern group parameter */
  500. static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
  501. {
  502. str string, group;
  503. unsigned int num_pcre;
  504. int pcre_rc;
  505. /* Check if group matching feature is enabled */
  506. if (file == NULL) {
  507. LM_ERR("group matching is disabled\n");
  508. return -2;
  509. }
  510. if (_s1 == NULL) {
  511. LM_ERR("bad parameters\n");
  512. return -3;
  513. }
  514. if (_s2 == NULL) {
  515. num_pcre = 0;
  516. } else {
  517. if (fixup_get_svalue(_msg, (gparam_p)_s2, &group))
  518. {
  519. LM_ERR("cannot print the format for second param\n");
  520. return -5;
  521. }
  522. str2int(&group, &num_pcre);
  523. }
  524. if (num_pcre >= *num_pcres) {
  525. LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
  526. return -4;
  527. }
  528. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  529. {
  530. LM_ERR("cannot print the format for first param\n");
  531. return -5;
  532. }
  533. lock_get(reload_lock);
  534. pcre_rc = pcre_exec(
  535. (*pcres_addr)[num_pcre], /* the compiled pattern */
  536. NULL, /* no extra data - we didn't study the pattern */
  537. string.s, /* the matching string */
  538. (int)(string.len), /* the length of the subject */
  539. 0, /* start at offset 0 in the string */
  540. 0, /* default options */
  541. NULL, /* output vector for substring information */
  542. 0); /* number of elements in the output vector */
  543. lock_release(reload_lock);
  544. /* Matching failed: handle error cases */
  545. if (pcre_rc < 0) {
  546. switch(pcre_rc) {
  547. case PCRE_ERROR_NOMATCH:
  548. LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
  549. break;
  550. default:
  551. LM_DBG("matching error '%d'\n", pcre_rc);
  552. break;
  553. }
  554. return -1;
  555. }
  556. else {
  557. LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
  558. return 1;
  559. }
  560. }
  561. /*
  562. * MI functions
  563. */
  564. /*! \brief Reload pcres by reading the file again */
  565. static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
  566. {
  567. /* Check if group matching feature is enabled */
  568. if (file == NULL) {
  569. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  570. return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
  571. }
  572. LM_NOTICE("reloading pcres...\n");
  573. if (load_pcres(RELOAD)) {
  574. LM_ERR("failed to reload pcres\n");
  575. return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
  576. }
  577. LM_NOTICE("reload success\n");
  578. return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
  579. }