regex_mod.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. /*
  2. * $Id$
  3. *
  4. * regex module - pcre operations
  5. *
  6. * Copyright (C) 2008 Iñaki Baz Castillo
  7. *
  8. * This file is part of Kamailio, a free SIP server.
  9. *
  10. * Kamailio is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version
  14. *
  15. * Kamailio is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23. *
  24. * History:
  25. * --------
  26. * 2009-01-14 initial version (Iñaki Baz Castillo)
  27. */
  28. /*!
  29. * \file
  30. * \brief REGEX :: Perl-compatible regular expressions using PCRE library
  31. * Copyright (C) 2008 Iñaki Baz Castillo
  32. * \ingroup regex
  33. */
  #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/stat.h>
  #include <pcre.h>
  #include "../../sr_module.h"
  #include "../../dprint.h"
  #include "../../pt.h"
  #include "../../mem/shm_mem.h"
  #include "../../str.h"
  #include "../../locking.h"
  #include "../../mod_fix.h"
  #include "../../lib/kmi/mi.h"
  47. MODULE_VERSION
  48. #define START 0
  49. #define RELOAD 1
  50. #define FILE_MAX_LINE 500 /*!< Max line size in the file */
  51. #define MAX_GROUPS 20 /*!< Max number of groups */
  52. #define GROUP_MAX_SIZE 8192 /*!< Max size of a group */
  53. /*
  54. * Locking variables
  55. */
  56. gen_lock_t *reload_lock;
  57. /*
  58. * Module exported parameter variables
  59. */
  60. static char *file;
  61. static int max_groups = MAX_GROUPS;
  62. static int group_max_size = GROUP_MAX_SIZE;
  63. static int pcre_caseless = 0;
  64. static int pcre_multiline = 0;
  65. static int pcre_dotall = 0;
  66. static int pcre_extended = 0;
  67. /*
  68. * Module internal parameter variables
  69. */
  70. static pcre **pcres;
  71. static pcre ***pcres_addr;
  72. static int *num_pcres;
  73. static int pcre_options = 0x00000000;
  /* Module lifecycle callbacks */
  static int mod_init(void);
  static void destroy(void);

  /* Internal helpers */
  static int load_pcres(int action);
  static void free_shared_memory(void);

  /* Functions callable from the routing script */
  static int w_pcre_match(struct sip_msg *msg, char *s1, char *s2);
  static int w_pcre_match_group(struct sip_msg *msg, char *s1, char *s2);

  /* MI command handlers */
  static struct mi_root *mi_pcres_reload(struct mi_root *cmd, void *param);
  93. /*
  94. * Exported functions
  95. */
  96. static cmd_export_t cmds[] =
  97. {
  98. { "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
  99. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  100. { "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_uint, 0,
  101. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  102. { "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
  103. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  104. { 0, 0, 0, 0, 0, 0 }
  105. };
  106. /*
  107. * Exported parameters
  108. */
  109. static param_export_t params[] = {
  110. {"file", STR_PARAM, &file },
  111. {"max_groups", INT_PARAM, &max_groups },
  112. {"group_max_size", INT_PARAM, &group_max_size },
  113. {"pcre_caseless", INT_PARAM, &pcre_caseless },
  114. {"pcre_multiline", INT_PARAM, &pcre_multiline },
  115. {"pcre_dotall", INT_PARAM, &pcre_dotall },
  116. {"pcre_extended", INT_PARAM, &pcre_extended },
  117. {0, 0, 0}
  118. };
  119. /*
  120. * Exported MI functions
  121. */
  122. static mi_export_t mi_cmds[] = {
  123. { "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
  124. { 0, 0, 0, 0 ,0 }
  125. };
  126. /*
  127. * Module interface
  128. */
  129. struct module_exports exports = {
  130. "regex", /*!< module name */
  131. DEFAULT_DLFLAGS, /*!< dlopen flags */
  132. cmds, /*!< exported functions */
  133. params, /*!< exported parameters */
  134. 0, /*!< exported statistics */
  135. mi_cmds, /*!< exported MI functions */
  136. 0, /*!< exported pseudo-variables */
  137. 0, /*!< extra processes */
  138. mod_init, /*!< module initialization function */
  139. (response_function) 0, /*!< response handling function */
  140. destroy, /*!< destroy function */
  141. 0 /*!< per-child init function */
  142. };
  143. /*! \brief
  144. * Init module function
  145. */
  146. static int mod_init(void)
  147. {
  148. LM_INFO("initializing module...\n");
  149. /* Group matching feature */
  150. if (file == NULL) {
  151. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  152. } else {
  153. /* Create and init the lock */
  154. reload_lock = lock_alloc();
  155. if (reload_lock == NULL) {
  156. LM_ERR("cannot allocate reload_lock\n");
  157. goto err;
  158. }
  159. if (lock_init(reload_lock) == NULL) {
  160. LM_ERR("cannot init the reload_lock\n");
  161. lock_dealloc(reload_lock);
  162. goto err;
  163. }
  164. /* PCRE options */
  165. if (pcre_caseless != 0) {
  166. LM_DBG("PCRE CASELESS enabled\n");
  167. pcre_options = pcre_options | PCRE_CASELESS;
  168. }
  169. if (pcre_multiline != 0) {
  170. LM_DBG("PCRE MULTILINE enabled\n");
  171. pcre_options = pcre_options | PCRE_MULTILINE;
  172. }
  173. if (pcre_dotall != 0) {
  174. LM_DBG("PCRE DOTALL enabled\n");
  175. pcre_options = pcre_options | PCRE_DOTALL;
  176. }
  177. if (pcre_extended != 0) {
  178. LM_DBG("PCRE EXTENDED enabled\n");
  179. pcre_options = pcre_options | PCRE_EXTENDED;
  180. }
  181. LM_DBG("PCRE options: %i\n", pcre_options);
  182. /* Pointer to pcres */
  183. if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
  184. LM_ERR("no memory for pcres_addr\n");
  185. goto err;
  186. }
  187. /* Integer containing the number of pcres */
  188. if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
  189. LM_ERR("no memory for num_pcres\n");
  190. goto err;
  191. }
  192. /* Load the pcres */
  193. LM_NOTICE("loading pcres...\n");
  194. if (load_pcres(START)) {
  195. LM_CRIT("failed to load pcres\n");
  196. goto err;
  197. }
  198. }
  199. return 0;
  200. err:
  201. free_shared_memory();
  202. return -1;
  203. }
  /*! \brief Destroy module function: all cleanup is done by free_shared_memory() */
  static void destroy(void)
  {
      free_shared_memory();
  }
  208. /*! \brief Convert the file content into regular expresions and store them in pcres */
  209. static int load_pcres(int action)
  210. {
  211. int i, j;
  212. FILE *f;
  213. char line[FILE_MAX_LINE];
  214. char **patterns = NULL;
  215. pcre *pcre_tmp = NULL;
  216. int pcre_size;
  217. int pcre_rc;
  218. const char *pcre_error;
  219. int pcre_erroffset;
  220. int num_pcres_tmp = 0;
  221. pcre **pcres_tmp = NULL;
  222. /* Get the lock */
  223. lock_get(reload_lock);
  224. if (!(f = fopen(file, "r"))) {
  225. LM_ERR("could not open file '%s'\n", file);
  226. goto err;
  227. }
  228. /* Array containing each pattern in the file */
  229. if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
  230. LM_ERR("no more memory for patterns\n");
  231. fclose(f);
  232. goto err;
  233. }
  234. for (i=0; i<max_groups; i++) {
  235. patterns[i] = NULL;
  236. }
  237. for (i=0; i<max_groups; i++) {
  238. if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
  239. LM_ERR("no more memory for patterns[%d]\n", i);
  240. fclose(f);
  241. goto err;
  242. }
  243. memset(patterns[i], '\0', group_max_size);
  244. }
  245. /* Read the file and extract the patterns */
  246. memset(line, '\0', FILE_MAX_LINE);
  247. i = -1;
  248. while (fgets(line, FILE_MAX_LINE, f) != NULL) {
  249. /* Ignore comments and lines starting by space, tab, CR, LF */
  250. if(isspace(line[0]) || line[0]=='#') {
  251. memset(line, '\0', FILE_MAX_LINE);
  252. continue;
  253. }
  254. /* First group */
  255. if (i == -1 && line[0] != '[') {
  256. LM_ERR("first group must be initialized with [0] before any regular expression\n");
  257. fclose(f);
  258. goto err;
  259. }
  260. /* New group */
  261. if (line[0] == '[') {
  262. i++;
  263. /* Check if there are more patterns than the max value */
  264. if (i >= max_groups) {
  265. LM_ERR("max patterns exceded\n");
  266. fclose(f);
  267. goto err;
  268. }
  269. /* Start the regular expression with '(' */
  270. patterns[i][0] = '(';
  271. memset(line, '\0', FILE_MAX_LINE);
  272. continue;
  273. }
  274. /* Check if the patter size is too big (aprox) */
  275. if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
  276. LM_ERR("pattern max file exceded\n");
  277. fclose(f);
  278. goto err;
  279. }
  280. /* Append ')' at the end of the line */
  281. if (line[strlen(line) - 1] == '\n') {
  282. line[strlen(line)] = line[strlen(line) - 1];
  283. line[strlen(line) - 2] = ')';
  284. } else {
  285. /* This is the last char in the file and it's not \n */
  286. line[strlen(line)] = ')';
  287. }
  288. /* Append '(' at the beginning of the line */
  289. memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
  290. /* Append the line to the current pattern */
  291. memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
  292. memset(line, '\0', FILE_MAX_LINE);
  293. }
  294. num_pcres_tmp = i + 1;
  295. fclose(f);
  296. /* Fix the patterns */
  297. for (i=0; i < num_pcres_tmp; i++) {
  298. /* Convert empty groups in unmatcheable regular expression ^$ */
  299. if (strlen(patterns[i]) == 1) {
  300. patterns[i][0] = '^';
  301. patterns[i][1] = '$';
  302. patterns[i][2] = '\0';
  303. continue;
  304. }
  305. /* Delete possible '\n' at the end of the pattern */
  306. if (patterns[i][strlen(patterns[i])-1] == '\n') {
  307. patterns[i][strlen(patterns[i])-1] = '\0';
  308. }
  309. /* Replace '\n' with '|' (except at the end of the pattern) */
  310. for (j=0; j < strlen(patterns[i]); j++) {
  311. if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
  312. patterns[i][j] = '|';
  313. }
  314. }
  315. /* Add ')' at the end of the pattern */
  316. patterns[i][strlen(patterns[i])] = ')';
  317. }
  318. /* Log the group patterns */
  319. LM_NOTICE("num groups = %d\n\n", num_pcres_tmp);
  320. for (i=0; i < num_pcres_tmp; i++) {
  321. LM_NOTICE("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
  322. }
  323. /* Temporal pointer of pcres */
  324. if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  325. LM_ERR("no more memory for pcres_tmp\n");
  326. goto err;
  327. }
  328. for (i=0; i<num_pcres_tmp; i++) {
  329. pcres_tmp[i] = NULL;
  330. }
  331. /* Compile the patters */
  332. for (i=0; i<num_pcres_tmp; i++) {
  333. pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
  334. if (pcre_tmp == NULL) {
  335. LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
  336. goto err;
  337. }
  338. pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
  339. if (pcre_rc) {
  340. printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
  341. goto err;
  342. }
  343. if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
  344. LM_ERR("no more memory for pcres_tmp[%i]\n", i);
  345. goto err;
  346. }
  347. memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
  348. pcre_free(pcre_tmp);
  349. pkg_free(patterns[i]);
  350. }
  351. /* Copy to shared memory */
  352. if (action == RELOAD) {
  353. for(i=0; i<*num_pcres; i++) { /* Use the previous num_pcres value */
  354. if (pcres[i]) {
  355. shm_free(pcres[i]);
  356. }
  357. }
  358. shm_free(pcres);
  359. }
  360. if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  361. LM_ERR("no more memory for pcres\n");
  362. goto err;
  363. }
  364. for (i=0; i<num_pcres_tmp; i++) {
  365. pcres[i] = NULL;
  366. }
  367. for (i=0; i<num_pcres_tmp; i++) {
  368. pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
  369. if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
  370. LM_ERR("no more memory for pcres[%i]\n", i);
  371. goto err;
  372. }
  373. memcpy(pcres[i], pcres_tmp[i], pcre_size);
  374. }
  375. *num_pcres = num_pcres_tmp;
  376. *pcres_addr = pcres;
  377. /* Free used memory */
  378. for (i=0; i<num_pcres_tmp; i++) {
  379. pkg_free(pcres_tmp[i]);
  380. }
  381. pkg_free(pcres_tmp);
  382. pkg_free(patterns);
  383. lock_release(reload_lock);
  384. return 0;
  385. err:
  386. if (patterns) {
  387. for(i=0; i<max_groups; i++) {
  388. if (patterns[i]) {
  389. pkg_free(patterns[i]);
  390. }
  391. }
  392. pkg_free(patterns);
  393. }
  394. if (pcres_tmp) {
  395. for (i=0; i<num_pcres_tmp; i++) {
  396. if (pcres_tmp[i]) {
  397. pkg_free(pcres_tmp[i]);
  398. }
  399. }
  400. pkg_free(pcres_tmp);
  401. }
  402. if (reload_lock) {
  403. lock_release(reload_lock);
  404. }
  405. if (action == START) {
  406. free_shared_memory();
  407. }
  408. return -1;
  409. }
  410. static void free_shared_memory(void)
  411. {
  412. int i;
  413. if (pcres) {
  414. for(i=0; i<*num_pcres; i++) {
  415. if (pcres[i]) {
  416. shm_free(pcres[i]);
  417. }
  418. }
  419. shm_free(pcres);
  420. }
  421. if (num_pcres) {
  422. shm_free(num_pcres);
  423. }
  424. if (pcres_addr) {
  425. shm_free(pcres_addr);
  426. }
  427. if (reload_lock) {
  428. lock_destroy(reload_lock);
  429. lock_dealloc(reload_lock);
  430. }
  431. }
  432. /*
  433. * Script functions
  434. */
  435. /*! \brief Return true if the argument matches the regular expression parameter */
  436. static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
  437. {
  438. str string;
  439. str regex;
  440. pcre *pcre_re = NULL;
  441. int pcre_rc;
  442. const char *pcre_error;
  443. int pcre_erroffset;
  444. if (_s1 == NULL) {
  445. LM_ERR("bad parameters\n");
  446. return -2;
  447. }
  448. if (_s2 == NULL) {
  449. LM_ERR("bad parameters\n");
  450. return -2;
  451. }
  452. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  453. {
  454. LM_ERR("cannot print the format for string\n");
  455. return -3;
  456. }
  457. if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
  458. {
  459. LM_ERR("cannot print the format for regex\n");
  460. return -3;
  461. }
  462. pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
  463. if (pcre_re == NULL) {
  464. LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
  465. return -4;
  466. }
  467. pcre_rc = pcre_exec(
  468. pcre_re, /* the compiled pattern */
  469. NULL, /* no extra data - we didn't study the pattern */
  470. string.s, /* the matching string */
  471. (int)(string.len), /* the length of the subject */
  472. 0, /* start at offset 0 in the string */
  473. 0, /* default options */
  474. NULL, /* output vector for substring information */
  475. 0); /* number of elements in the output vector */
  476. /* Matching failed: handle error cases */
  477. if (pcre_rc < 0) {
  478. switch(pcre_rc) {
  479. case PCRE_ERROR_NOMATCH:
  480. LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
  481. break;
  482. default:
  483. LM_DBG("matching error '%d'\n", pcre_rc);
  484. break;
  485. }
  486. return -1;
  487. }
  488. LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
  489. return 1;
  490. }
  491. /*! \brief Return true if the string argument matches the pattern group parameter */
  492. static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
  493. {
  494. str string;
  495. int num_pcre;
  496. int pcre_rc;
  497. /* Check if group matching feature is enabled */
  498. if (file == NULL) {
  499. LM_ERR("group matching is disabled\n");
  500. return -2;
  501. }
  502. if (_s1 == NULL) {
  503. LM_ERR("bad parameters\n");
  504. return -3;
  505. }
  506. if (_s2 == NULL) {
  507. num_pcre = 0;
  508. } else {
  509. num_pcre = (uint)(long)_s2;
  510. }
  511. if (num_pcre >= *num_pcres) {
  512. LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
  513. return -4;
  514. }
  515. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  516. {
  517. LM_ERR("cannot print the format\n");
  518. return -5;
  519. }
  520. lock_get(reload_lock);
  521. pcre_rc = pcre_exec(
  522. (*pcres_addr)[num_pcre], /* the compiled pattern */
  523. NULL, /* no extra data - we didn't study the pattern */
  524. string.s, /* the matching string */
  525. (int)(string.len), /* the length of the subject */
  526. 0, /* start at offset 0 in the string */
  527. 0, /* default options */
  528. NULL, /* output vector for substring information */
  529. 0); /* number of elements in the output vector */
  530. lock_release(reload_lock);
  531. /* Matching failed: handle error cases */
  532. if (pcre_rc < 0) {
  533. switch(pcre_rc) {
  534. case PCRE_ERROR_NOMATCH:
  535. LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
  536. break;
  537. default:
  538. LM_DBG("matching error '%d'\n", pcre_rc);
  539. break;
  540. }
  541. return -1;
  542. }
  543. else {
  544. LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
  545. return 1;
  546. }
  547. }
  548. /*
  549. * MI functions
  550. */
  551. /*! \brief Reload pcres by reading the file again */
  552. static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
  553. {
  554. /* Check if group matching feature is enabled */
  555. if (file == NULL) {
  556. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  557. return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
  558. }
  559. LM_NOTICE("reloading pcres...\n");
  560. if (load_pcres(RELOAD)) {
  561. LM_ERR("failed to reload pcres\n");
  562. return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
  563. }
  564. LM_NOTICE("reload success\n");
  565. return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
  566. }