regex_mod.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684
  1. /*
  2. * $Id$
  3. *
  4. * regex module - pcre operations
  5. *
  6. * Copyright (C) 2008 Iñaki Baz Castillo
  7. *
  8. * This file is part of Kamailio, a free SIP server.
  9. *
  10. * Kamailio is free software; you can redistribute it and/or modify
  11. * it under the terms of the GNU General Public License as published by
  12. * the Free Software Foundation; either version 2 of the License, or
  13. * (at your option) any later version
  14. *
  15. * Kamailio is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  18. * GNU General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU General Public License
  21. * along with this program; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. *
  24. * History:
  25. * --------
  26. * 2011-02-22 pcre_match_group() allows now pseudo-variable as group argument.
  27. * 2009-01-14 initial version (Iñaki Baz Castillo).
  28. */
  29. /*!
  30. * \file
  31. * \brief REGEX :: Perl-compatible regular expressions using PCRE library
  32. * Copyright (C) 2008 Iñaki Baz Castillo
  33. * \ingroup regex
  34. */
  #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <sys/stat.h>
  #include <pcre.h>
  #include "../../sr_module.h"
  #include "../../dprint.h"
  #include "../../pt.h"
  #include "../../mem/shm_mem.h"
  #include "../../str.h"
  #include "../../locking.h"
  #include "../../mod_fix.h"
  #include "../../lib/kmi/mi.h"
  48. MODULE_VERSION
  /* Values passed to load_pcres() to select the kind of load */
  #define START  0
  #define RELOAD 1

  /* Limits used while parsing the groups file */
  #define FILE_MAX_LINE  500   /*!< Max line size in the file */
  #define MAX_GROUPS     20    /*!< Max number of groups */
  #define GROUP_MAX_SIZE 8192  /*!< Max size of a group */
  54. /*
  55. * Locking variables
  56. */
  57. gen_lock_t *reload_lock;
  58. /*
  59. * Module exported parameter variables
  60. */
  61. static char *file;
  62. static int max_groups = MAX_GROUPS;
  63. static int group_max_size = GROUP_MAX_SIZE;
  64. static int pcre_caseless = 0;
  65. static int pcre_multiline = 0;
  66. static int pcre_dotall = 0;
  67. static int pcre_extended = 0;
  68. /*
  69. * Module internal parameter variables
  70. */
  71. static pcre **pcres;
  72. static pcre ***pcres_addr;
  73. static int *num_pcres;
  74. static int pcre_options = 0x00000000;
  /*
   * Module core functions
   */
  static int  mod_init(void);
  static void destroy(void);

  /*
   * Module internal functions
   */
  static int  load_pcres(int action);
  static void free_shared_memory(void);

  /*
   * Script functions
   */
  static int w_pcre_match(struct sip_msg *_msg, char *_s1, char *_s2);
  static int w_pcre_match_group(struct sip_msg *_msg, char *_s1, char *_s2);

  /*
   * MI functions
   */
  static struct mi_root *mi_pcres_reload(struct mi_root *cmd, void *param);
  94. /*
  95. * Exported functions
  96. */
  97. static cmd_export_t cmds[] =
  98. {
  99. { "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
  100. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  101. { "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_spve, 0,
  102. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  103. { "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
  104. REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
  105. { 0, 0, 0, 0, 0, 0 }
  106. };
  107. /*
  108. * Exported parameters
  109. */
  110. static param_export_t params[] = {
  111. {"file", PARAM_STRING, &file },
  112. {"max_groups", INT_PARAM, &max_groups },
  113. {"group_max_size", INT_PARAM, &group_max_size },
  114. {"pcre_caseless", INT_PARAM, &pcre_caseless },
  115. {"pcre_multiline", INT_PARAM, &pcre_multiline },
  116. {"pcre_dotall", INT_PARAM, &pcre_dotall },
  117. {"pcre_extended", INT_PARAM, &pcre_extended },
  118. {0, 0, 0}
  119. };
  120. /*
  121. * Exported MI functions
  122. */
  123. static mi_export_t mi_cmds[] = {
  124. { "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
  125. { 0, 0, 0, 0 ,0 }
  126. };
  127. /*
  128. * Module interface
  129. */
  130. struct module_exports exports = {
  131. "regex", /*!< module name */
  132. DEFAULT_DLFLAGS, /*!< dlopen flags */
  133. cmds, /*!< exported functions */
  134. params, /*!< exported parameters */
  135. 0, /*!< exported statistics */
  136. mi_cmds, /*!< exported MI functions */
  137. 0, /*!< exported pseudo-variables */
  138. 0, /*!< extra processes */
  139. mod_init, /*!< module initialization function */
  140. (response_function) 0, /*!< response handling function */
  141. destroy, /*!< destroy function */
  142. 0 /*!< per-child init function */
  143. };
  144. /*! \brief
  145. * Init module function
  146. */
  147. static int mod_init(void)
  148. {
  149. if(register_mi_mod(exports.name, mi_cmds)!=0)
  150. {
  151. LM_ERR("failed to register MI commands\n");
  152. return -1;
  153. }
  154. /* Group matching feature */
  155. if (file == NULL) {
  156. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  157. } else {
  158. /* Create and init the lock */
  159. reload_lock = lock_alloc();
  160. if (reload_lock == NULL) {
  161. LM_ERR("cannot allocate reload_lock\n");
  162. goto err;
  163. }
  164. if (lock_init(reload_lock) == NULL) {
  165. LM_ERR("cannot init the reload_lock\n");
  166. lock_dealloc(reload_lock);
  167. goto err;
  168. }
  169. /* PCRE options */
  170. if (pcre_caseless != 0) {
  171. LM_DBG("PCRE CASELESS enabled\n");
  172. pcre_options = pcre_options | PCRE_CASELESS;
  173. }
  174. if (pcre_multiline != 0) {
  175. LM_DBG("PCRE MULTILINE enabled\n");
  176. pcre_options = pcre_options | PCRE_MULTILINE;
  177. }
  178. if (pcre_dotall != 0) {
  179. LM_DBG("PCRE DOTALL enabled\n");
  180. pcre_options = pcre_options | PCRE_DOTALL;
  181. }
  182. if (pcre_extended != 0) {
  183. LM_DBG("PCRE EXTENDED enabled\n");
  184. pcre_options = pcre_options | PCRE_EXTENDED;
  185. }
  186. LM_DBG("PCRE options: %i\n", pcre_options);
  187. /* Pointer to pcres */
  188. if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
  189. LM_ERR("no memory for pcres_addr\n");
  190. goto err;
  191. }
  192. /* Integer containing the number of pcres */
  193. if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
  194. LM_ERR("no memory for num_pcres\n");
  195. goto err;
  196. }
  197. /* Load the pcres */
  198. LM_DBG("loading pcres...\n");
  199. if (load_pcres(START)) {
  200. LM_ERR("failed to load pcres\n");
  201. goto err;
  202. }
  203. }
  204. return 0;
  205. err:
  206. free_shared_memory();
  207. return -1;
  208. }
  /*! \brief Destroy module function: hands all cleanup to free_shared_memory() */
  static void destroy(void)
  {
      free_shared_memory();
  }
  213. /*! \brief Convert the file content into regular expresions and store them in pcres */
  214. static int load_pcres(int action)
  215. {
  216. int i, j;
  217. FILE *f;
  218. char line[FILE_MAX_LINE];
  219. char **patterns = NULL;
  220. pcre *pcre_tmp = NULL;
  221. size_t pcre_size;
  222. int pcre_rc;
  223. const char *pcre_error;
  224. int pcre_erroffset;
  225. int num_pcres_tmp = 0;
  226. pcre **pcres_tmp = NULL;
  227. /* Get the lock */
  228. lock_get(reload_lock);
  229. if (!(f = fopen(file, "r"))) {
  230. LM_ERR("could not open file '%s'\n", file);
  231. goto err;
  232. }
  233. /* Array containing each pattern in the file */
  234. if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
  235. LM_ERR("no more memory for patterns\n");
  236. fclose(f);
  237. goto err;
  238. }
  239. for (i=0; i<max_groups; i++) {
  240. patterns[i] = NULL;
  241. }
  242. for (i=0; i<max_groups; i++) {
  243. if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
  244. LM_ERR("no more memory for patterns[%d]\n", i);
  245. fclose(f);
  246. goto err;
  247. }
  248. memset(patterns[i], '\0', group_max_size);
  249. }
  250. /* Read the file and extract the patterns */
  251. memset(line, '\0', FILE_MAX_LINE);
  252. i = -1;
  253. while (fgets(line, FILE_MAX_LINE, f) != NULL) {
  254. /* Ignore comments and lines starting by space, tab, CR, LF */
  255. if(isspace(line[0]) || line[0]=='#') {
  256. memset(line, '\0', FILE_MAX_LINE);
  257. continue;
  258. }
  259. /* First group */
  260. if (i == -1 && line[0] != '[') {
  261. LM_ERR("first group must be initialized with [0] before any regular expression\n");
  262. fclose(f);
  263. goto err;
  264. }
  265. /* New group */
  266. if (line[0] == '[') {
  267. i++;
  268. /* Check if there are more patterns than the max value */
  269. if (i >= max_groups) {
  270. LM_ERR("max patterns exceeded\n");
  271. fclose(f);
  272. goto err;
  273. }
  274. /* Start the regular expression with '(' */
  275. patterns[i][0] = '(';
  276. memset(line, '\0', FILE_MAX_LINE);
  277. continue;
  278. }
  279. /* Check if the patter size is too big (aprox) */
  280. if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
  281. LM_ERR("pattern max file exceeded\n");
  282. fclose(f);
  283. goto err;
  284. }
  285. /* Append ')' at the end of the line */
  286. if (line[strlen(line) - 1] == '\n') {
  287. line[strlen(line)] = line[strlen(line) - 1];
  288. line[strlen(line) - 2] = ')';
  289. } else {
  290. /* This is the last char in the file and it's not \n */
  291. line[strlen(line)] = ')';
  292. }
  293. /* Append '(' at the beginning of the line */
  294. memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
  295. /* Append the line to the current pattern */
  296. memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
  297. memset(line, '\0', FILE_MAX_LINE);
  298. }
  299. num_pcres_tmp = i + 1;
  300. fclose(f);
  301. /* Fix the patterns */
  302. for (i=0; i < num_pcres_tmp; i++) {
  303. /* Convert empty groups in unmatcheable regular expression ^$ */
  304. if (strlen(patterns[i]) == 1) {
  305. patterns[i][0] = '^';
  306. patterns[i][1] = '$';
  307. patterns[i][2] = '\0';
  308. continue;
  309. }
  310. /* Delete possible '\n' at the end of the pattern */
  311. if (patterns[i][strlen(patterns[i])-1] == '\n') {
  312. patterns[i][strlen(patterns[i])-1] = '\0';
  313. }
  314. /* Replace '\n' with '|' (except at the end of the pattern) */
  315. for (j=0; j < strlen(patterns[i]); j++) {
  316. if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
  317. patterns[i][j] = '|';
  318. }
  319. }
  320. /* Add ')' at the end of the pattern */
  321. patterns[i][strlen(patterns[i])] = ')';
  322. }
  323. /* Log the group patterns */
  324. LM_INFO("num groups = %d\n", num_pcres_tmp);
  325. for (i=0; i < num_pcres_tmp; i++) {
  326. LM_INFO("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
  327. }
  328. /* Temporal pointer of pcres */
  329. if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  330. LM_ERR("no more memory for pcres_tmp\n");
  331. goto err;
  332. }
  333. for (i=0; i<num_pcres_tmp; i++) {
  334. pcres_tmp[i] = NULL;
  335. }
  336. /* Compile the patters */
  337. for (i=0; i<num_pcres_tmp; i++) {
  338. pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
  339. if (pcre_tmp == NULL) {
  340. LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
  341. goto err;
  342. }
  343. pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
  344. if (pcre_rc) {
  345. printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
  346. goto err;
  347. }
  348. if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
  349. LM_ERR("no more memory for pcres_tmp[%i]\n", i);
  350. goto err;
  351. }
  352. memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
  353. pcre_free(pcre_tmp);
  354. pkg_free(patterns[i]);
  355. }
  356. /* Copy to shared memory */
  357. if (action == RELOAD) {
  358. for(i=0; i<*num_pcres; i++) { /* Use the previous num_pcres value */
  359. if (pcres[i]) {
  360. shm_free(pcres[i]);
  361. }
  362. }
  363. shm_free(pcres);
  364. }
  365. if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  366. LM_ERR("no more memory for pcres\n");
  367. goto err;
  368. }
  369. for (i=0; i<num_pcres_tmp; i++) {
  370. pcres[i] = NULL;
  371. }
  372. for (i=0; i<num_pcres_tmp; i++) {
  373. pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
  374. if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
  375. LM_ERR("no more memory for pcres[%i]\n", i);
  376. goto err;
  377. }
  378. memcpy(pcres[i], pcres_tmp[i], pcre_size);
  379. }
  380. *num_pcres = num_pcres_tmp;
  381. *pcres_addr = pcres;
  382. /* Free used memory */
  383. for (i=0; i<num_pcres_tmp; i++) {
  384. pkg_free(pcres_tmp[i]);
  385. }
  386. pkg_free(pcres_tmp);
  387. pkg_free(patterns);
  388. lock_release(reload_lock);
  389. return 0;
  390. err:
  391. if (patterns) {
  392. for(i=0; i<max_groups; i++) {
  393. if (patterns[i]) {
  394. pkg_free(patterns[i]);
  395. }
  396. }
  397. pkg_free(patterns);
  398. }
  399. if (pcres_tmp) {
  400. for (i=0; i<num_pcres_tmp; i++) {
  401. if (pcres_tmp[i]) {
  402. pkg_free(pcres_tmp[i]);
  403. }
  404. }
  405. pkg_free(pcres_tmp);
  406. }
  407. if (reload_lock) {
  408. lock_release(reload_lock);
  409. }
  410. if (action == START) {
  411. free_shared_memory();
  412. }
  413. return -1;
  414. }
  415. static void free_shared_memory(void)
  416. {
  417. int i;
  418. if (pcres) {
  419. for(i=0; i<*num_pcres; i++) {
  420. if (pcres[i]) {
  421. shm_free(pcres[i]);
  422. }
  423. }
  424. shm_free(pcres);
  425. pcres = NULL;
  426. }
  427. if (num_pcres) {
  428. shm_free(num_pcres);
  429. num_pcres = NULL;
  430. }
  431. if (pcres_addr) {
  432. shm_free(pcres_addr);
  433. pcres_addr = NULL;
  434. }
  435. if (reload_lock) {
  436. lock_destroy(reload_lock);
  437. lock_dealloc(reload_lock);
  438. reload_lock = NULL;
  439. }
  440. }
  441. /*
  442. * Script functions
  443. */
  444. /*! \brief Return true if the argument matches the regular expression parameter */
  445. static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
  446. {
  447. str string;
  448. str regex;
  449. pcre *pcre_re = NULL;
  450. int pcre_rc;
  451. const char *pcre_error;
  452. int pcre_erroffset;
  453. if (_s1 == NULL) {
  454. LM_ERR("bad parameters\n");
  455. return -2;
  456. }
  457. if (_s2 == NULL) {
  458. LM_ERR("bad parameters\n");
  459. return -2;
  460. }
  461. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  462. {
  463. LM_ERR("cannot print the format for string\n");
  464. return -3;
  465. }
  466. if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
  467. {
  468. LM_ERR("cannot print the format for regex\n");
  469. return -3;
  470. }
  471. pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
  472. if (pcre_re == NULL) {
  473. LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
  474. return -4;
  475. }
  476. pcre_rc = pcre_exec(
  477. pcre_re, /* the compiled pattern */
  478. NULL, /* no extra data - we didn't study the pattern */
  479. string.s, /* the matching string */
  480. (int)(string.len), /* the length of the subject */
  481. 0, /* start at offset 0 in the string */
  482. 0, /* default options */
  483. NULL, /* output vector for substring information */
  484. 0); /* number of elements in the output vector */
  485. /* Matching failed: handle error cases */
  486. if (pcre_rc < 0) {
  487. switch(pcre_rc) {
  488. case PCRE_ERROR_NOMATCH:
  489. LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
  490. break;
  491. default:
  492. LM_DBG("matching error '%d'\n", pcre_rc);
  493. break;
  494. }
  495. pcre_free(pcre_re);
  496. return -1;
  497. }
  498. pcre_free(pcre_re);
  499. LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
  500. return 1;
  501. }
  502. /*! \brief Return true if the string argument matches the pattern group parameter */
  503. static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
  504. {
  505. str string, group;
  506. unsigned int num_pcre;
  507. int pcre_rc;
  508. /* Check if group matching feature is enabled */
  509. if (file == NULL) {
  510. LM_ERR("group matching is disabled\n");
  511. return -2;
  512. }
  513. if (_s1 == NULL) {
  514. LM_ERR("bad parameters\n");
  515. return -3;
  516. }
  517. if (_s2 == NULL) {
  518. num_pcre = 0;
  519. } else {
  520. if (fixup_get_svalue(_msg, (gparam_p)_s2, &group))
  521. {
  522. LM_ERR("cannot print the format for second param\n");
  523. return -5;
  524. }
  525. str2int(&group, &num_pcre);
  526. }
  527. if (num_pcre >= *num_pcres) {
  528. LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
  529. return -4;
  530. }
  531. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  532. {
  533. LM_ERR("cannot print the format for first param\n");
  534. return -5;
  535. }
  536. lock_get(reload_lock);
  537. pcre_rc = pcre_exec(
  538. (*pcres_addr)[num_pcre], /* the compiled pattern */
  539. NULL, /* no extra data - we didn't study the pattern */
  540. string.s, /* the matching string */
  541. (int)(string.len), /* the length of the subject */
  542. 0, /* start at offset 0 in the string */
  543. 0, /* default options */
  544. NULL, /* output vector for substring information */
  545. 0); /* number of elements in the output vector */
  546. lock_release(reload_lock);
  547. /* Matching failed: handle error cases */
  548. if (pcre_rc < 0) {
  549. switch(pcre_rc) {
  550. case PCRE_ERROR_NOMATCH:
  551. LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
  552. break;
  553. default:
  554. LM_DBG("matching error '%d'\n", pcre_rc);
  555. break;
  556. }
  557. return -1;
  558. }
  559. else {
  560. LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
  561. return 1;
  562. }
  563. }
  564. /*
  565. * MI functions
  566. */
  567. /*! \brief Reload pcres by reading the file again */
  568. static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
  569. {
  570. /* Check if group matching feature is enabled */
  571. if (file == NULL) {
  572. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  573. return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
  574. }
  575. LM_INFO("reloading pcres...\n");
  576. if (load_pcres(RELOAD)) {
  577. LM_ERR("failed to reload pcres\n");
  578. return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
  579. }
  580. LM_INFO("reload success\n");
  581. return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
  582. }