2
0

regex_mod.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. /*
  2. * regex module - pcre operations
  3. *
  4. * Copyright (C) 2008 Iñaki Baz Castillo
  5. *
  6. * This file is part of Kamailio, a free SIP server.
  7. *
  8. * Kamailio is free software; you can redistribute it and/or modify
  9. * it under the terms of the GNU General Public License as published by
  10. * the Free Software Foundation; either version 2 of the License, or
  11. * (at your option) any later version
  12. *
  13. * Kamailio is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. * GNU General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU General Public License
  19. * along with this program; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. *
  22. */
  23. /*!
  24. * \file
  25. * \brief REGEX :: Perl-compatible regular expressions using PCRE library
  26. * Copyright (C) 2008 Iñaki Baz Castillo
  27. * \ingroup regex
  28. */
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #include <sys/stat.h>
  33. #include <pcre.h>
  34. #include "../../sr_module.h"
  35. #include "../../dprint.h"
  36. #include "../../pt.h"
  37. #include "../../mem/shm_mem.h"
  38. #include "../../str.h"
  39. #include "../../locking.h"
  40. #include "../../mod_fix.h"
  41. #include "../../lib/kmi/mi.h"
MODULE_VERSION

/* Values accepted by the 'action' argument of load_pcres() */
#define START 0 /*!< initial load, requested from mod_init() */
#define RELOAD 1 /*!< reload, requested from the "regex_reload" MI command */

#define FILE_MAX_LINE 500 /*!< Max line size in the file */
#define MAX_GROUPS 20 /*!< Max number of groups */
#define GROUP_MAX_SIZE 8192 /*!< Max size of a group */

/*
 * Locking variables
 */
/* Taken by load_pcres() while the groups are rebuilt and by
 * w_pcre_match_group() around the pcre_exec() call */
gen_lock_t *reload_lock;

/*
 * Module exported parameter variables
 */
static char *file; /*!< groups file; when NULL group matching is disabled */
static int max_groups = MAX_GROUPS; /*!< number of pattern buffers allocated by load_pcres() */
static int group_max_size = GROUP_MAX_SIZE; /*!< size in bytes of each pattern buffer */
static int pcre_caseless = 0; /*!< non-zero adds PCRE_CASELESS to pcre_options */
static int pcre_multiline = 0; /*!< non-zero adds PCRE_MULTILINE to pcre_options */
static int pcre_dotall = 0; /*!< non-zero adds PCRE_DOTALL to pcre_options */
static int pcre_extended = 0; /*!< non-zero adds PCRE_EXTENDED to pcre_options */

/*
 * Module internal parameter variables
 */
static pcre **pcres; /*!< shm array of compiled group patterns (this process' view) */
static pcre ***pcres_addr; /*!< shm cell holding the current array; read by w_pcre_match_group() */
static int *num_pcres; /*!< shm cell holding the number of compiled groups */
static int pcre_options = 0x00000000; /*!< flags passed to every pcre_compile() call */
/*
 * Module core functions
 */
static int mod_init(void); /* module initialization, referenced from 'exports' */
static void destroy(void); /* module shutdown, referenced from 'exports' */

/*
 * Module internal functions
 */
static int load_pcres(int); /* argument is START or RELOAD; returns 0 on success, -1 on error */
static void free_shared_memory(void); /* releases pcres, num_pcres, pcres_addr and reload_lock */

/*
 * Script functions
 */
/* Both return 1 on match and a negative value otherwise */
static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2);
static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2);

/*
 * MI functions
 */
static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param);
/*
 * Exported functions
 *
 * "pcre_match_group" is listed twice: once with 2 and once with 1 in the
 * third field (presumably the script parameter count - confirm against
 * cmd_export_t). In the one-parameter form w_pcre_match_group() receives
 * _s2 == NULL and falls back to group 0.
 * The array is terminated by an all-zero entry.
 */
static cmd_export_t cmds[] =
{
	{ "pcre_match", (cmd_function)w_pcre_match, 2, fixup_spve_spve, 0,
		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
	{ "pcre_match_group", (cmd_function)w_pcre_match_group, 2, fixup_spve_spve, 0,
		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
	{ "pcre_match_group", (cmd_function)w_pcre_match_group, 1, fixup_spve_null, 0,
		REQUEST_ROUTE|FAILURE_ROUTE|ONREPLY_ROUTE|BRANCH_ROUTE|LOCAL_ROUTE },
	{ 0, 0, 0, 0, 0, 0 }
};
/*
 * Exported parameters
 *
 * Each entry binds a configuration parameter name to the static variable of
 * the same name declared in this file. The array is terminated by an
 * all-zero entry.
 */
static param_export_t params[] = {
	{"file", PARAM_STRING, &file },
	{"max_groups", INT_PARAM, &max_groups },
	{"group_max_size", INT_PARAM, &group_max_size },
	{"pcre_caseless", INT_PARAM, &pcre_caseless },
	{"pcre_multiline", INT_PARAM, &pcre_multiline },
	{"pcre_dotall", INT_PARAM, &pcre_dotall },
	{"pcre_extended", INT_PARAM, &pcre_extended },
	{0, 0, 0}
};
/*
 * Exported MI functions
 *
 * "regex_reload" runs mi_pcres_reload(), which re-reads the groups file.
 * The array is terminated by an all-zero entry.
 */
static mi_export_t mi_cmds[] = {
	{ "regex_reload", mi_pcres_reload, MI_NO_INPUT_FLAG, 0, 0 },
	{ 0, 0, 0, 0 ,0 }
};
/*
 * Module interface
 *
 * Ties together the tables and callbacks defined in this file. The module
 * exports no statistics, pseudo-variables, extra processes, response
 * handler or per-child init function (those slots are 0).
 */
struct module_exports exports = {
	"regex", /*!< module name */
	DEFAULT_DLFLAGS, /*!< dlopen flags */
	cmds, /*!< exported functions */
	params, /*!< exported parameters */
	0, /*!< exported statistics */
	mi_cmds, /*!< exported MI functions */
	0, /*!< exported pseudo-variables */
	0, /*!< extra processes */
	mod_init, /*!< module initialization function */
	(response_function) 0, /*!< response handling function */
	destroy, /*!< destroy function */
	0 /*!< per-child init function */
};
  138. /*! \brief
  139. * Init module function
  140. */
  141. static int mod_init(void)
  142. {
  143. if(register_mi_mod(exports.name, mi_cmds)!=0)
  144. {
  145. LM_ERR("failed to register MI commands\n");
  146. return -1;
  147. }
  148. /* Group matching feature */
  149. if (file == NULL) {
  150. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  151. } else {
  152. /* Create and init the lock */
  153. reload_lock = lock_alloc();
  154. if (reload_lock == NULL) {
  155. LM_ERR("cannot allocate reload_lock\n");
  156. goto err;
  157. }
  158. if (lock_init(reload_lock) == NULL) {
  159. LM_ERR("cannot init the reload_lock\n");
  160. lock_dealloc(reload_lock);
  161. goto err;
  162. }
  163. /* PCRE options */
  164. if (pcre_caseless != 0) {
  165. LM_DBG("PCRE CASELESS enabled\n");
  166. pcre_options = pcre_options | PCRE_CASELESS;
  167. }
  168. if (pcre_multiline != 0) {
  169. LM_DBG("PCRE MULTILINE enabled\n");
  170. pcre_options = pcre_options | PCRE_MULTILINE;
  171. }
  172. if (pcre_dotall != 0) {
  173. LM_DBG("PCRE DOTALL enabled\n");
  174. pcre_options = pcre_options | PCRE_DOTALL;
  175. }
  176. if (pcre_extended != 0) {
  177. LM_DBG("PCRE EXTENDED enabled\n");
  178. pcre_options = pcre_options | PCRE_EXTENDED;
  179. }
  180. LM_DBG("PCRE options: %i\n", pcre_options);
  181. /* Pointer to pcres */
  182. if ((pcres_addr = shm_malloc(sizeof(pcre **))) == 0) {
  183. LM_ERR("no memory for pcres_addr\n");
  184. goto err;
  185. }
  186. /* Integer containing the number of pcres */
  187. if ((num_pcres = shm_malloc(sizeof(int))) == 0) {
  188. LM_ERR("no memory for num_pcres\n");
  189. goto err;
  190. }
  191. /* Load the pcres */
  192. LM_DBG("loading pcres...\n");
  193. if (load_pcres(START)) {
  194. LM_ERR("failed to load pcres\n");
  195. goto err;
  196. }
  197. }
  198. return 0;
  199. err:
  200. free_shared_memory();
  201. return -1;
  202. }
  203. static void destroy(void)
  204. {
  205. free_shared_memory();
  206. }
  207. /*! \brief Convert the file content into regular expresions and store them in pcres */
  208. static int load_pcres(int action)
  209. {
  210. int i, j;
  211. FILE *f;
  212. char line[FILE_MAX_LINE];
  213. char **patterns = NULL;
  214. pcre *pcre_tmp = NULL;
  215. size_t pcre_size;
  216. int pcre_rc;
  217. const char *pcre_error;
  218. int pcre_erroffset;
  219. int num_pcres_tmp = 0;
  220. pcre **pcres_tmp = NULL;
  221. /* Get the lock */
  222. lock_get(reload_lock);
  223. if (!(f = fopen(file, "r"))) {
  224. LM_ERR("could not open file '%s'\n", file);
  225. goto err;
  226. }
  227. /* Array containing each pattern in the file */
  228. if ((patterns = pkg_malloc(sizeof(char*) * max_groups)) == 0) {
  229. LM_ERR("no more memory for patterns\n");
  230. fclose(f);
  231. goto err;
  232. }
  233. memset(patterns, 0, sizeof(char*) * max_groups);
  234. for (i=0; i<max_groups; i++) {
  235. if ((patterns[i] = pkg_malloc(sizeof(char) * group_max_size)) == 0) {
  236. LM_ERR("no more memory for patterns[%d]\n", i);
  237. fclose(f);
  238. goto err;
  239. }
  240. memset(patterns[i], '\0', group_max_size);
  241. }
  242. /* Read the file and extract the patterns */
  243. memset(line, '\0', FILE_MAX_LINE);
  244. i = -1;
  245. while (fgets(line, FILE_MAX_LINE, f) != NULL) {
  246. /* Ignore comments and lines starting by space, tab, CR, LF */
  247. if(isspace(line[0]) || line[0]=='#') {
  248. memset(line, '\0', FILE_MAX_LINE);
  249. continue;
  250. }
  251. /* First group */
  252. if (i == -1 && line[0] != '[') {
  253. LM_ERR("first group must be initialized with [0] before any regular expression\n");
  254. fclose(f);
  255. goto err;
  256. }
  257. /* New group */
  258. if (line[0] == '[') {
  259. i++;
  260. /* Check if there are more patterns than the max value */
  261. if (i >= max_groups) {
  262. LM_ERR("max patterns exceeded\n");
  263. fclose(f);
  264. goto err;
  265. }
  266. /* Start the regular expression with '(' */
  267. patterns[i][0] = '(';
  268. memset(line, '\0', FILE_MAX_LINE);
  269. continue;
  270. }
  271. /* Check if the patter size is too big (aprox) */
  272. if (strlen(patterns[i]) + strlen(line) >= group_max_size - 2) {
  273. LM_ERR("pattern max file exceeded\n");
  274. fclose(f);
  275. goto err;
  276. }
  277. /* Append ')' at the end of the line */
  278. if (line[strlen(line) - 1] == '\n') {
  279. line[strlen(line)] = line[strlen(line) - 1];
  280. line[strlen(line) - 2] = ')';
  281. } else {
  282. /* This is the last char in the file and it's not \n */
  283. line[strlen(line)] = ')';
  284. }
  285. /* Append '(' at the beginning of the line */
  286. memcpy(patterns[i]+strlen(patterns[i]), "(", 1);
  287. /* Append the line to the current pattern */
  288. memcpy(patterns[i]+strlen(patterns[i]), line, strlen(line));
  289. memset(line, '\0', FILE_MAX_LINE);
  290. }
  291. num_pcres_tmp = i + 1;
  292. fclose(f);
  293. if(num_pcres_tmp==0) {
  294. LM_ERR("no expressions in the file\n");
  295. goto err;
  296. }
  297. /* Fix the patterns */
  298. for (i=0; i < num_pcres_tmp; i++) {
  299. /* Convert empty groups in unmatcheable regular expression ^$ */
  300. if (strlen(patterns[i]) == 1) {
  301. patterns[i][0] = '^';
  302. patterns[i][1] = '$';
  303. patterns[i][2] = '\0';
  304. continue;
  305. }
  306. /* Delete possible '\n' at the end of the pattern */
  307. if (patterns[i][strlen(patterns[i])-1] == '\n') {
  308. patterns[i][strlen(patterns[i])-1] = '\0';
  309. }
  310. /* Replace '\n' with '|' (except at the end of the pattern) */
  311. for (j=0; j < strlen(patterns[i]); j++) {
  312. if (patterns[i][j] == '\n' && j != strlen(patterns[i])-1) {
  313. patterns[i][j] = '|';
  314. }
  315. }
  316. /* Add ')' at the end of the pattern */
  317. patterns[i][strlen(patterns[i])] = ')';
  318. }
  319. /* Log the group patterns */
  320. LM_INFO("num groups = %d\n", num_pcres_tmp);
  321. for (i=0; i < num_pcres_tmp; i++) {
  322. LM_INFO("<group[%d]>%s</group[%d]> (size = %i)\n", i, patterns[i], i, (int)strlen(patterns[i]));
  323. }
  324. /* Temporal pointer of pcres */
  325. if ((pcres_tmp = pkg_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  326. LM_ERR("no more memory for pcres_tmp\n");
  327. goto err;
  328. }
  329. for (i=0; i<num_pcres_tmp; i++) {
  330. pcres_tmp[i] = NULL;
  331. }
  332. /* Compile the patters */
  333. for (i=0; i<num_pcres_tmp; i++) {
  334. pcre_tmp = pcre_compile(patterns[i], pcre_options, &pcre_error, &pcre_erroffset, NULL);
  335. if (pcre_tmp == NULL) {
  336. LM_ERR("pcre_tmp compilation of '%s' failed at offset %d: %s\n", patterns[i], pcre_erroffset, pcre_error);
  337. goto err;
  338. }
  339. pcre_rc = pcre_fullinfo(pcre_tmp, NULL, PCRE_INFO_SIZE, &pcre_size);
  340. if (pcre_rc) {
  341. printf("pcre_fullinfo on compiled pattern[%i] yielded error: %d\n", i, pcre_rc);
  342. goto err;
  343. }
  344. if ((pcres_tmp[i] = pkg_malloc(pcre_size)) == 0) {
  345. LM_ERR("no more memory for pcres_tmp[%i]\n", i);
  346. goto err;
  347. }
  348. memcpy(pcres_tmp[i], pcre_tmp, pcre_size);
  349. pcre_free(pcre_tmp);
  350. pkg_free(patterns[i]);
  351. patterns[i] = NULL;
  352. }
  353. /* Copy to shared memory */
  354. if (action == RELOAD) {
  355. for(i=0; i<*num_pcres; i++) { /* Use the previous num_pcres value */
  356. if (pcres[i]) {
  357. shm_free(pcres[i]);
  358. }
  359. }
  360. shm_free(pcres);
  361. }
  362. if ((pcres = shm_malloc(sizeof(pcre *) * num_pcres_tmp)) == 0) {
  363. LM_ERR("no more memory for pcres\n");
  364. goto err;
  365. }
  366. for (i=0; i<num_pcres_tmp; i++) {
  367. pcres[i] = NULL;
  368. }
  369. for (i=0; i<num_pcres_tmp; i++) {
  370. pcre_rc = pcre_fullinfo(pcres_tmp[i], NULL, PCRE_INFO_SIZE, &pcre_size);
  371. if ((pcres[i] = shm_malloc(pcre_size)) == 0) {
  372. LM_ERR("no more memory for pcres[%i]\n", i);
  373. goto err;
  374. }
  375. memcpy(pcres[i], pcres_tmp[i], pcre_size);
  376. }
  377. *num_pcres = num_pcres_tmp;
  378. *pcres_addr = pcres;
  379. /* Free used memory */
  380. for (i=0; i<num_pcres_tmp; i++) {
  381. pkg_free(pcres_tmp[i]);
  382. }
  383. pkg_free(pcres_tmp);
  384. pkg_free(patterns);
  385. lock_release(reload_lock);
  386. return 0;
  387. err:
  388. if (patterns) {
  389. for(i=0; i<max_groups; i++) {
  390. if (patterns[i]) {
  391. pkg_free(patterns[i]);
  392. }
  393. }
  394. pkg_free(patterns);
  395. }
  396. if (pcres_tmp) {
  397. for (i=0; i<num_pcres_tmp; i++) {
  398. if (pcres_tmp[i]) {
  399. pkg_free(pcres_tmp[i]);
  400. }
  401. }
  402. pkg_free(pcres_tmp);
  403. }
  404. if (reload_lock) {
  405. lock_release(reload_lock);
  406. }
  407. if (action == START) {
  408. free_shared_memory();
  409. }
  410. return -1;
  411. }
  412. static void free_shared_memory(void)
  413. {
  414. int i;
  415. if (pcres) {
  416. for(i=0; i<*num_pcres; i++) {
  417. if (pcres[i]) {
  418. shm_free(pcres[i]);
  419. }
  420. }
  421. shm_free(pcres);
  422. pcres = NULL;
  423. }
  424. if (num_pcres) {
  425. shm_free(num_pcres);
  426. num_pcres = NULL;
  427. }
  428. if (pcres_addr) {
  429. shm_free(pcres_addr);
  430. pcres_addr = NULL;
  431. }
  432. if (reload_lock) {
  433. lock_destroy(reload_lock);
  434. lock_dealloc(reload_lock);
  435. reload_lock = NULL;
  436. }
  437. }
  438. /*
  439. * Script functions
  440. */
  441. /*! \brief Return true if the argument matches the regular expression parameter */
  442. static int w_pcre_match(struct sip_msg* _msg, char* _s1, char* _s2)
  443. {
  444. str string;
  445. str regex;
  446. pcre *pcre_re = NULL;
  447. int pcre_rc;
  448. const char *pcre_error;
  449. int pcre_erroffset;
  450. if (_s1 == NULL) {
  451. LM_ERR("bad parameters\n");
  452. return -2;
  453. }
  454. if (_s2 == NULL) {
  455. LM_ERR("bad parameters\n");
  456. return -2;
  457. }
  458. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  459. {
  460. LM_ERR("cannot print the format for string\n");
  461. return -3;
  462. }
  463. if (fixup_get_svalue(_msg, (gparam_p)_s2, &regex))
  464. {
  465. LM_ERR("cannot print the format for regex\n");
  466. return -3;
  467. }
  468. pcre_re = pcre_compile(regex.s, pcre_options, &pcre_error, &pcre_erroffset, NULL);
  469. if (pcre_re == NULL) {
  470. LM_ERR("pcre_re compilation of '%s' failed at offset %d: %s\n", regex.s, pcre_erroffset, pcre_error);
  471. return -4;
  472. }
  473. pcre_rc = pcre_exec(
  474. pcre_re, /* the compiled pattern */
  475. NULL, /* no extra data - we didn't study the pattern */
  476. string.s, /* the matching string */
  477. (int)(string.len), /* the length of the subject */
  478. 0, /* start at offset 0 in the string */
  479. 0, /* default options */
  480. NULL, /* output vector for substring information */
  481. 0); /* number of elements in the output vector */
  482. /* Matching failed: handle error cases */
  483. if (pcre_rc < 0) {
  484. switch(pcre_rc) {
  485. case PCRE_ERROR_NOMATCH:
  486. LM_DBG("'%s' doesn't match '%s'\n", string.s, regex.s);
  487. break;
  488. default:
  489. LM_DBG("matching error '%d'\n", pcre_rc);
  490. break;
  491. }
  492. pcre_free(pcre_re);
  493. return -1;
  494. }
  495. pcre_free(pcre_re);
  496. LM_DBG("'%s' matches '%s'\n", string.s, regex.s);
  497. return 1;
  498. }
  499. /*! \brief Return true if the string argument matches the pattern group parameter */
  500. static int w_pcre_match_group(struct sip_msg* _msg, char* _s1, char* _s2)
  501. {
  502. str string, group;
  503. unsigned int num_pcre;
  504. int pcre_rc;
  505. /* Check if group matching feature is enabled */
  506. if (file == NULL) {
  507. LM_ERR("group matching is disabled\n");
  508. return -2;
  509. }
  510. if (_s1 == NULL) {
  511. LM_ERR("bad parameters\n");
  512. return -3;
  513. }
  514. if (_s2 == NULL) {
  515. num_pcre = 0;
  516. } else {
  517. if (fixup_get_svalue(_msg, (gparam_p)_s2, &group))
  518. {
  519. LM_ERR("cannot print the format for second param\n");
  520. return -5;
  521. }
  522. str2int(&group, &num_pcre);
  523. }
  524. if (num_pcre >= *num_pcres) {
  525. LM_ERR("invalid pcre index '%i', there are %i pcres\n", num_pcre, *num_pcres);
  526. return -4;
  527. }
  528. if (fixup_get_svalue(_msg, (gparam_p)_s1, &string))
  529. {
  530. LM_ERR("cannot print the format for first param\n");
  531. return -5;
  532. }
  533. lock_get(reload_lock);
  534. pcre_rc = pcre_exec(
  535. (*pcres_addr)[num_pcre], /* the compiled pattern */
  536. NULL, /* no extra data - we didn't study the pattern */
  537. string.s, /* the matching string */
  538. (int)(string.len), /* the length of the subject */
  539. 0, /* start at offset 0 in the string */
  540. 0, /* default options */
  541. NULL, /* output vector for substring information */
  542. 0); /* number of elements in the output vector */
  543. lock_release(reload_lock);
  544. /* Matching failed: handle error cases */
  545. if (pcre_rc < 0) {
  546. switch(pcre_rc) {
  547. case PCRE_ERROR_NOMATCH:
  548. LM_DBG("'%s' doesn't match pcres[%i]\n", string.s, num_pcre);
  549. break;
  550. default:
  551. LM_DBG("matching error '%d'\n", pcre_rc);
  552. break;
  553. }
  554. return -1;
  555. }
  556. else {
  557. LM_DBG("'%s' matches pcres[%i]\n", string.s, num_pcre);
  558. return 1;
  559. }
  560. }
  561. /*
  562. * MI functions
  563. */
  564. /*! \brief Reload pcres by reading the file again */
  565. static struct mi_root* mi_pcres_reload(struct mi_root* cmd, void* param)
  566. {
  567. /* Check if group matching feature is enabled */
  568. if (file == NULL) {
  569. LM_NOTICE("'file' parameter is not set, group matching disabled\n");
  570. return init_mi_tree(403, MI_SSTR("Group matching not enabled"));
  571. }
  572. LM_INFO("reloading pcres...\n");
  573. if (load_pcres(RELOAD)) {
  574. LM_ERR("failed to reload pcres\n");
  575. return init_mi_tree(500, MI_INTERNAL_ERR_S, MI_INTERNAL_ERR_LEN);
  576. }
  577. LM_INFO("reload success\n");
  578. return init_mi_tree(200, MI_OK_S, MI_OK_LEN);
  579. }