瀏覽代碼

Merge remote branch 'origin/andrei/raw_sock'

Raw socket support for sending UDP IPv4 packets
(major performance increase on multi-cpu machines running linux:
 40-50% faster at least in stateless mode).

* origin/andrei/raw_sock:
  NEWS: notes about the new udp4_raw mode
  raw sockets: added info rpc
  core: compile raw socket support by default on freebsd
  raw sockets: freebsd support
  cfg: delay cfg_shmize to just before forking
  core: always compile the raw sockets code on linux
  raw sockets: ttl can be set from the config file
  raw sockets: ttl can be set or auto-detected
  core: include raw socket support in version info
  raw sockets: use BSD ip & udp structure versions
  raw sockets: config file support
  raw sockets: udp send will use now raw sockets if enabled
  raw sockets: runtime config support
  raw sockets: build ip header & fragmentation support
  raw socket: compilation fixes
  raw sockets: get dst. ip from the ip header
  core: basic support for receiving udp sip packets on raw sockets
  core: basic raw socket support functions
Andrei Pelinescu-Onciul 15 年之前
父節點
當前提交
88b1fb440e
共有 17 個文件被更改,包括 1377 次插入47 次删除
  1. 2 2
      Makefile.defs
  2. 35 0
      NEWS
  3. 6 0
      cfg.lex
  4. 19 0
      cfg.y
  5. 76 10
      cfg_core.c
  6. 7 5
      cfg_core.h
  7. 27 0
      core_cmd.c
  8. 4 0
      globals.h
  9. 98 10
      main.c
  10. 144 0
      raw_listener.c
  11. 39 0
      raw_listener.h
  12. 696 0
      raw_sock.c
  13. 56 0
      raw_sock.h
  14. 71 0
      sock_ut.c
  15. 39 0
      sock_ut.h
  16. 50 19
      udp_server.c
  17. 8 1
      ver_defs.h

+ 2 - 2
Makefile.defs

@@ -1649,7 +1649,7 @@ ifeq ($(OS), linux)
 	use_futex= yes
 	use_futex= yes
 	C_DEFS+=-DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN -DHAVE_SCHED_YIELD \
 	C_DEFS+=-DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN -DHAVE_SCHED_YIELD \
 			-DHAVE_MSG_NOSIGNAL -DHAVE_MSGHDR_MSG_CONTROL -DHAVE_ALLOCA_H \
 			-DHAVE_MSG_NOSIGNAL -DHAVE_MSGHDR_MSG_CONTROL -DHAVE_ALLOCA_H \
-			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER
+			-DHAVE_TIMEGM -DHAVE_SCHED_SETSCHEDULER -DUSE_RAW_SOCKS
 	ifneq ($(found_lock_method), yes)
 	ifneq ($(found_lock_method), yes)
 		#C_DEFS+= -DUSE_POSIX_SEM
 		#C_DEFS+= -DUSE_POSIX_SEM
 		C_DEFS+=-DUSE_PTHREAD_MUTEX
 		C_DEFS+=-DUSE_PTHREAD_MUTEX
@@ -1768,7 +1768,7 @@ ifeq ($(OS), freebsd)
 	C_DEFS+=-DHAVE_SOCKADDR_SA_LEN -DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN \
 	C_DEFS+=-DHAVE_SOCKADDR_SA_LEN -DHAVE_GETHOSTBYNAME2 -DHAVE_UNION_SEMUN \
 		-DHAVE_SCHED_YIELD -DHAVE_MSGHDR_MSG_CONTROL \
 		-DHAVE_SCHED_YIELD -DHAVE_MSGHDR_MSG_CONTROL \
 		-DHAVE_CONNECT_ECONNRESET_BUG -DHAVE_TIMEGM \
 		-DHAVE_CONNECT_ECONNRESET_BUG -DHAVE_TIMEGM \
-		-DHAVE_NETINET_IN_SYSTM
+		-DHAVE_NETINET_IN_SYSTM -DUSE_RAW_SOCKS
 	ifneq ($(found_lock_method), yes)
 	ifneq ($(found_lock_method), yes)
 		C_DEFS+= -DUSE_PTHREAD_MUTEX  # try pthread sems
 		C_DEFS+= -DUSE_PTHREAD_MUTEX  # try pthread sems
 		found_lock_method=yes
 		found_lock_method=yes

+ 35 - 0
NEWS

@@ -21,6 +21,19 @@ core:
             t_set_fr($foo) (equivalent now with t_set_fr("$foo")).
             t_set_fr($foo) (equivalent now with t_set_fr("$foo")).
   - all the module functions can now be called with any constant expression
   - all the module functions can now be called with any constant expression
       as parameters. E.g.: f("7 *" +" 6 = " + 7 * 6);
       as parameters. E.g.: f("7 *" +" 6 = " + 7 * 6);
+  - major performance increase on Linux multi-cpu machines that send a lot
+     of UDP IPv4 packets (40-50% faster in stateless mode). For it to work
+     udp4_raw must be enabled or set into auto mode in sr.cfg and sr must be
+     started as root or with CAP_NET_RAW. Note that even if udp4_raw is
+     off (default), if sr was started with enough privileges, it can be
+     enabled at runtime.
+     The support for using raw sockets is also available on FreeBSD (compiled
+     by default but not tested for performance yet), NetBSD, OpenBSD and
+     Darwin (not tested and not compiled by default, needs
+     make cfg extra_defs=-DUSE_RAW_SOCKS). To check if the support is
+     compiled, use ser -V |grep --color RAW_SOCKS or for a running
+     ser: sercmd core.udp4_raw_info.
+     See udp4_raw, udp4_raw_mtu and udp4_raw_ttl below.
   - onreply_route {...} is now equivalent with onreply_route[0] {...}
   - onreply_route {...} is now equivalent with onreply_route[0] {...}
   - global, per protocol blacklist ignore masks (via extended send_flags).
   - global, per protocol blacklist ignore masks (via extended send_flags).
     See dst_blacklist_udp_imask a.s.o (dst_blacklist_*_imask).
     See dst_blacklist_udp_imask a.s.o (dst_blacklist_*_imask).
@@ -29,6 +42,28 @@ core:
   - support for permanent entries in the DNS cache.
   - support for permanent entries in the DNS cache.
 
 
 new config variables:
 new config variables:
+  - udp4_raw - enables raw socket support for sending UDP IPv4 datagrams 
+      (40-50% performance increase on linux multi-cpu).
+      Possible values: 0 - disabled (default), 1 - enabled, -1 auto.
+      In "auto" mode it will be enabled if possible (sr started as root or
+      with CAP_NET_RAW).
+      udp4_raw can be used on Linux and FreeBSD. For other BSDs and Darwin
+      one must compile with -DUSE_RAW_SOCKS.
+      On Linux one should also set udp4_raw_mtu if the MTU on any network
+      interface that could be used for sending is smaller than 1500.
+      Can be set at runtime as long as sr was started with enough privileges
+      (core.udp4_raw).
+  - udp4_raw_mtu - MTU value used for UDP IPv4 packets when udp4_raw is
+      enabled.  It should be set to the minimum MTU of all the network
+      interfaces that could be used for sending. The default value is 1500.
+      Note that on BSDs it does not need to be set (if set it will be ignored,
+      the proper MTU will be used automatically by the kernel). On Linux it
+      should be set.
+      Can be set at runtime (core.udp4_raw_mtu).
+  - udp4_raw_ttl - TTL value used for UDP IPv4 packets when udp4_raw is
+      enabled. By default it is set to auto mode (-1), meaning that the
+      same TTL will be used as for normal UDP sockets.
+      Can be set at runtime (core.udp4_raw_ttl).
   - dst_blacklist_udp_imask - global blacklist events ignore mask for udp
   - dst_blacklist_udp_imask - global blacklist events ignore mask for udp
     (a blacklist event/reason set in this variable will be ignored when 
     (a blacklist event/reason set in this variable will be ignored when 
     deciding whether or not to blacklist an udp destination). Can be set
     deciding whether or not to blacklist an udp destination). Can be set

+ 6 - 0
cfg.lex

@@ -211,6 +211,9 @@ ADD_LOCAL_RPORT		"add_local_rport"
 FORCE_TCP_ALIAS		"force_tcp_alias"|"add_tcp_alias"
 FORCE_TCP_ALIAS		"force_tcp_alias"|"add_tcp_alias"
 UDP_MTU		"udp_mtu"
 UDP_MTU		"udp_mtu"
 UDP_MTU_TRY_PROTO	"udp_mtu_try_proto"
 UDP_MTU_TRY_PROTO	"udp_mtu_try_proto"
+UDP4_RAW		"udp4_raw"
+UDP4_RAW_MTU	"udp4_raw_mtu"
+UDP4_RAW_TTL	"udp4_raw_ttl"
 SETFLAG		setflag
 SETFLAG		setflag
 RESETFLAG	resetflag
 RESETFLAG	resetflag
 ISFLAGSET	isflagset
 ISFLAGSET	isflagset
@@ -605,6 +608,9 @@ SUBST       subst
 <INITIAL>{UDP_MTU}	{ count(); yylval.strval=yytext; return UDP_MTU; }
 <INITIAL>{UDP_MTU}	{ count(); yylval.strval=yytext; return UDP_MTU; }
 <INITIAL>{UDP_MTU_TRY_PROTO}	{ count(); yylval.strval=yytext;
 <INITIAL>{UDP_MTU_TRY_PROTO}	{ count(); yylval.strval=yytext;
 									return UDP_MTU_TRY_PROTO; }
 									return UDP_MTU_TRY_PROTO; }
+<INITIAL>{UDP4_RAW}	{ count(); yylval.strval=yytext; return UDP4_RAW; }
+<INITIAL>{UDP4_RAW_MTU}	{ count(); yylval.strval=yytext; return UDP4_RAW_MTU; }
+<INITIAL>{UDP4_RAW_TTL}	{ count(); yylval.strval=yytext; return UDP4_RAW_TTL; }
 <INITIAL>{IF}	{ count(); yylval.strval=yytext; return IF; }
 <INITIAL>{IF}	{ count(); yylval.strval=yytext; return IF; }
 <INITIAL>{ELSE}	{ count(); yylval.strval=yytext; return ELSE; }
 <INITIAL>{ELSE}	{ count(); yylval.strval=yytext; return ELSE; }
 
 

+ 19 - 0
cfg.y

@@ -200,6 +200,12 @@
 	#define IF_SCTP(x) warn("sctp support not compiled in")
 	#define IF_SCTP(x) warn("sctp support not compiled in")
 #endif
 #endif
 
 
+#ifdef USE_RAW_SOCKS
+	#define IF_RAW_SOCKS(x) x
+#else
+	#define IF_RAW_SOCKS(x) warn("raw socket support not compiled in")
+#endif
+
 
 
 extern int yylex();
 extern int yylex();
 /* safer then using yytext which can be array or pointer */
 /* safer then using yytext which can be array or pointer */
@@ -325,6 +331,9 @@ extern char *finame;
 %token FORCE_TCP_ALIAS
 %token FORCE_TCP_ALIAS
 %token UDP_MTU
 %token UDP_MTU
 %token UDP_MTU_TRY_PROTO
 %token UDP_MTU_TRY_PROTO
+%token UDP4_RAW
+%token UDP4_RAW_MTU
+%token UDP4_RAW_TTL
 %token IF
 %token IF
 %token ELSE
 %token ELSE
 %token SET_ADV_ADDRESS
 %token SET_ADV_ADDRESS
@@ -1581,6 +1590,16 @@ assign_stm:
 		{ default_core_cfg.udp_mtu_try_proto=$3; fix_global_req_flags(0, 0); }
 		{ default_core_cfg.udp_mtu_try_proto=$3; fix_global_req_flags(0, 0); }
 	| UDP_MTU_TRY_PROTO EQUAL error
 	| UDP_MTU_TRY_PROTO EQUAL error
 		{ yyerror("TCP, TLS, SCTP or UDP expected"); }
 		{ yyerror("TCP, TLS, SCTP or UDP expected"); }
+	| UDP4_RAW EQUAL intno { IF_RAW_SOCKS(default_core_cfg.udp4_raw=$3); }
+	| UDP4_RAW EQUAL error { yyerror("number expected"); }
+	| UDP4_RAW_MTU EQUAL NUMBER {
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_mtu=$3);
+	}
+	| UDP4_RAW_MTU EQUAL error { yyerror("number expected"); }
+	| UDP4_RAW_TTL EQUAL NUMBER {
+		IF_RAW_SOCKS(default_core_cfg.udp4_raw_ttl=$3);
+	}
+	| UDP4_RAW_TTL EQUAL error { yyerror("number expected"); }
 	| cfg_var
 	| cfg_var
 	| error EQUAL { yyerror("unknown config variable"); }
 	| error EQUAL { yyerror("unknown config variable"); }
 	;
 	;

+ 76 - 10
cfg_core.c

@@ -24,16 +24,15 @@
  *  2007-12-03	Initial version (Miklos)
  *  2007-12-03	Initial version (Miklos)
  *  2008-01-31  added DNS resolver parameters (Miklos)
  *  2008-01-31  added DNS resolver parameters (Miklos)
  */
  */
-/*!
- * \file
- * \brief SIP-router core ::  Core configuration parser
- * \ingroup core
- * Module: \ref core
+/** core runtime config.
+ * @file cfg_core.c
+ * @ingroup core
+ * Module: @ref core
  *
  *
- * See 
- * - \ref ConfigCoreDoc
- * - \ref ConfigEngine
- * - \ref cfg_core.h
+ * See
+ * - @ref ConfigCoreDoc
+ * - @ref ConfigEngine
+ * - @ref cfg_core.h
  */
  */
 /*!
 /*!
  * \page ConfigCoreDoc Documentation of configuration parser
  * \page ConfigCoreDoc Documentation of configuration parser
@@ -57,6 +56,8 @@
 #include "pt.h"
 #include "pt.h"
 #endif
 #endif
 #include "msg_translator.h" /* fix_global_req_flags() */
 #include "msg_translator.h" /* fix_global_req_flags() */
+#include "globals.h"
+#include "sock_ut.h"
 #include "cfg/cfg.h"
 #include "cfg/cfg.h"
 #include "cfg_core.h"
 #include "cfg_core.h"
 
 
@@ -113,6 +114,9 @@ struct cfg_group_core default_core_cfg = {
 	DEFAULT_MAX_WHILE_LOOPS, /*!< max_while_loops */
 	DEFAULT_MAX_WHILE_LOOPS, /*!< max_while_loops */
 	0, /*!< udp_mtu (disabled by default) */
 	0, /*!< udp_mtu (disabled by default) */
 	0, /*!< udp_mtu_try_proto -> default disabled */
 	0, /*!< udp_mtu_try_proto -> default disabled */
+	0, /**< udp4_raw (disabled by default) */
+	1500, /**< udp4_raw_mtu (1500 by default) */
+	-1,  /**< udp4_raw_ttl (auto detect by default) */
 	0,  /*!< force_rport */
 	0,  /*!< force_rport */
 	L_DBG, /*!< memlog */
 	L_DBG, /*!< memlog */
 	3 /*!< mem_summary -flags: 0 off, 1 pkg_status, 2 shm_status,
 	3 /*!< mem_summary -flags: 0 off, 1 pkg_status, 2 shm_status,
@@ -121,6 +125,57 @@ struct cfg_group_core default_core_cfg = {
 
 
 void	*core_cfg = &default_core_cfg;
 void	*core_cfg = &default_core_cfg;
 
 
+
+/** fixup callback for the udp4_raw config variable.
+ * Validates a new udp4_raw value and resolves the "auto" (negative) setting.
+ * Without USE_RAW_SOCKS any positive (enable) value is rejected.
+ * With USE_RAW_SOCKS the decision depends on raw_udp4_send_sock:
+ *  - no raw socket (< 0): a positive value is rejected, a negative (auto)
+ *    value is rewritten to 0 (disabled);
+ *  - raw socket available: a negative (auto) value is rewritten to 1.
+ * @param cfg_h - unused.
+ * @param gname - unused.
+ * @param name - unused.
+ * @param v - in/out: the int value carried in a void*.
+ * @return 0 if the value is accepted, -1 if it is rejected.
+ */
+static int check_raw_sock_support(void* cfg_h, str* gname, str* name,
+									void** v)
+{
+	int val;
+	
+	val = (int)(long)(*v);
+#ifndef USE_RAW_SOCKS
+	if (val > 0) {
+		ERR("no RAW_SOCKS support, please recompile with it enabled\n");
+		return -1;
+	}
+	return 0;
+#else /* USE_RAW_SOCKS */
+	if (raw_udp4_send_sock < 0) {
+		if (val > 0) {
+			ERR("could not initialize raw socket on startup, please "
+					"restart as root or with CAP_NET_RAW\n");
+			return -1;
+		} else if (val < 0) {
+			/* auto and no socket => disable */
+			*v = (void*)(long)0;
+		}
+	} else if (val < 0) {
+		/* auto and socket => enable */
+		*v = (void*)(long)1;
+	}
+	return 0;
+#endif /* USE_RAW_SOCKS */
+}
+
+
+
+/** fixup callback for the udp4_raw_ttl config variable.
+ * Resolves a negative ("auto") value: if sendipv4 is set, the value is
+ * replaced by sock_get_ttl(sendipv4->socket); if the result is still
+ * negative, 63 is used instead. Non-negative values are stored unchanged.
+ * @param cfg_h - unused.
+ * @param gname - unused.
+ * @param name - unused.
+ * @param val - in/out: the int value carried in a void*.
+ * @return always 0.
+ */
+static int  udp4_raw_ttl_fixup(void* cfg_h, str* gname, str* name, void** val)
+{
+	int v;
+	v = (int)(long)(*val);
+	if (v < 0) {
+		/* auto mode: query the socket referenced by sendipv4, if any */
+		if (sendipv4)
+			v = sock_get_ttl(sendipv4->socket);
+	}
+	if (v < 0) {
+		/* some error => use a reasonable default */
+		v = 63;
+	}
+	*val = (void*)(long)v;
+	return 0;
+}
+
+
+
 cfg_def_t core_cfg_def[] = {
 cfg_def_t core_cfg_def[] = {
 	{"debug",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
 	{"debug",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
 		"debug level"},
 		"debug level"},
@@ -179,7 +234,8 @@ cfg_def_t core_cfg_def[] = {
 	{"dns_search_full_match",	CFG_VAR_INT,	0, 1, 0, 0,
 	{"dns_search_full_match",	CFG_VAR_INT,	0, 1, 0, 0,
 		"enable/disable domain name checks against the search list "
 		"enable/disable domain name checks against the search list "
 		"in DNS answers"},
 		"in DNS answers"},
-	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup, resolv_reinit,
+	{"dns_reinit",		CFG_VAR_INT|CFG_INPUT_INT,	1, 1, dns_reinit_fixup,
+		resolv_reinit,
 		"set to 1 in order to reinitialize the DNS resolver"},
 		"set to 1 in order to reinitialize the DNS resolver"},
 	/* DNS cache */
 	/* DNS cache */
 #ifdef USE_DNS_CACHE
 #ifdef USE_DNS_CACHE
@@ -230,6 +286,16 @@ cfg_def_t core_cfg_def[] = {
 			" exceeds udp_mtu"},
 			" exceeds udp_mtu"},
 	{"udp_mtu_try_proto", CFG_VAR_INT, 1, 4, 0, fix_global_req_flags,
 	{"udp_mtu_try_proto", CFG_VAR_INT, 1, 4, 0, fix_global_req_flags,
 		"if send size > udp_mtu use proto (1 udp, 2 tcp, 3 tls, 4 sctp)"},
 		"if send size > udp_mtu use proto (1 udp, 2 tcp, 3 tls, 4 sctp)"},
+	{"udp4_raw", CFG_VAR_INT | CFG_ATOMIC, -1, 1, check_raw_sock_support, 0,
+		"enable/disable using a raw socket for sending UDP IPV4 packets."
+		" Should be  faster on multi-CPU linux running machines."},
+	{"udp4_raw_mtu", CFG_VAR_INT | CFG_ATOMIC, 28, 65535, 0, 0,
+		"set the MTU used when using raw sockets for udp sending."
+		" This  value will be used when deciding whether or not to fragment"
+		" the packets."},
+	{"udp4_raw_ttl", CFG_VAR_INT | CFG_ATOMIC, -1, 255, udp4_raw_ttl_fixup, 0,
+		"set the IP TTL used when using raw sockets for udp sending."
+		" -1 will use the same value as for normal udp sockets."},
 	{"force_rport",     CFG_VAR_INT, 0, 1,  0, fix_global_req_flags,
 	{"force_rport",     CFG_VAR_INT, 0, 1,  0, fix_global_req_flags,
 		"force rport for all the received messages" },
 		"force rport for all the received messages" },
 	{"memlog",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,
 	{"memlog",		CFG_VAR_INT|CFG_ATOMIC,	0, 0, 0, 0,

+ 7 - 5
cfg_core.h

@@ -36,12 +36,11 @@
  * -------
  * -------
  *  2007-12-03	Initial version (Miklos)
  *  2007-12-03	Initial version (Miklos)
  */
  */
-/*!
- * \file
- * \brief SIP-router core :: Core configuration
- * \ingroup core
+/** core runtime config.
+ * @file cfg_core.h
+ * @ingroup core
  *
  *
- * Module: \ref core
+ * Module: @ref core
  */
  */
 
 
 
 
@@ -103,6 +102,9 @@ struct cfg_group_core {
 	int max_while_loops;
 	int max_while_loops;
 	int udp_mtu; /*!< maximum send size for udp, if > try another protocol*/
 	int udp_mtu; /*!< maximum send size for udp, if > try another protocol*/
 	int udp_mtu_try_proto; /*!< if packet> udp_mtu, try proto (e.g. TCP) */
 	int udp_mtu_try_proto; /*!< if packet> udp_mtu, try proto (e.g. TCP) */
+	int udp4_raw; /* use raw sockets for sending on udp ipv 4 */
+	int udp4_raw_mtu; /* mtu used when using udp raw socket */
+	int udp4_raw_ttl; /* ttl used when using udp raw sockets */
 	int force_rport; /*!< if set rport will always be forced*/
 	int force_rport; /*!< if set rport will always be forced*/
 	int memlog; /*!< log level for memory status/summary info */
 	int memlog; /*!< log level for memory status/summary info */
 	int mem_summary; /*!< display memory status/summary info on exit */
 	int mem_summary; /*!< display memory status/summary info on exit */

+ 27 - 0
core_cmd.c

@@ -42,6 +42,7 @@
 #include "tcp_info.h"
 #include "tcp_info.h"
 #include "tcp_options.h"
 #include "tcp_options.h"
 #include "core_cmd.h"
 #include "core_cmd.h"
+#include "cfg_core.h"
 #ifdef USE_SCTP
 #ifdef USE_SCTP
 #include "sctp_options.h"
 #include "sctp_options.h"
 #include "sctp_server.h"
 #include "sctp_server.h"
@@ -843,6 +844,30 @@ static void core_sctpinfo(rpc_t* rpc, void* c)
 
 
 
 
 
 
+
+static const char* core_udp4rawinfo_doc[] = {
+	"Returns udp4_raw related info.",    /* Documentation string */
+	0                                     /* Method signature(s) */
+};
+
+/** RPC handler reporting the udp4_raw related settings.
+ * With USE_RAW_SOCKS it adds one struct to the reply holding the
+ * udp4_raw, udp4_raw_mtu and udp4_raw_ttl values read with cfg_get()
+ * from core_cfg; otherwise it replies with a 500 fault.
+ * @param rpc - rpc callbacks (add, struct_add and fault are used).
+ * @param c - rpc context, passed through to the callbacks.
+ */
+static void core_udp4rawinfo(rpc_t* rpc, void* c)
+{
+#ifdef USE_RAW_SOCKS
+	void *handle;
+
+	/* NOTE(review): the result of rpc->add() is not checked here */
+	rpc->add(c, "{", &handle);
+	rpc->struct_add(handle, "ddd",
+		"udp4_raw", cfg_get(core, core_cfg, udp4_raw),
+		"udp4_raw_mtu", cfg_get(core, core_cfg, udp4_raw_mtu),
+		"udp4_raw_ttl", cfg_get(core, core_cfg, udp4_raw_ttl)
+	);
+#else /* USE_RAW_SOCKS */
+	rpc->fault(c, 500, "udp4_raw mode support not compiled");
+#endif /* USE_RAW_SOCKS */
+}
+
+
+
 /*
 /*
  * RPC Methods exported by this module
  * RPC Methods exported by this module
  */
  */
@@ -876,6 +901,8 @@ static rpc_export_t core_rpc_methods[] = {
 	{"core.sctp_options",      core_sctp_options,      core_sctp_options_doc,
 	{"core.sctp_options",      core_sctp_options,      core_sctp_options_doc,
 		0},
 		0},
 	{"core.sctp_info",         core_sctpinfo,          core_sctpinfo_doc,   0},
 	{"core.sctp_info",         core_sctpinfo,          core_sctpinfo_doc,   0},
+	{"core.udp4_raw_info",     core_udp4rawinfo,       core_udp4rawinfo_doc,
+		0},
 #ifdef USE_DNS_CACHE
 #ifdef USE_DNS_CACHE
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,
 	{"dns.mem_info",          dns_cache_mem_info,     dns_cache_mem_info_doc,
 		0	},
 		0	},

+ 4 - 0
globals.h

@@ -64,6 +64,10 @@ extern struct socket_info* bind_address; /* pointer to the crt. proc.
 extern struct socket_info* sendipv4; /* ipv4 socket to use when msg.
 extern struct socket_info* sendipv4; /* ipv4 socket to use when msg.
 										comes from ipv6*/
 										comes from ipv6*/
 extern struct socket_info* sendipv6; /* same as above for ipv6 */
 extern struct socket_info* sendipv6; /* same as above for ipv6 */
+#ifdef USE_RAW_SOCKS
+extern int raw_udp4_send_sock;
+#endif /* USE_RAW_SOCKS */
+
 #ifdef USE_TCP
 #ifdef USE_TCP
 extern struct socket_info* sendipv4_tcp; /* ipv4 socket to use when msg.
 extern struct socket_info* sendipv4_tcp; /* ipv4 socket to use when msg.
 										comes from ipv6*/
 										comes from ipv6*/

+ 98 - 10
main.c

@@ -73,9 +73,9 @@
  * 2008-08-08  sctp support (andrei)
  * 2008-08-08  sctp support (andrei)
  * 2008-08-19  -l support for mmultihomed addresses/addresses lists
  * 2008-08-19  -l support for mmultihomed addresses/addresses lists
  *                (e.g. -l (eth0, 1.2.3.4, foo.bar) ) (andrei)
  *                (e.g. -l (eth0, 1.2.3.4, foo.bar) ) (andrei)
- *  2010-04-19 added daemon_status_fd pipe to communicate the parent process
- *             with the main process in daemonize mode, so the parent process
- *             can return the proper exit status code (ibc)
+ * 2010-04-19  added daemon_status_fd pipe to communicate the parent process
+ *              with the main process in daemonize mode, so the parent process
+ *              can return the proper exit status code (ibc)
  */
  */
 
 
 /** main file (init, daemonize, startup) 
 /** main file (init, daemonize, startup) 
@@ -145,6 +145,9 @@
 #include "nonsip_hooks.h"
 #include "nonsip_hooks.h"
 #include "ut.h"
 #include "ut.h"
 #include "signals.h"
 #include "signals.h"
+#ifdef USE_RAW_SOCKS
+#include "raw_sock.h"
+#endif /* USE_RAW_SOCKS */
 #ifdef USE_TCP
 #ifdef USE_TCP
 #include "poll_types.h"
 #include "poll_types.h"
 #include "tcp_init.h"
 #include "tcp_init.h"
@@ -185,6 +188,7 @@
 #include "pvapi_init.h" /* init */
 #include "pvapi_init.h" /* init */
 #include "pv_core.h" /* register core pvars */
 #include "pv_core.h" /* register core pvars */
 #include "ppcfg.h"
 #include "ppcfg.h"
+#include "sock_ut.h"
 
 
 #ifdef DEBUG_DMALLOC
 #ifdef DEBUG_DMALLOC
 #include <dmalloc.h>
 #include <dmalloc.h>
@@ -437,6 +441,9 @@ struct socket_info* bind_address=0; /* pointer to the crt. proc.
 									 listening address*/
 									 listening address*/
 struct socket_info* sendipv4; /* ipv4 socket to use when msg. comes from ipv6*/
 struct socket_info* sendipv4; /* ipv4 socket to use when msg. comes from ipv6*/
 struct socket_info* sendipv6; /* same as above for ipv6 */
 struct socket_info* sendipv6; /* same as above for ipv6 */
+#ifdef USE_RAW_SOCKS
+int raw_udp4_send_sock = -1; /* raw socket used for sending udp4 packets */
+#endif /* USE_RAW_SOCKS */
 #ifdef USE_TCP
 #ifdef USE_TCP
 struct socket_info* sendipv4_tcp;
 struct socket_info* sendipv4_tcp;
 struct socket_info* sendipv6_tcp;
 struct socket_info* sendipv6_tcp;
@@ -1236,15 +1243,57 @@ int main_loop()
 		/* only one address, we ignore all the others */
 		/* only one address, we ignore all the others */
 		if (udp_init(udp_listen)==-1) goto error;
 		if (udp_init(udp_listen)==-1) goto error;
 		bind_address=udp_listen;
 		bind_address=udp_listen;
-		if (bind_address->address.af==AF_INET)
+		if (bind_address->address.af==AF_INET) {
 			sendipv4=bind_address;
 			sendipv4=bind_address;
-		else
+#ifdef USE_RAW_SOCKS
+		/* always try to have a raw socket opened if we are using ipv4 */
+		raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
+		if (raw_udp4_send_sock < 0) {
+			if ( default_core_cfg.udp4_raw > 0) {
+				/* force use raw socket failed */
+				ERR("could not initialize raw udp send socket (ipv4):"
+						" %s (%d)\n", strerror(errno), errno);
+				if (errno == EPERM)
+					ERR("could not initialize raw socket on startup"
+						" due to inadequate permissions, please"
+						" restart as root or with CAP_NET_RAW\n");
+				goto error;
+			}
+			/* off or auto and no raw socket => run with it disabled */
+			default_core_cfg.udp4_raw = 0; /* disabled */
+		} else {
+			register_fds(1);
+			if (default_core_cfg.udp4_raw < 0) {
+				/* auto-detect => use it */
+				default_core_cfg.udp4_raw = 1; /* enabled */
+				DBG("raw socket possible => turning it on\n");
+			}
+			if (default_core_cfg.udp4_raw_ttl < 0) {
+				/* auto-detect */
+				default_core_cfg.udp4_raw_ttl = sock_get_ttl(sendipv4->socket);
+				if (default_core_cfg.udp4_raw_ttl < 0)
+					/* error, use some default value */
+					default_core_cfg.udp4_raw_ttl = 63;
+			}
+		}
+#else
+		/* no raw socket support compiled in => force udp4_raw off
+		   (the config struct is default_core_cfg, not default_core.cfg) */
+		default_core_cfg.udp4_raw = 0;
+#endif /* USE_RAW_SOCKS */
+		} else
 			sendipv6=bind_address;
 			sendipv6=bind_address;
 		if (udp_listen->next){
 		if (udp_listen->next){
 			LOG(L_WARN, "WARNING: using only the first listen address"
 			LOG(L_WARN, "WARNING: using only the first listen address"
 						" (no fork)\n");
 						" (no fork)\n");
 		}
 		}
 
 
+		/* delay cfg_shmize to the last moment (it must be called _before_
+		   forking). Changes to default cfgs after this point will be
+		   ignored.
+		*/
+		if (cfg_shmize() < 0) {
+			LOG(L_CRIT, "could not initialize shared configuration\n");
+			goto error;
+		}
+	
 		/* Register the children that will keep updating their
 		/* Register the children that will keep updating their
 		 * local configuration */
 		 * local configuration */
 		cfg_register_child(
 		cfg_register_child(
@@ -1363,6 +1412,42 @@ int main_loop()
 			/* children_no per each socket */
 			/* children_no per each socket */
 			cfg_register_child(children_no);
 			cfg_register_child(children_no);
 		}
 		}
+#ifdef USE_RAW_SOCKS
+		/* always try to have a raw socket opened if we are using ipv4 */
+		if (sendipv4) {
+			raw_udp4_send_sock = raw_socket(IPPROTO_RAW, 0, 0, 1);
+			if (raw_udp4_send_sock < 0) {
+				if ( default_core_cfg.udp4_raw > 0) {
+						/* force use raw socket failed */
+						ERR("could not initialize raw udp send socket (ipv4):"
+								" %s (%d)\n", strerror(errno), errno);
+						if (errno == EPERM)
+							ERR("could not initialize raw socket on startup"
+								" due to inadequate permissions, please"
+								" restart as root or with CAP_NET_RAW\n");
+						goto error;
+					}
+					default_core_cfg.udp4_raw = 0; /* disabled */
+			} else {
+				register_fds(1);
+				if (default_core_cfg.udp4_raw < 0) {
+					/* auto-detect => use it */
+					default_core_cfg.udp4_raw = 1; /* enabled */
+					DBG("raw socket possible => turning it on\n");
+				}
+				if (default_core_cfg.udp4_raw_ttl < 0) {
+					/* auto-detect */
+					default_core_cfg.udp4_raw_ttl =
+						sock_get_ttl(sendipv4->socket);
+					if (default_core_cfg.udp4_raw_ttl < 0)
+						/* error, use some default value */
+						default_core_cfg.udp4_raw_ttl = 63;
+				}
+			}
+		}
+#else
+		default_core_cfg.udp4_raw = 0;
+#endif /* USE_RAW_SOCKS */
 #ifdef USE_SCTP
 #ifdef USE_SCTP
 		if (!sctp_disable){
 		if (!sctp_disable){
 			for(si=sctp_listen; si; si=si->next){
 			for(si=sctp_listen; si; si=si->next){
@@ -1428,6 +1513,14 @@ int main_loop()
 			 * sending) so we open all first*/
 			 * sending) so we open all first*/
 		if (do_suid()==-1) goto error; /* try to drop privileges */
 		if (do_suid()==-1) goto error; /* try to drop privileges */
 
 
+		/* delay cfg_shmize to the last moment (it must be called _before_
+		   forking). Changes to default cfgs after this point will be
+		   ignored (cfg_shmize() will copy the default cfgs into shmem).
+		*/
+		if (cfg_shmize() < 0) {
+			LOG(L_CRIT, "could not initialize shared configuration\n");
+			goto error;
+		}
 		/* init childs with rank==PROC_INIT before forking any process,
 		/* init childs with rank==PROC_INIT before forking any process,
 		 * this is a place for delayed (after mod_init) initializations
 		 * this is a place for delayed (after mod_init) initializations
 		 * (e.g. shared vars that depend on the total number of processes
 		 * (e.g. shared vars that depend on the total number of processes
@@ -2291,11 +2384,6 @@ try_select_again:	tval.tv_usec = 0;
 		goto error;
 		goto error;
 	}
 	}
 	
 	
-	if (cfg_shmize() < 0) {
-		LOG(L_CRIT, "could not initialize shared configuration\n");
-		goto error;
-	}
-	
 	/* initialize process_table, add core process no. (calc_proc_no()) to the
 	/* initialize process_table, add core process no. (calc_proc_no()) to the
 	 * processes registered from the modules*/
 	 * processes registered from the modules*/
 	if (init_pt(calc_proc_no())==-1)
 	if (init_pt(calc_proc_no())==-1)

+ 144 - 0
raw_listener.c

@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** raw socket udp listen functions.
+ *  @file raw_listener.c
+ *  @ingroup core
+ *  Module: @ref core
+ */
+/*
+ * History:
+ * --------
+ *  2010-06-09  initial version (from older code) andrei
+ */
+
+#ifdef USE_RAW_SOCKS
+
+
+#include "raw_listener.h"
+#include "raw_sock.h"
+#include "receive.h"
+
+#include <errno.h>
+#include <string.h>
+
+struct socket_info* raw_udp_sendipv4=0;
+
+/** creates a raw socket based on a socket_info structure.
+ * Side-effects: on success raw_udp_sendipv4 is set to si if it was not
+ * already set, or if no interface was given (iface or iface->s is null);
+ * in the latter case a previously set value is overwritten.
+ * @param si - pointer to partially filled socket_info structure (su must
+ *              be set).
+ * @param iface - pointer to network interface to bind on (str). Can be null.
+ * @param iphdr_incl - 1 if sends on this socket will include the IP header.
+ * @return <0 on error, socket on success (the value returned by
+ *          raw_udp4_socket() is passed through unchanged).
+ */
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl)
+{
+	int sock;
+	struct ip_addr ip;
+	
+	/* the address to use is taken from si->su */
+	su2ip_addr(&ip, &si->su);
+	sock=raw_udp4_socket(&ip, iface, iphdr_incl);
+	if (sock>=0){
+		if (raw_udp_sendipv4==0 || iface==0 || iface->s==0)
+			raw_udp_sendipv4=si;
+	}
+	return sock;
+}
+
+
+
+/** receive sip udp ipv4 packets over a raw socket in a loop.
+ * It should be called by a "raw socket receiver" process
+ * (since the function never exits unless it encounters a
+ *  critical error).
+ * Each accepted packet is 0-terminated and handed to receive_msg() together
+ * with a receive_info filled from the source and destination addresses
+ * returned by raw_udp4_recv(). Packets shorter than MIN_UDP_PACKET or with
+ * source port 0 are dropped.
+ * @param rsock - initialized raw socket.
+ * @param port1 - start of port range.
+ * @param port2 - end of port range. If 0 it's equivalent to listening only
+ *                on port1.
+ * @return <0 on error, never returns on success.
+ */
+int raw_udp4_rcv_loop(int rsock, int port1, int port2)
+{
+	/* BUF_SIZE bytes are offered to raw_udp4_recv(); the extra byte leaves
+	 * room for the 0 terminator written after the message */
+	static char buf[BUF_SIZE+1];
+	char* p;
+	char* tmp;
+	union sockaddr_union from;
+	union sockaddr_union to;
+	struct receive_info ri;
+	struct raw_filter rf;
+	int len;
+	
+	/* this will not change */
+	from.sin.sin_family=AF_INET;
+	ri.bind_address=0;
+	ri.proto=PROTO_UDP;
+	ri.proto_reserved1=0;
+	ri.proto_reserved2=0;
+	/* set filter to match any address but with the specified port range */
+	memset(&rf, 0, sizeof(rf));
+	rf.dst.ip.af=AF_INET;
+	rf.dst.ip.len=4;
+	rf.dst.mask.af=AF_INET;
+	rf.dst.mask.len=4;
+	rf.proto=PROTO_UDP;
+	rf.port1=port1;
+	rf.port2=port2?port2:port1;
+	for(;;){
+		/* p is passed by address, so raw_udp4_recv() may move it
+		 * (presumably to the start of the payload -- TODO confirm) */
+		p=buf;
+		len=raw_udp4_recv(rsock, &p, BUF_SIZE, &from, &to, &rf);
+		if (len<0){
+			/* -1: errno decides between retry and abort; any other
+			 * negative value is only logged and the packet is skipped */
+			if (len==-1){
+				/* NOTE(review): errno is tested after LOG(); confirm that
+				 * LOG() cannot modify it */
+				LOG(L_ERR, "ERROR: raw_udp4_rcv_loop:raw_udp4_recv: %s [%d]\n",
+						strerror(errno), errno);
+				if ((errno==EINTR)||(errno==EWOULDBLOCK))
+					continue;
+				else
+					goto error;
+			}else{
+				DBG("raw_udp4_rcv_loop: raw_udp4_recv error: %d\n", len);
+				continue;
+			}
+		}
+		/* we must 0-term the message */
+		p[len]=0;
+		ri.src_su=from;
+		su2ip_addr(&ri.src_ip, &from);
+		ri.src_port=su_getport(&from);
+		su2ip_addr(&ri.dst_ip, &to);
+		ri.dst_port=su_getport(&to);
+		/* sanity checks */
+		if (len<MIN_UDP_PACKET){
+			tmp=ip_addr2a(&ri.src_ip);
+			/* NOTE(review): htons() on a value obtained from su_getport()
+			 * looks suspicious if su_getport() already returns host byte
+			 * order -- confirm */
+			DBG("raw_udp4_rcv_loop: probing packet received from %s %d\n",
+					tmp, htons(ri.src_port));
+			continue;
+		}
+		if (ri.src_port==0){
+			tmp=ip_addr2a(&ri.src_ip);
+			LOG(L_INFO, "raw_udp4_rcv_loop: dropping 0 port packet from %s\n",
+						tmp);
+			continue;
+		}
+		tmp=ip_addr2a(&ri.src_ip);
+		DBG("raw_udp4_rcv_loop: received from %s:\n[%.*s]\n", tmp, len, p);
+		receive_msg(p, len, &ri);
+	}
+error:
+	return -1;
+}
+
+
+#endif /* USE_RAW_SOCKS */

+ 39 - 0
raw_listener.h

@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** raw socket udp listen functions.
+ *  @file raw_listener.h
+ *  @ingroup core
+ *  Module: @ref core
+ */
+/*
+ * History:
+ * --------
+ *  2010-06-09  initial version (from older code) andrei
+ */
+
+#ifndef _raw_listener_h
+#define _raw_listener_h
+
+#include "ip_addr.h"
+
+
+/** default raw socket used for sending on udp ipv4
+ * (declaration only: the variable itself is defined in raw_listener.c,
+ *  so every file including this header refers to the same object) */
+extern struct socket_info* raw_udp_sendipv4;
+
+int raw_listener_init(struct socket_info* si, str* iface, int iphdr_incl);
+int raw_udp4_rcv_loop(int rsock, int port1, int port2);
+
+#endif /* _raw_listener_h */

+ 696 - 0
raw_sock.c

@@ -0,0 +1,696 @@
+/* 
+ * $Id$
+ *
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** raw socket functions.
+ *  @file raw_sock.c
+ *  @ingroup core
+ *  Module: @ref core
+ */
+/* 
+ * History:
+ * --------
+ *  2010-06-07  initial version (from older code) andrei
+ *  2010-06-15  IP_HDRINCL raw socket support, including on-send
+ *               fragmentation (andrei)
+ */
+
+#ifdef USE_RAW_SOCKS
+
+#include "compiler_opt.h"
+#include "ip_addr.h"
+#include "dprint.h"
+#include "str.h"
+#include "rand/fastrand.h"
+#include "globals.h"
+
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <arpa/inet.h>
+#ifndef __USE_BSD
+#define __USE_BSD  /* on linux use bsd version of iphdr (more portable) */
+#endif /* __USE_BSD */
+#include <netinet/ip.h>
+#define __FAVOR_BSD /* on linux use bsd version of udphdr (more portable) */
+#include <netinet/udp.h>
+
+#include "raw_sock.h"
+#include "cfg/cfg.h"
+#include "cfg_core.h"
+
+
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined(__OS_openbsd) \
+	|| defined (__OS_darwin)
+/** fragmentation is done by the kernel (no need to do it in userspace) */
+#define RAW_IPHDR_INC_AUTO_FRAG
+#endif /* __OS_* */
+
+/* macros for converting values in the expected format */
+#if defined (__OS_freebsd) || defined (__OS_netbsd) || defined (__OS_darwin)
+/* on freebsd and netbsd the ip offset (along with flags) and the
+   ip header length must be filled in _host_ bytes order format.
+   The same is true for openbsd < 2.1.
+*/
+/** convert the ip offset in the format expected by the kernel. */
+#define RAW_IPHDR_IP_OFF(off) (unsigned short)(off)
+/** convert the ip total length in the format expected by the kernel. */
+#define RAW_IPHDR_IP_LEN(tlen) (unsigned short)(tlen)
+
+#else /* __OS_* */
+/* linux, openbsd >= 2.1 a.s.o. */
+/** convert the ip offset in the format expected by the kernel. */
+#define RAW_IPHDR_IP_OFF(off)  htons((unsigned short)(off))
+/** convert the ip total length in the format expected by the kernel. */
+#define RAW_IPHDR_IP_LEN(tlen) htons((unsigned short)(tlen))
+
+#endif /* __OS_* */
+
+
+/** create and return a raw socket.
+ * Besides creating the socket, the following options are set:
+ *  - IP_HDRINCL if iphdr_incl is set, else IP_PKTINFO (or IP_RECVDSTADDR
+ *    where IP_PKTINFO is not available) so that the destination address
+ *    of received packets can be retrieved;
+ *  - IP_MTU_DISCOVER = IP_PMTUDISC_DONT (where available).
+ * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
+ * @param ip - if not null the socket will be bound on this ip.
+ * @param iface - if not null the socket will be bound to this interface
+ *                (SO_BINDTODEVICE). This is supported only on linux; on
+ *                systems without SO_BINDTODEVICE a non-null iface is
+ *                an error.
+ * @param iphdr_incl - set to 1 if packets send on this socket include
+ *                     a pre-built ip header (some fields, like the checksum
+ *                     will still be filled by the kernel, OTOH packet
+ *                     fragmentation has to be done in user space).
+ * @return socket on success, -1 on error (the socket is closed on error).
+ */
+int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
+{
+	int sock;
+	int t;
+	union sockaddr_union su;
+#if defined (SO_BINDTODEVICE)
+	char short_ifname[sizeof(int)];
+	int ifname_len;
+	char* ifname;
+#endif /* SO_BINDTODEVICE */
+
+	sock = socket(PF_INET, SOCK_RAW, proto);
+	if (sock==-1) {
+		/* do not fail silently: raw sockets usually need extra privileges
+		   and the errno is the only hint about what went wrong */
+		ERR("raw_socket: socket(PF_INET, SOCK_RAW, %d) failed: %s [%d]\n",
+				proto, strerror(errno), errno);
+		goto error;
+	}
+	/* set socket options */
+	if (iphdr_incl) {
+		t=1;
+		if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &t, sizeof(t))<0){
+			ERR("raw_socket: setsockopt(IP_HDRINCL) failed: %s [%d]\n",
+					strerror(errno), errno);
+			goto error;
+		}
+	} else {
+		/* IP_PKTINFO makes no sense if the ip header is included */
+		/* using IP_PKTINFO */
+		t=1;
+#ifdef IP_PKTINFO
+		if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
+			ERR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
+					strerror(errno), errno);
+			goto error;
+		}
+#elif defined(IP_RECVDSTADDR)
+		if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
+			ERR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
+					strerror(errno), errno);
+			goto error;
+		}
+#else
+#error "no method of getting the destination ip address supported"
+#endif /* IP_RECVDSTADDR / IP_PKTINFO */
+	}
+#if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
+	t=IP_PMTUDISC_DONT;
+	if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
+		ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
+				strerror(errno));
+		goto error;
+	}
+#endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
+	if (iface && iface->s){
+#if defined (SO_BINDTODEVICE)
+		/* workaround for linux bug: arg to setsockopt must have at least
+		 * sizeof(int) size or EINVAL would be returned */
+		if (iface->len<sizeof(int)){
+			/* zero-fill first: the whole buffer is handed to the kernel,
+			   so no uninitialized stack bytes must be left after the
+			   name (this also zero-terminates it) */
+			memset(short_ifname, 0, sizeof(short_ifname));
+			memcpy(short_ifname, iface->s, iface->len);
+			ifname_len=sizeof(short_ifname);
+			ifname=short_ifname;
+		}else{
+			ifname_len=iface->len;
+			ifname=iface->s;
+		}
+		if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, ifname, ifname_len)
+						<0){
+				ERR("raw_socket: could not bind to %.*s: %s [%d]\n",
+							iface->len, ZSW(iface->s), strerror(errno), errno);
+				goto error;
+		}
+#else /* !SO_BINDTODEVICE */
+		/* SO_BINDTODEVICE is linux specific => cannot bind to a device */
+		ERR("raw_socket: bind to device supported only on linux\n");
+		goto error;
+#endif /* SO_BINDTODEVICE */
+	}
+	/* FIXME: probe_max_receive_buffer(sock) missing */
+	if (ip){
+		/* raw sockets have no port => bind only on the ip (port 0) */
+		init_su(&su, ip, 0);
+		if (bind(sock, &su.s, sockaddru_len(su))==-1){
+			ERR("raw_socket: bind(%s) failed: %s [%d]\n",
+				ip_addr2a(ip), strerror(errno), errno);
+			goto error;
+		}
+	}
+	return sock;
+error:
+	if (sock!=-1) close(sock);
+	return -1;
+}
+
+
+
+/** create and return a raw socket for udp over ipv4.
+ * Convenience wrapper: same as raw_socket() with the protocol fixed to
+ * IPPROTO_UDP.
+ * @param ip - optional (can be null) ip on which the socket will be bound.
+ * @param iface - optional (can be null) interface to which the socket will
+ *                be bound (SO_BINDTODEVICE).
+ * @param iphdr_incl - 1 if the packets sent on this socket will include
+ *                     a pre-built ip header (the kernel still fills some
+ *                     fields, e.g. the checksum, but fragmentation has to
+ *                     be done in user space), 0 otherwise.
+ * @return socket on success, -1 on error
+ */
+int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
+{
+	int udp_rsock;
+
+	udp_rsock = raw_socket(IPPROTO_UDP, ip, iface, iphdr_incl);
+	return udp_rsock;
+}
+
+
+
+/** receives an ipv4 packet using a raw socket.
+ * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
+ * from and to are filled (only the ip part, the ports are 0 since this
+ * function doesn't try to look beyond the IP level).
+ * The call blocks in recvmsg() and is transparently restarted on EINTR.
+ * @param sock - raw socket
+ * @param buf - destination buffer.
+ * @param len - buffer len (should be enough for receiving a packet +
+ *               IP header).
+ * @param from - result parameter, the IP address part of it will be filled
+ *                with the source address and the port with 0. It does not
+ *                need to be initialized by the caller.
+ * @param to - result parameter, the IP address part of it will be filled
+ *                with the destination (local) address and the port with 0.
+ * @return packet len or <0 on error: -1 (check errno),
+ *        -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
+ */
+int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
+					union sockaddr_union* to)
+{
+	struct iovec iov[1];
+	struct msghdr rcv_msg;
+	struct cmsghdr* cmsg;
+#ifdef IP_PKTINFO
+	struct in_pktinfo* rcv_pktinfo;
+#endif /* IP_PKTINFO */
+	int n, ret;
+	char msg_ctrl_buf[1024];
+
+	iov[0].iov_base=buf;
+	iov[0].iov_len=len;
+	/* start from a clean msghdr, so that no field is passed
+	   uninitialized to the kernel */
+	memset(&rcv_msg, 0, sizeof(rcv_msg));
+	rcv_msg.msg_name=from;
+	/* from is a pure result parameter: its address family might not be
+	   set yet, so the available space is the size of the whole union and
+	   not something derived from its (possibly garbage) content */
+	rcv_msg.msg_namelen=sizeof(*from);
+	rcv_msg.msg_control=msg_ctrl_buf;
+	rcv_msg.msg_controllen=sizeof(msg_ctrl_buf);
+	rcv_msg.msg_iov=&iov[0];
+	rcv_msg.msg_iovlen=1;
+	ret=-2; /* no PKT_INFO or AF mismatch */
+retry:
+	n=recvmsg(sock, &rcv_msg, MSG_WAITALL);
+	if (unlikely(n==-1)){
+		if (errno==EINTR)
+			goto retry;
+		ret=n;
+		goto end;
+	}
+	/* find the pkt info */
+	for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg; cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
+#ifdef IP_PKTINFO
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
+					(cmsg->cmsg_type==IP_PKTINFO))) {
+			rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
+			to->sin.sin_family=AF_INET;
+			memcpy(&to->sin.sin_addr, &rcv_pktinfo->ipi_spec_dst.s_addr, 
+									sizeof(to->sin.sin_addr));
+			to->sin.sin_port=0; /* not known */
+			/* interface no. in ipi_ifindex */
+			ret=n; /* success */
+			break;
+		}
+#elif defined (IP_RECVDSTADDR)
+		if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
+					(cmsg->cmsg_type==IP_RECVDSTADDR))) {
+			to->sin.sin_family=AF_INET;
+			memcpy(&to->sin.sin_addr, CMSG_DATA(cmsg),
+									sizeof(to->sin.sin_addr));
+			to->sin.sin_port=0; /* not known */
+			ret=n; /* success */
+			break;
+		}
+#else
+#error "no method of getting the destination ip address supported"
+#endif /* IP_PKTINFO / IP_RECVDSTADDR */
+	}
+end:
+	return ret;
+}
+
+
+
+/** receive an ipv4 udp packet over a raw socket.
+ * The whole packet (ip header included) is read at *buf, the ip and udp
+ * headers are parsed and *buf is moved forward to the udp payload.
+ * from and to are filled with the addresses and ports found in the packet.
+ * @param rsock - raw socket
+ * @param buf - in/out: on input *buf is where the packet will be written,
+ *              on success *buf points to the start of the udp payload.
+ * @param len - buffer length (should be enough to hold at least the
+ *               ip and udp headers + 1 byte).
+ * @param from - result parameter, filled with the source address and port
+ *               of the packet.
+ * @param to - result parameter, filled with the destination (local) address
+ *               and port of the packet.
+ * @param rf   - filter used to decide whether or not the packet is
+ *                accepted/processed. If null, all the packets are accepted.
+ * @return udp payload len or <0 on error (-1 and -2 on recv error,
+ *         @see recvpkt4, -3 if the headers are invalid and -4 if the packet
+ *         doesn't match the filter; in the -4 case *buf, from and to have
+ *         already been updated).
+ */
+int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
+					union sockaddr_union* to, struct raw_filter* rf)
+{
+	struct ip ip_h;
+	struct udphdr udp_h;
+	struct ip_addr local_ip;
+	char* pkt_end;
+	char* udp_start;
+	char* payload;
+	unsigned short ulen;
+	unsigned short sport;
+	unsigned short dport;
+	int rcvd;
+
+	rcvd=recvpkt4(rsock, *buf, len, from, to);
+	if (unlikely(rcvd<0))
+		return rcvd; /* recv error, pass it on unchanged */
+	pkt_end=*buf+rcvd;
+	/* at least an option-less ip header and an udp header are needed */
+	if (unlikely(rcvd<(sizeof(struct ip)+sizeof(struct udphdr))))
+		return -3;
+	/* FIXME: if initial buffer is aligned, one could skip the memcpy
+	   and directly cast ip and udphdr pointer to the memory */
+	memcpy(&ip_h, *buf, sizeof(struct ip));
+	/* the udp header starts after the ip header (ip_hl 32-bit words) */
+	udp_start=*buf+ip_h.ip_hl*4;
+	payload=udp_start+sizeof(struct udphdr);
+	if (unlikely(payload>pkt_end))
+		return -3;
+	memcpy(&udp_h, udp_start, sizeof(struct udphdr));
+	ulen=ntohs(udp_h.uh_ulen);
+	/* the udp length must cover exactly the rest of the packet */
+	if (unlikely((udp_start+ulen)!=pkt_end)){
+		if ((udp_start+ulen)<pkt_end)
+			ERR("udp length too small: %d/%d\n",
+					(int)ulen, (int)(pkt_end-udp_start));
+		return -3;
+	}
+	/* headers ok => skip over them */
+	*buf=payload;
+	/* destination: ip taken from the ip header (needed if no PKT_INFO is
+	   used), port from the udp header */
+	local_ip.af=AF_INET;
+	local_ip.len=4;
+	local_ip.u.addr32[0]=ip_h.ip_dst.s_addr;
+	dport=ntohs(udp_h.uh_dport);
+	ip_addr2su(to, &local_ip, dport);
+	/* source: only the port is missing */
+	sport=ntohs(udp_h.uh_sport);
+	su_setport(from, sport);
+	if (likely(rf)) {
+		su2ip_addr(&local_ip, to);
+		/* rejected if the port is outside [port1, port2] (the port check
+		   is skipped if either dst. port or port1 is 0) or if the
+		   destination ip is not in the filter net */
+		if ( (dport && rf->port1 && ((dport<rf->port1) ||
+										(dport>rf->port2)) ) ||
+			(matchnet(&local_ip, &rf->dst)!=1) )
+			return -4;
+	}
+	return (int)(pkt_end-payload);
+}
+
+
+
+/** udp checksum helper: compute the pseudo-header 16-bit "sum".
+ * Computes the partial checksum (no complement) of the pseudo-header
+ * and of the udp header (with the checksum field counted as 0).
+ * It is meant to be used by udpv4_chksum().
+ * The function is file-local (static): a plain C99 "inline" would not
+ * provide an external definition and could fail to link when the call is
+ * not inlined.
+ * @param uh - filled udp header (ports and length in network byte order).
+ * @param src - source ip address in network byte order.
+ * @param dst - destination ip address in network byte order.
+ * @param length - payload length (not including the udp header),
+ *                 in _host_ order. Not used: the length is taken from
+ *                 uh->uh_ulen.
+ * @return the partial checksum in host order
+ */
+inline static unsigned short udpv4_vhdr_sum(	struct udphdr* uh,
+										struct in_addr* src,
+										struct in_addr* dst,
+										unsigned short length)
+{
+	unsigned sum;
+	
+	(void)length; /* unused, see above */
+	/* pseudo header: src ip, dst ip, zero + protocol, udp length;
+	   everything is added as 16-bit words in network byte order */
+	sum=(src->s_addr>>16)+(src->s_addr&0xffff)+
+		(dst->s_addr>>16)+(dst->s_addr&0xffff)+
+		htons(IPPROTO_UDP)+(uh->uh_ulen);
+	/* udp header */
+	sum+=(uh->uh_dport)+(uh->uh_sport)+(uh->uh_ulen) + 0 /*chksum*/; 
+	/* fold it */
+	sum=(sum>>16)+(sum&0xffff);
+	sum+=(sum>>16);
+	/* no complement; the sum was computed on network order words =>
+	   swap the result to host order */
+	return ntohs((unsigned short) sum);
+}
+
+
+
+/** compute the udp over ipv4 checksum.
+ * The checksum covers the pseudo-header, the udp header and the payload.
+ * A computed value of 0 is returned as 0xffff: on the wire 0 means
+ * "no checksum was generated", so a real checksum of 0 has to be sent
+ * as all ones (its one's complement equivalent).
+ * @param u - filled udp header (except checksum).
+ * @param src - source ip v4 address, in _network_ byte order.
+ * @param dst - destination ip v4 address, in _network_ byte order.
+ * @param data - pointer to the udp payload.
+ * @param length - payload length, not including the udp header and in
+ *                 _host_ order. The length must be <= 0xffff - 8
+ *                 (to allow space for the udp header).
+ * @return the checksum in _host_ order (never 0) */
+inline static unsigned short udpv4_chksum(struct udphdr* u,
+							struct in_addr* src, struct in_addr* dst,
+							unsigned char* data, unsigned short length)
+{
+	unsigned sum;
+	unsigned short chksum;
+	unsigned char* end;
+
+	sum=udpv4_vhdr_sum(u, src, dst, length);
+	end=data+(length&(~0x1)); /* make sure it's even */
+	/* TODO: 16 & 32 bit aligned version */
+	/* not aligned: add the payload byte by byte, as big endian words */
+	for(;data<end;data+=2){
+		sum+=((data[0]<<8)+data[1]);
+	}
+	/* odd length => the last byte is padded with a 0 byte */
+	if (length&0x1)
+		sum+=((*data)<<8);
+	
+	/* fold it */
+	sum=(sum>>16)+(sum&0xffff);
+	sum+=(sum>>16);
+	chksum=(unsigned short)~sum;
+	if (chksum==0)
+		chksum=0xffff; /* 0 is reserved for "no checksum" */
+	return chksum;
+}
+
+
+
+/** build an udp header.
+ * The ports are copied as they are (network byte order) from from and to,
+ * the length field is set to payload + udp header size.
+ * @param u - udp header to be filled.
+ * @param from - source ip v4 address and port.
+ * @param to -   destination ip v4 address and port.
+ * @param buf - pointer to the payload.
+ * @param len - payload length (not including the udp header).
+ * @param do_chk - if non-zero the udp checksum is computed, if 0 the
+ *                 checksum field is set to 0.
+ * @return 0 on success, < 0 on error.
+ */
+inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from, 
+				struct sockaddr_in* to, unsigned char* buf, int len,
+					int do_chk)
+{
+	/* the checksum is computed over the fields below, so they have to be
+	   filled first */
+	u->uh_sport=from->sin_port;
+	u->uh_dport=to->sin_port;
+	u->uh_ulen=htons((unsigned short)len+sizeof(struct udphdr));
+	u->uh_sum= do_chk ?
+				htons(udpv4_chksum(u, &from->sin_addr, &to->sin_addr,
+									buf, len)) :
+				0 /* no checksum */;
+	return 0;
+}
+
+
+
+/** build an ipv4 header (no options).
+ * Note: the checksum is _not_ computed (it is set to 0).
+ * WARNING: the ip total length is stored in the byte order selected by
+ * RAW_IPHDR_IP_LEN(), which is either _host_ or network byte order,
+ * depending on the OS (for example freebsd needs host byte order for raw
+ * sockets with IPHDR_INC, while linux needs network byte order).
+ * The offset/flags field is set to 0 (no fragmentation), the TOS comes
+ * from the global tos and the TTL from the udp4_raw_ttl core config value.
+ * @param iph - ip header to be filled.
+ * @param from - source ip v4 address (network byte order).
+ * @param to -   destination ip v4 address (network byte order).
+ * @param payload_len - payload length (not including the ip header).
+ * @param proto - protocol.
+ * @return 0 on success, < 0 on error.
+ */
+inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
+				struct in_addr* to, int payload_len, unsigned char proto)
+{
+	/* version and header length (in 32-bit words) */
+	iph->ip_v = 4;
+	iph->ip_hl = sizeof(struct ip)/4;
+	/* addresses and upper layer protocol */
+	iph->ip_src = *from;
+	iph->ip_dst = *to;
+	iph->ip_p = proto;
+	/* on freebsd ip_len _must_ be in _host_ byte order instead
+	   of network byte order. On linux the length is ignored (it's filled
+	   automatically every time). */
+	iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
+	/* fragmentation related fields: id 0 => filled automatically by the
+	   kernel; offset: first 3 bits=flags=0, last 13 bits=offset */
+	iph->ip_id = 0;
+	iph->ip_off = 0;
+	/* configurable fields */
+	iph->ip_tos = tos;
+	iph->ip_ttl = cfg_get(core, core_cfg, udp4_raw_ttl);
+	/* not computed here */
+	iph->ip_sum = 0;
+
+	return 0;
+}
+
+
+
+/** send an udp packet over a raw socket without IP_HDRINCL.
+ * Only the udp header is built here (checksum included), the ip header
+ * is left to the kernel. The source ip is passed to the kernel in
+ * a control message (IP_PKTINFO or IP_SENDSRCADDR).
+ * @param rsock - raw socket
+ * @param buf - data
+ * @param len - data len
+ * @param from - source address:port (_must_ be non-null, but the ip address
+ *                can be 0, in which case it will be filled by the kernel).
+ * @param to - destination address:port
+ * @return  <0 on error (errno set too), number of bytes sent on success
+ *          (including the udp header => on success len + udpheader size).
+ */
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
+					union sockaddr_union* from,
+					union sockaddr_union* to)
+{
+	char ctrl_buf[1024];
+	struct udphdr uh;
+	struct iovec vec[2];
+	struct msghdr msg;
+	struct cmsghdr* cm;
+#ifdef IP_PKTINFO
+	struct in_pktinfo* pktinfo;
+#endif /* IP_PKTINFO */
+
+	/* what is sent: udp header followed by the payload */
+	mk_udp_hdr(&uh, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
+	vec[0].iov_base=(char*)&uh;
+	vec[0].iov_len=sizeof(uh);
+	vec[1].iov_base=buf;
+	vec[1].iov_len=len;
+	/* where it is sent */
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_name=&to->sin;
+	msg.msg_namelen=sockaddru_len(*to);
+	msg.msg_iov=&vec[0];
+	msg.msg_iovlen=2;
+	/* control message carrying the source ip */
+	msg.msg_control=ctrl_buf;
+	msg.msg_controllen=sizeof(ctrl_buf);
+	cm=CMSG_FIRSTHDR(&msg);
+	cm->cmsg_level=IPPROTO_IP;
+#ifdef IP_PKTINFO
+	cm->cmsg_type=IP_PKTINFO;
+	cm->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
+	pktinfo=(struct in_pktinfo*)CMSG_DATA(cm);
+	pktinfo->ipi_ifindex=0;
+	pktinfo->ipi_spec_dst.s_addr=from->sin.sin_addr.s_addr;
+#elif defined (IP_SENDSRCADDR)
+	cm->cmsg_type=IP_SENDSRCADDR;
+	cm->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
+	memcpy(CMSG_DATA(cm), &from->sin.sin_addr.s_addr,
+							sizeof(struct in_addr));
+#else
+#error "no method of setting the source ip supported"
+#endif /* IP_PKTINFO / IP_SENDSRCADDR */
+	/* only the part of the control buffer really used is passed on */
+	msg.msg_controllen=cm->cmsg_len;
+	msg.msg_flags=0;
+	return sendmsg(rsock, &msg, 0);
+}
+
+
+
+/** send an udp packet over an IP_HDRINCL raw socket.
+ * Both the ip and the udp headers are built here.
+ * If needed, send several fragments (on systems where
+ * RAW_IPHDR_INC_AUTO_FRAG is defined the fragmentation is left to the
+ * kernel and mtu is not used).
+ * @param rsock - raw socket
+ * @param buf - data
+ * @param len - data len
+ * @param from - source address:port (_must_ be non-null, but the ip address
+ *                can be 0, in which case it will be filled by the kernel).
+ * @param to - destination address:port
+ * @param mtu - maximum datagram size (including the ip header, excluding
+ *              link layer headers). Minimum allowed size is 28
+ *               (sizeof(ip_header + udp_header)). If mtu is lower, it will
+ *               be ignored (the packet will be sent un-fragmented).
+ *              0 can be used to disable fragmentation.
+ * @return  <0 on error (-2: datagram too big, -1: check errno),
+ *          number of bytes sent on success
+ *          (including the ip & udp headers =>
+ *               on success len + udpheader + ipheader size).
+ *          When the packet is fragmented, the value returned is the
+ *          result of the last sendmsg() executed (last fragment or the
+ *          first failed one).
+ */
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
+						union sockaddr_union* from,
+						union sockaddr_union* to, unsigned short mtu)
+{
+	struct msghdr snd_msg;
+	struct iovec iov[2];
+	struct ip_udp_hdr {
+		struct ip ip;
+		struct udphdr udp;
+	} hdr;
+	unsigned int totlen;
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
+	unsigned int ip_frag_size; /* fragment size */
+	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
+	unsigned int ip_payload;
+	unsigned int last_frag_offs;
+	void* last_frag_start;
+	int frg_no;
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
+	int ret;
+
+	totlen = len + sizeof(hdr);
+	/* the ip total length field has only 16 bits; the comparison must be
+	   inside unlikely(), which evaluates only to 0 or 1 */
+	if (unlikely(totlen > 65535))
+		return -2;
+	memset(&snd_msg, 0, sizeof(snd_msg));
+	snd_msg.msg_name=&to->sin;
+	snd_msg.msg_namelen=sockaddru_len(*to);
+	snd_msg.msg_iov=&iov[0];
+	/* prepare the udp & ip headers */
+	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
+	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
+				len + sizeof(hdr.udp), IPPROTO_UDP);
+	iov[0].iov_base=(char*)&hdr;
+	iov[0].iov_len=sizeof(hdr);
+	snd_msg.msg_iovlen=2;
+	snd_msg.msg_control=0;
+	snd_msg.msg_controllen=0;
+	snd_msg.msg_flags=0;
+	/* this part changes for different fragments */
+	/* packets are fragmented if mtu has a valid value (at least an
+	   IP header + UDP header fit in it) and if the total length is greater
+	   then the mtu */
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
+	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
+		iov[1].iov_base=buf;
+		iov[1].iov_len=len;
+		ret=sendmsg(rsock, &snd_msg, 0);
+#ifndef RAW_IPHDR_INC_AUTO_FRAG
+	} else {
+		ip_payload = len + sizeof(hdr.udp);
+		/* a fragment offset must be a multiple of 8 => its size must
+		   also be a multiple of 8, except for the last fragment */
+		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
+		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
+		frg_no = ip_payload / ip_frag_size +
+				 ((ip_payload % ip_frag_size) > last_frag_extra);
+		/*ip_last_frag_size = ip_payload % frag_size +
+							((ip_payload % frag_size) <= last_frag_extra) *
+							ip_frag_size; */
+		last_frag_offs = (frg_no - 1) * ip_frag_size;
+		/* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
+		   => last_frag_offs >= sizeof(hdr.udp) */
+		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
+		hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0
+											  (if 0 the kernel will fill it) */
+		/* send the first fragment */
+		iov[1].iov_base=buf;
+		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
+		   if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
+		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
+		ret=sendmsg(rsock, &snd_msg, 0);
+		if (unlikely(ret < 0))
+			goto end;
+		/* all the other fragments, include only the ip header */
+		iov[0].iov_len = sizeof(hdr.ip);
+		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
+		/* fragments between the first and the last */
+		while(unlikely(iov[1].iov_base < last_frag_start)) {
+			iov[1].iov_len = ip_frag_size;
+			hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
+			/* set MF; the offset is counted in 8 byte units from the
+			   start of the ip payload (=> udp header included) */
+			hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
+									(((char*)iov[1].iov_base - (char*)buf +
+										sizeof(hdr.udp)) / 8) | 0x2000 );
+			ret=sendmsg(rsock, &snd_msg, 0);
+			if (unlikely(ret < 0))
+				goto end;
+			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
+		}
+		/* last fragment */
+		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
+		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
+		/* don't set MF (last fragment) */
+		hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
+									(((char*)iov[1].iov_base - (char*)buf +
+										sizeof(hdr.udp)) / 8) );
+		ret=sendmsg(rsock, &snd_msg, 0);
+		if (unlikely(ret < 0))
+			goto end;
+	}
+end:
+#endif /* RAW_IPHDR_INC_AUTO_FRAG */
+	return ret;
+}
+
+
+
+#endif /* USE_RAW_SOCKS */

+ 56 - 0
raw_sock.h

@@ -0,0 +1,56 @@
+/*
+ * $Id$
+ *
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** raw socket functions.
+ *  @file raw_sock.h
+ *  @ingroup core
+ *  Module: @ref core
+ */
+/* 
+ * History:
+ * --------
+ *  2010-06-07  initial version (from older code) andrei
+ */
+
+#ifndef _raw_sock_h
+#define _raw_sock_h
+
+#include "ip_addr.h"
+
+/** filter for limiting packets received on raw sockets.
+ * Used by raw_udp4_recv(): a packet is accepted only if its destination
+ * ip matches dst and its destination port is inside [port1, port2].
+ */
+struct raw_filter{
+	struct net   dst;     /* network the destination ip must match */
+	unsigned short port1; /* lowest accepted dst. port; 0 => no port check */
+	unsigned short port2; /* highest accepted dst. port */
+	char proto;
+};
+
+
+/* create a raw socket for proto, optionally bound to ip and/or iface;
+   returns the socket or -1 on error */
+int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl);
+/* same as raw_socket(), with the protocol fixed to IPPROTO_UDP */
+int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl);
+/* receive an ipv4 packet (ip header included) and fill the ip part of
+   from and to; returns the packet length or <0 on error */
+int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
+					union sockaddr_union* to);
+/* receive an udp packet, advance *buf to the udp payload and fill from
+   and to (ip and port); rf is an optional filter; returns the payload
+   length or <0 on error */
+int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
+					union sockaddr_union* to, struct raw_filter* rf);
+/* send an udp packet on a raw socket without IP_HDRINCL (only the udp
+   header is built in user space) */
+int raw_udp4_send(int rsock, char* buf, unsigned int len,
+					union sockaddr_union* from,
+					union sockaddr_union* to);
+/* send an udp packet on an IP_HDRINCL raw socket (ip and udp headers
+   built in user space), fragmenting it according to mtu if needed */
+int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
+						union sockaddr_union* from,
+						union sockaddr_union* to, unsigned short mtu);
+
+#endif /* _raw_sock_h */

+ 71 - 0
sock_ut.c

@@ -0,0 +1,71 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** various socket related functions.
+ * @file sock_ut.c
+ * @ingroup: core 
+ */
+/*
+ * History:
+ * --------
+ *  2010-08-09  initial version (andrei)
+*/
+
+#include "sock_ut.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <errno.h>
+#include <arpa/inet.h>
+
+
+/** get the IP TTL.
+ * Reads the IP_TTL option of the socket.
+ * @param sock - socket.
+ * @return ttl on success, < 0 on error (errno is set by getsockopt())
+ */
+int sock_get_ttl(int sock)
+{
+	int ioptval;
+	socklen_t ioptvallen; /* getsockopt() expects a socklen_t* */
+
+	ioptvallen=sizeof(ioptval);
+	if (getsockopt( sock, IPPROTO_IP, IP_TTL, (void*) &ioptval,
+		    &ioptvallen) == -1 )
+	{
+		return -1;
+	}
+	return ioptval;
+}
+
+
+
+/** set the IP TTL on a socket.
+ * Sets the IP_TTL option of the socket to ttl.
+ * @param sock - socket.
+ * @param ttl - new ttl value.
+ * @return ttl on success, < 0 on error (errno is set by setsockopt())
+ */
+int sock_set_ttl(int sock, int ttl)
+{
+	int ioptval;
+
+	/* the value passed to the kernel (and returned on success) must be
+	   the requested ttl, not an uninitialized local */
+	ioptval=ttl;
+	if (setsockopt( sock, IPPROTO_IP, IP_TTL, (void*) &ioptval,
+					sizeof(ioptval)) == -1 )
+		return -1;
+	return ioptval;
+}
+
+/* vi: set ts=4 sw=4 tw=79:ai:cindent: */

+ 39 - 0
sock_ut.h

@@ -0,0 +1,39 @@
+/* 
+ * $Id$
+ * 
+ * Copyright (C) 2010 iptelorg GmbH
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/** various socket related functions.
+ * @file sock_ut.h
+ * @ingroup: core 
+ */
+/*
+ * History:
+ * --------
+ *  2010-08-09  initial version (andrei)
+*/
+
+#ifndef __sock_ut_h
+#define __sock_ut_h
+
+
+
+/* returns the IP_TTL value of the socket, < 0 on error */
+int sock_get_ttl(int sock);
+/* sets IP_TTL on the socket, returns < 0 on error */
+int sock_set_ttl(int sock, int ttl);
+
+
+#endif /*__sock_ut_h*/
+
+/* vi: set ts=4 sw=4 tw=79:ai:cindent: */

+ 50 - 19
udp_server.c

@@ -41,14 +41,14 @@
  *  2007-08-28  disable/set MTU discover option for the udp sockets
  *  2007-08-28  disable/set MTU discover option for the udp sockets
  *               (in linux it's enabled by default which produces udp packets
  *               (in linux it's enabled by default which produces udp packets
  *                with the DF flag ser) (patch from hscholz)
  *                with the DF flag ser) (patch from hscholz)
+ *  2010-06-15  support for using raw sockets for sending (andrei)
  */
  */
 
 
 
 
-/*!
- * \file
- * \brief SIP-router core :: 
- * \ingroup core
- * Module: \ref core
+/** udp send and loop-receive functions.
+ * @file udp_server.c
+ * @ingroup core
+ * Module: @ref core
  */
  */
 
 
 #include <stdlib.h>
 #include <stdlib.h>
@@ -67,6 +67,7 @@
 
 
 
 
 #include "udp_server.h"
 #include "udp_server.h"
+#include "compiler_opt.h"
 #include "globals.h"
 #include "globals.h"
 #include "config.h"
 #include "config.h"
 #include "dprint.h"
 #include "dprint.h"
@@ -74,6 +75,10 @@
 #include "mem/mem.h"
 #include "mem/mem.h"
 #include "ip_addr.h"
 #include "ip_addr.h"
 #include "cfg/cfg_struct.h"
 #include "cfg/cfg_struct.h"
+#ifdef USE_RAW_SOCKS
+#include "raw_sock.h"
+#endif /* USE_RAW_SOCKS */
+
 
 
 #ifdef USE_STUN
 #ifdef USE_STUN
   #include "ser_stun.h"
   #include "ser_stun.h"
@@ -551,6 +556,9 @@ int udp_send(struct dest_info* dst, char *buf, unsigned len)
 	int n;
 	int n;
 	int tolen;
 	int tolen;
 	struct ip_addr ip; /* used only on error, for debugging */
 	struct ip_addr ip; /* used only on error, for debugging */
+#ifdef USE_RAW_SOCKS
+	int mtu;
+#endif /* USE_RAW_SOCKS */
 
 
 #ifdef DBG_MSG_QA
 #ifdef DBG_MSG_QA
 	/* aborts on error, does nothing otherwise */
 	/* aborts on error, does nothing otherwise */
@@ -559,24 +567,47 @@ int udp_send(struct dest_info* dst, char *buf, unsigned len)
 		abort();
 		abort();
 	}
 	}
 #endif
 #endif
-
-	tolen=sockaddru_len(dst->to);
+#ifdef USE_RAW_SOCKS
+	if (likely( ! (raw_udp4_send_sock >= 0 &&
+					cfg_get(core, core_cfg, udp4_raw) &&
+					dst->send_sock->address.af == AF_INET) )) {
+#endif /* USE_RAW_SOCKS */
+		/* normal send over udp socket */
+		tolen=sockaddru_len(dst->to);
 again:
 again:
-	n=sendto(dst->send_sock->socket, buf, len, 0, &dst->to.s, tolen);
+		n=sendto(dst->send_sock->socket, buf, len, 0, &dst->to.s, tolen);
 #ifdef XL_DEBUG
 #ifdef XL_DEBUG
-	LOG(L_INFO, "INFO: send status: %d\n", n);
+		LOG(L_INFO, "INFO: send status: %d\n", n);
 #endif
 #endif
-	if (n==-1){
-		su2ip_addr(&ip, &dst->to);
-		LOG(L_ERR, "ERROR: udp_send: sendto(sock,%p,%u,0,%s:%d,%d): %s(%d)\n",
-				buf,len, ip_addr2a(&ip), su_getport(&dst->to), tolen,
-				strerror(errno),errno);
-		if (errno==EINTR) goto again;
-		if (errno==EINVAL) {
-			LOG(L_CRIT,"CRITICAL: invalid sendtoparameters\n"
-			"one possible reason is the server is bound to localhost and\n"
-			"attempts to send to the net\n");
+		if (unlikely(n==-1)){
+			su2ip_addr(&ip, &dst->to);
+			LOG(L_ERR, "ERROR: udp_send: sendto(sock,%p,%u,0,%s:%d,%d):"
+					" %s(%d)\n", buf,len, ip_addr2a(&ip),
+					su_getport(&dst->to), tolen, strerror(errno), errno);
+			if (errno==EINTR) goto again;
+			if (errno==EINVAL) {
+				LOG(L_CRIT,"CRITICAL: invalid sendtoparameters\n"
+				"one possible reason is the server is bound to localhost and\n"
+				"attempts to send to the net\n");
+			}
+		}
+#ifdef USE_RAW_SOCKS
+	} else {
+		/* send over a raw socket */
+		mtu = cfg_get(core, core_cfg, udp4_raw_mtu);
+raw_again:
+		n=raw_iphdr_udp4_send(raw_udp4_send_sock, buf, len,
+								&dst->send_sock->su,
+								&dst->to,
+								mtu);
+		if (unlikely(n==-1)){
+			su2ip_addr(&ip, &dst->to);
+			LOG(L_ERR, "ERROR: raw_iphdr_udp4_send(%d,%p,%u,...,%s:%d,%d):"
+					" %s(%d)\n", raw_udp4_send_sock, buf,len, ip_addr2a(&ip),
+					su_getport(&dst->to), mtu, strerror(errno), errno);
+			if (errno==EINTR) goto raw_again;
 		}
 		}
 	}
 	}
+#endif /* USE_RAW_SOCKS */
 	return n;
 	return n;
 }
 }

+ 8 - 1
ver_defs.h

@@ -76,6 +76,13 @@
 #endif
 #endif
 
 
 
 
+#ifdef USE_RAW_SOCKS
+#define USE_RAW_SOCKS_STR ", USE_RAW_SOCKS"
+#else
+#define USE_RAW_SOCKS_STR ""
+#endif
+
+
 #ifdef DISABLE_NAGLE
 #ifdef DISABLE_NAGLE
 #define DISABLE_NAGLE_STR ", DISABLE_NAGLE"
 #define DISABLE_NAGLE_STR ", DISABLE_NAGLE"
 #else
 #else
@@ -334,7 +341,7 @@
 
 
 #define SER_COMPILE_FLAGS \
 #define SER_COMPILE_FLAGS \
 	STATS_STR EXTRA_DEBUG_STR USE_IPV6_STR USE_TCP_STR USE_TLS_STR \
 	STATS_STR EXTRA_DEBUG_STR USE_IPV6_STR USE_TCP_STR USE_TLS_STR \
-	USE_SCTP_STR CORE_TLS_STR TLS_HOOKS_STR \
+	USE_SCTP_STR CORE_TLS_STR TLS_HOOKS_STR  USE_RAW_SOCKS_STR \
 	USE_STUN_STR DISABLE_NAGLE_STR USE_MCAST_STR NO_DEBUG_STR NO_LOG_STR \
 	USE_STUN_STR DISABLE_NAGLE_STR USE_MCAST_STR NO_DEBUG_STR NO_LOG_STR \
 	NO_SIG_DEBUG_STR DNS_IP_HACK_STR  SHM_MEM_STR SHM_MMAP_STR PKG_MALLOC_STR \
 	NO_SIG_DEBUG_STR DNS_IP_HACK_STR  SHM_MEM_STR SHM_MMAP_STR PKG_MALLOC_STR \
 	F_MALLOC_STR DL_MALLOC_STR SF_MALLOC_STR  LL_MALLOC_STR \
 	F_MALLOC_STR DL_MALLOC_STR SF_MALLOC_STR  LL_MALLOC_STR \