raw_sock.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
  1. /*
  2. * $Id$
  3. *
  4. * Copyright (C) 2010 iptelorg GmbH
  5. *
  6. * Permission to use, copy, modify, and distribute this software for any
  7. * purpose with or without fee is hereby granted, provided that the above
  8. * copyright notice and this permission notice appear in all copies.
  9. *
  10. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  11. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  12. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  13. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  14. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  15. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  16. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  17. */
  18. /** raw socket functions.
  19. * @file raw_sock.c
  20. * @ingroup core
  21. * Module: @ref core
  22. */
  23. /*
  24. * History:
  25. * --------
  26. * 2010-06-07 initial version (from older code) andrei
  27. * 2010-06-15 IP_HDRINCL raw socket support, including on-send
  28. * fragmentation (andrei)
  29. */
  30. #ifdef USE_RAW_SOCKS
  31. #include "compiler_opt.h"
  32. #include "ip_addr.h"
  33. #include "dprint.h"
  34. #include "str.h"
  35. #include "rand/fastrand.h"
  36. #include "globals.h"
  37. #include <errno.h>
  38. #include <string.h>
  39. #include <unistd.h>
  40. #include <sys/types.h>
  41. #include <fcntl.h>
  42. #include <sys/socket.h>
  43. #include <netinet/in.h>
  44. #include <netinet/in_systm.h>
  45. #include <arpa/inet.h>
  46. #ifndef __USE_BSD
  47. #define __USE_BSD /* on linux use bsd version of iphdr (more portable) */
  48. #endif /* __USE_BSD */
  49. #include <netinet/ip.h>
  50. #define __FAVOR_BSD /* on linux use bsd version of udphdr (more portable) */
  51. #include <netinet/udp.h>
  52. #include "raw_sock.h"
  53. #include "cfg/cfg.h"
  54. #include "cfg_core.h"
  #if defined(__OS_freebsd) || defined(__OS_netbsd) || defined(__OS_openbsd) \
      || defined(__OS_darwin)
  /** fragmentation is done by the kernel (no need to do it in userspace) */
  #define RAW_IPHDR_INC_AUTO_FRAG
  #endif /* __OS_* */

  /* Macros converting ip header fields to the format expected by the kernel
   * when the header is supplied by the application. */
  #if defined(__OS_freebsd) || defined(__OS_netbsd) || defined(__OS_darwin)
  /* On these systems the ip offset (along with the flags) and the ip total
   * length must be filled in _host_ byte order.
   * The same is true for openbsd < 2.1.
   * The expansions are fully parenthesized so that they behave as a single
   * operand in any surrounding expression. */
  /** convert the ip offset in the format expected by the kernel. */
  #define RAW_IPHDR_IP_OFF(off) ((unsigned short)(off))
  /** convert the ip total length in the format expected by the kernel. */
  #define RAW_IPHDR_IP_LEN(tlen) ((unsigned short)(tlen))
  #else /* __OS_* */
  /* linux, openbsd >= 2.1 a.s.o.: network byte order */
  /** convert the ip offset in the format expected by the kernel. */
  #define RAW_IPHDR_IP_OFF(off) htons((unsigned short)(off))
  /** convert the ip total length in the format expected by the kernel. */
  #define RAW_IPHDR_IP_LEN(tlen) htons((unsigned short)(tlen))
  #endif /* __OS_* */

  /** set if the raw socket is in capture mode for IPIP; when non-zero
   * raw_udp4_recv() skips one extra ip header in front of each packet. */
  int raw_ipip = 0;
  78. /** create and return a raw socket.
  79. * @param proto - protocol used (e.g. IPPROTO_UDP, IPPROTO_RAW)
  80. * @param ip - if not null the socket will be bound on this ip.
  81. * @param iface - if not null the socket will be bound to this interface
  82. * (SO_BINDTODEVICE). This is supported only on linux.
  83. * @param iphdr_incl - set to 1 if packets send on this socket include
  84. * a pre-built ip header (some fields, like the checksum
  85. * will still be filled by the kernel, OTOH packet
  86. * fragmentation has to be done in user space).
  87. * @return socket on success, -1 on error
  88. */
  89. int raw_socket(int proto, struct ip_addr* ip, str* iface, int iphdr_incl)
  90. {
  91. int sock;
  92. int t;
  93. union sockaddr_union su;
  94. #if defined (SO_BINDTODEVICE)
  95. char short_ifname[sizeof(int)];
  96. int ifname_len;
  97. char* ifname;
  98. #endif /* SO_BINDTODEVICE */
  99. sock = socket(PF_INET, SOCK_RAW, proto);
  100. if (sock==-1)
  101. goto error;
  102. /* set socket options */
  103. if (iphdr_incl) {
  104. t=1;
  105. if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &t, sizeof(t))<0){
  106. ERR("raw_socket: setsockopt(IP_HDRINCL) failed: %s [%d]\n",
  107. strerror(errno), errno);
  108. goto error;
  109. }
  110. } else {
  111. /* IP_PKTINFO makes no sense if the ip header is included */
  112. /* using IP_PKTINFO */
  113. t=1;
  114. #ifdef IP_PKTINFO
  115. if (setsockopt(sock, IPPROTO_IP, IP_PKTINFO, &t, sizeof(t))<0){
  116. ERR("raw_socket: setsockopt(IP_PKTINFO) failed: %s [%d]\n",
  117. strerror(errno), errno);
  118. goto error;
  119. }
  120. #elif defined(IP_RECVDSTADDR)
  121. if (setsockopt(sock, IPPROTO_IP, IP_RECVDSTADDR, &t, sizeof(t))<0){
  122. ERR("raw_socket: setsockop(IP_RECVDSTADDR) failed: %s [%d]\n",
  123. strerror(errno), errno);
  124. goto error;
  125. }
  126. #else
  127. #error "no method of getting the destination ip address supported"
  128. #endif /* IP_RECVDSTADDR / IP_PKTINFO */
  129. }
  130. #if defined (IP_MTU_DISCOVER) && defined (IP_PMTUDISC_DONT)
  131. t=IP_PMTUDISC_DONT;
  132. if(setsockopt(sock, IPPROTO_IP, IP_MTU_DISCOVER, &t, sizeof(t)) ==-1){
  133. ERR("raw_socket: setsockopt(IP_MTU_DISCOVER): %s\n",
  134. strerror(errno));
  135. goto error;
  136. }
  137. #endif /* IP_MTU_DISCOVER && IP_PMTUDISC_DONT */
  138. if (iface && iface->s){
  139. #if defined (SO_BINDTODEVICE)
  140. /* workaround for linux bug: arg to setsockopt must have at least
  141. * sizeof(int) size or EINVAL would be returned */
  142. if (iface->len<sizeof(int)){
  143. memcpy(short_ifname, iface->s, iface->len);
  144. short_ifname[iface->len]=0; /* make sure it's zero term */
  145. ifname_len=sizeof(short_ifname);
  146. ifname=short_ifname;
  147. }else{
  148. ifname_len=iface->len;
  149. ifname=iface->s;
  150. }
  151. if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, ifname, ifname_len)
  152. <0){
  153. ERR("raw_socket: could not bind to %.*s: %s [%d]\n",
  154. iface->len, ZSW(iface->s), strerror(errno), errno);
  155. goto error;
  156. }
  157. #else /* !SO_BINDTODEVICE */
  158. /* SO_BINDTODEVICE is linux specific => cannot bind to a device */
  159. ERR("raw_socket: bind to device supported only on linux\n");
  160. goto error;
  161. #endif /* SO_BINDTODEVICE */
  162. }
  163. /* FIXME: probe_max_receive_buffer(sock) missing */
  164. if (ip){
  165. init_su(&su, ip, 0);
  166. if (bind(sock, &su.s, sockaddru_len(su))==-1){
  167. ERR("raw_socket: bind(%s) failed: %s [%d]\n",
  168. ip_addr2a(ip), strerror(errno), errno);
  169. goto error;
  170. }
  171. }
  172. return sock;
  173. error:
  174. if (sock!=-1) close(sock);
  175. return -1;
  176. }
  177. /** create and return an udp over ipv4 raw socket.
  178. * @param ip - if not null the socket will be bound on this ip.
  179. * @param iface - if not null the socket will be bound to this interface
  180. * (SO_BINDTODEVICE).
  181. * @param iphdr_incl - set to 1 if packets send on this socket include
  182. * a pre-built ip header (some fields, like the checksum
  183. * will still be filled by the kernel, OTOH packet
  184. * fragmentation has to be done in user space).
  185. * @return socket on success, -1 on error
  186. */
  187. int raw_udp4_socket(struct ip_addr* ip, str* iface, int iphdr_incl)
  188. {
  189. return raw_socket(IPPROTO_UDP, ip, iface, iphdr_incl);
  190. }
  191. /** receives an ipv4 packet using a raw socket.
  192. * An ipv4 packet is received in buf, using IP_PKTINFO or IP_RECVDSTADDR.
  193. * from and to are filled (only the ip part the ports are 0 since this
  194. * function doesn't try to look beyond the IP level).
  195. * @param sock - raw socket
  196. * @param buf - detination buffer.
  197. * @param len - buffer len (should be enough for receiving a packet +
  198. * IP header).
  199. * @param from - result parameter, the IP address part of it will be filled
  200. * with the source address and the port with 0.
  201. * @param to - result parameter, the IP address part of it will be filled
  202. * with the destination (local) address and the port with 0.
  203. * @return packet len or <0 on error: -1 (check errno),
  204. * -2 no IP_PKTINFO/IP_RECVDSTADDR found or AF mismatch
  205. */
  206. int recvpkt4(int sock, char* buf, int len, union sockaddr_union* from,
  207. union sockaddr_union* to)
  208. {
  209. struct iovec iov[1];
  210. struct msghdr rcv_msg;
  211. struct cmsghdr* cmsg;
  212. #ifdef IP_PKTINFO
  213. struct in_pktinfo* rcv_pktinfo;
  214. #endif /* IP_PKTINFO */
  215. int n, ret;
  216. char msg_ctrl_buf[1024];
  217. iov[0].iov_base=buf;
  218. iov[0].iov_len=len;
  219. rcv_msg.msg_name=from;
  220. rcv_msg.msg_namelen=sockaddru_len(*from);
  221. rcv_msg.msg_control=msg_ctrl_buf;
  222. rcv_msg.msg_controllen=sizeof(msg_ctrl_buf);
  223. rcv_msg.msg_iov=&iov[0];
  224. rcv_msg.msg_iovlen=1;
  225. ret=-2; /* no PKT_INFO or AF mismatch */
  226. retry:
  227. n=recvmsg(sock, &rcv_msg, MSG_WAITALL);
  228. if (unlikely(n==-1)){
  229. if (errno==EINTR)
  230. goto retry;
  231. ret=n;
  232. goto end;
  233. }
  234. /* find the pkt info */
  235. for (cmsg=CMSG_FIRSTHDR(&rcv_msg); cmsg; cmsg=CMSG_NXTHDR(&rcv_msg, cmsg)){
  236. #ifdef IP_PKTINFO
  237. if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
  238. (cmsg->cmsg_type==IP_PKTINFO))) {
  239. rcv_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
  240. to->sin.sin_family=AF_INET;
  241. memcpy(&to->sin.sin_addr, &rcv_pktinfo->ipi_spec_dst.s_addr,
  242. sizeof(to->sin.sin_addr));
  243. to->sin.sin_port=0; /* not known */
  244. /* interface no. in ipi_ifindex */
  245. ret=n; /* success */
  246. break;
  247. }
  248. #elif defined (IP_RECVDSTADDR)
  249. if (likely((cmsg->cmsg_level==IPPROTO_IP) &&
  250. (cmsg->cmsg_type==IP_RECVDSTADDR))) {
  251. to->sin.sin_family=AF_INET;
  252. memcpy(&to->sin.sin_addr, CMSG_DATA(cmsg),
  253. sizeof(to->sin.sin_addr));
  254. to->sin.sin_port=0; /* not known */
  255. ret=n; /* success */
  256. break;
  257. }
  258. #else
  259. #error "no method of getting the destination ip address supported"
  260. #endif /* IP_PKTINFO / IP_RECVDSTADDR */
  261. }
  262. end:
  263. return ret;
  264. }
  265. /* receive an ipv4 udp packet over a raw socket.
  266. * The packet is copied in *buf and *buf is advanced to point to the
  267. * payload. Fills from and to.
  268. * @param rsock - raw socket
  269. * @param buf - the packet will be written to where *buf points intially and
  270. * then *buf will be advanced to point to the udp payload.
  271. * @param len - buffer length (should be enough to hold at least the
  272. * ip and udp headers + 1 byte).
  273. * @param from - result parameter, filled with source address and port of the
  274. * packet.
  275. * @param from - result parameter, filled with destination (local) address and
  276. * port of the packet.
  277. * @param rf - filter used to decide whether or not the packet is
  278. * accepted/processed. If null, all the packets are accepted.
  279. * @return packet len or <0 on error (-1 and -2 on recv error @see recvpkt4,
  280. * -3 if the headers are invalid and -4 if the packet doesn't
  281. * match the filter).
  282. */
  283. int raw_udp4_recv(int rsock, char** buf, int len, union sockaddr_union* from,
  284. union sockaddr_union* to, struct raw_filter* rf)
  285. {
  286. int n;
  287. unsigned short dst_port;
  288. unsigned short src_port;
  289. struct ip_addr dst_ip;
  290. char* end;
  291. char* udph_start;
  292. char* udp_payload;
  293. struct ip iph;
  294. struct udphdr udph;
  295. unsigned short udp_len;
  296. n=recvpkt4(rsock, *buf, len, from, to);
  297. if (unlikely(n<0)) goto error;
  298. end=*buf+n;
  299. if (unlikely(n<((sizeof(struct ip) * raw_ipip ? 2 : 1)+sizeof(struct udphdr)))) {
  300. n=-3;
  301. goto error;
  302. }
  303. if(raw_ipip)
  304. *buf = *buf + sizeof(struct ip);
  305. /* FIXME: if initial buffer is aligned, one could skip the memcpy
  306. and directly cast ip and udphdr pointer to the memory */
  307. memcpy(&iph, *buf, sizeof(struct ip));
  308. udph_start=*buf+iph.ip_hl*4;
  309. udp_payload=udph_start+sizeof(struct udphdr);
  310. if (unlikely(udp_payload>end)){
  311. n=-3;
  312. goto error;
  313. }
  314. memcpy(&udph, udph_start, sizeof(struct udphdr));
  315. udp_len=ntohs(udph.uh_ulen);
  316. if (unlikely((udph_start+udp_len)!=end)){
  317. if ((udph_start+udp_len)>end){
  318. n=-3;
  319. goto error;
  320. }else{
  321. ERR("udp length too small: %d/%d\n",
  322. (int)udp_len, (int)(end-udph_start));
  323. n=-3;
  324. goto error;
  325. }
  326. }
  327. /* advance buf */
  328. *buf=udp_payload;
  329. n=(int)(end-*buf);
  330. /* fill ip from the packet (needed if no PKT_INFO is used) */
  331. dst_ip.af=AF_INET;
  332. dst_ip.len=4;
  333. dst_ip.u.addr32[0]=iph.ip_dst.s_addr;
  334. /* fill dst_port */
  335. dst_port=ntohs(udph.uh_dport);
  336. ip_addr2su(to, &dst_ip, dst_port);
  337. /* fill src_port */
  338. src_port=ntohs(udph.uh_sport);
  339. su_setport(from, src_port);
  340. if (likely(rf)) {
  341. su2ip_addr(&dst_ip, to);
  342. if ( (dst_port && rf->port1 && ((dst_port<rf->port1) ||
  343. (dst_port>rf->port2)) ) ||
  344. (matchnet(&dst_ip, &rf->dst)!=1) ){
  345. /* no match */
  346. n=-4;
  347. goto error;
  348. }
  349. }
  350. error:
  351. return n;
  352. }
  /** udp checksum helper: compute the pseudo-header 16-bit "sum".
   * Computes the partial checksum (no complement) of the pseudo-header
   * and of the udp header (with the checksum field counted as 0).
   * It is meant to be used by udpv4_chksum().
   * The function has internal linkage (inline static, like the other helpers
   * in this file): a plain "inline" definition provides no external
   * definition in C99 and later, so a call that is not inlined would fail
   * to link.
   * @param uh - filled udp header (fields in network byte order).
   * @param src - source ip address in network byte order.
   * @param dst - destination ip address in network byte order.
   * @param length - payload length (not including the udp header),
   *                 in _host_ order. Currently not used: the length is
   *                 taken from uh->uh_ulen.
   * @return the partial checksum in host order
   */
  inline static unsigned short udpv4_vhdr_sum(struct udphdr* uh,
                                              struct in_addr* src,
                                              struct in_addr* dst,
                                              unsigned short length)
  {
      unsigned sum;

      (void)length; /* kept for interface compatibility */
      /* pseudo header: the 16-bit words are added as found in memory
       * (network byte order), the result is converted only at the end */
      sum = (src->s_addr >> 16) + (src->s_addr & 0xffff) +
            (dst->s_addr >> 16) + (dst->s_addr & 0xffff) +
            htons(IPPROTO_UDP) + (uh->uh_ulen);
      /* udp header */
      sum += (uh->uh_dport) + (uh->uh_sport) + (uh->uh_ulen) + 0 /*chksum*/;
      /* fold it */
      sum = (sum >> 16) + (sum & 0xffff);
      sum += (sum >> 16);
      /* no complement */
      return ntohs((unsigned short) sum);
  }
  /** compute the udp over ipv4 checksum.
   * The pseudo-header and udp header part comes from udpv4_vhdr_sum(); the
   * payload is then added as big-endian 16-bit words (an odd trailing byte
   * is padded with a zero byte), the sum is folded and complemented.
   * A computed value of 0 is returned as 0xffff: in udp a transmitted
   * checksum of 0 means "no checksum was generated", so the equivalent
   * all-ones value has to be used instead.
   * @param u - filled udp header (except checksum).
   * @param src - source ip v4 address, in _network_ byte order.
   * @param dst - destination ip v4 address, in _network_ byte order.
   * @param data - pointer to the udp payload.
   * @param length - payload length, not including the udp header and in
   *                 _host_ order. The length must be <= 0xffff - 8
   *                 (to allow space for the udp header).
   * @return the checksum in _host_ order (never 0) */
  inline static unsigned short udpv4_chksum(struct udphdr* u,
                                            struct in_addr* src,
                                            struct in_addr* dst,
                                            unsigned char* data,
                                            unsigned short length)
  {
      unsigned sum;
      unsigned char* end;
      unsigned short chksum;

      sum = udpv4_vhdr_sum(u, src, dst, length);
      end = data + (length & (~0x1)); /* make sure it's even */
      /* TODO: 16 & 32 bit aligned version */
      /* not aligned */
      for (; data < end; data += 2) {
          sum += ((data[0] << 8) + data[1]);
      }
      if (length & 0x1)
          sum += ((*data) << 8); /* last odd byte, zero padded */
      /* fold it */
      sum = (sum >> 16) + (sum & 0xffff);
      sum += (sum >> 16);
      chksum = (unsigned short)~sum;
      /* 0 is reserved for "no checksum", send all ones instead */
      return chksum ? chksum : 0xffff;
  }
  /** build an udp header for a given payload.
   * The ports are copied as they are from the socket addresses, the length
   * field is set to payload length + udp header size (network byte order).
   * @param u - udp header that will be filled.
   * @param from - source ip v4 address and port.
   * @param to - destination ip v4 address and port.
   * @param buf - pointer to the payload.
   * @param len - payload length (not including the udp header).
   * @param do_chk - if set the udp checksum will be computed, else it will
   *                 be set to 0.
   * @return 0 on success, < 0 on error (currently it always returns 0).
   */
  inline static int mk_udp_hdr(struct udphdr* u, struct sockaddr_in* from,
                               struct sockaddr_in* to, unsigned char* buf,
                               int len, int do_chk)
  {
      unsigned short cksum;

      /* ports and length first: the checksum is computed over them */
      u->uh_sport = from->sin_port;
      u->uh_dport = to->sin_port;
      u->uh_ulen = htons((unsigned short)len + sizeof(struct udphdr));

      cksum = 0; /* no checksum */
      if (do_chk)
          cksum = htons(
                  udpv4_chksum(u, &from->sin_addr, &to->sin_addr, buf, len));
      u->uh_sum = cksum;
      return 0;
  }
  434. /** fill in an ip header.
  435. * Note: the checksum is _not_ computed.
  436. * WARNING: The ip header length and offset might be filled in
  437. * _host_ byte order or network byte order (depending on the OS, for example
  438. * freebsd needs host byte order for raw sockets with IPHDR_INC, while
  439. * linux needs network byte order).
  440. * @param iph - ip header that will be filled.
  441. * @param from - source ip v4 address (network byte order).
  442. * @param to - destination ip v4 address (network byte order).
  443. * @param payload len - payload length (not including the ip header).
  444. * @param proto - protocol.
  445. * @return 0 on success, < 0 on error.
  446. */
  447. inline static int mk_ip_hdr(struct ip* iph, struct in_addr* from,
  448. struct in_addr* to, int payload_len, unsigned char proto)
  449. {
  450. iph->ip_hl = sizeof(struct ip)/4;
  451. iph->ip_v = 4;
  452. iph->ip_tos = tos;
  453. /* on freebsd ip_len _must_ be in _host_ byte order instead
  454. of network byte order. On linux the length is ignored (it's filled
  455. automatically every time). */
  456. iph->ip_len = RAW_IPHDR_IP_LEN(payload_len + sizeof(struct ip));
  457. iph->ip_id = 0; /* 0 => will be filled automatically by the kernel */
  458. iph->ip_off = 0; /* frag.: first 3 bits=flags=0, last 13 bits=offset */
  459. iph->ip_ttl = cfg_get(core, core_cfg, udp4_raw_ttl);
  460. iph->ip_p = proto;
  461. iph->ip_src = *from;
  462. iph->ip_dst = *to;
  463. iph->ip_sum = 0;
  464. return 0;
  465. }
  466. /** send an udp packet over a non-ip_hdrincl raw socket.
  467. * @param rsock - raw socket
  468. * @param buf - data
  469. * @param len - data len
  470. * @param from - source address:port (_must_ be non-null, but the ip address
  471. * can be 0, in which case it will be filled by the kernel).
  472. * @param to - destination address:port
  473. * @return <0 on error (errno set too), number of bytes sent on success
  474. * (including the udp header => on success len + udpheader size).
  475. */
  476. int raw_udp4_send(int rsock, char* buf, unsigned int len,
  477. union sockaddr_union* from,
  478. union sockaddr_union* to)
  479. {
  480. struct msghdr snd_msg;
  481. struct cmsghdr* cmsg;
  482. #ifdef IP_PKTINFO
  483. struct in_pktinfo* snd_pktinfo;
  484. #endif /* IP_PKTINFO */
  485. struct iovec iov[2];
  486. struct udphdr udp_hdr;
  487. char msg_ctrl_snd_buf[1024];
  488. int ret;
  489. memset(&snd_msg, 0, sizeof(snd_msg));
  490. snd_msg.msg_name=&to->sin;
  491. snd_msg.msg_namelen=sockaddru_len(*to);
  492. snd_msg.msg_iov=&iov[0];
  493. /* prepare udp header */
  494. mk_udp_hdr(&udp_hdr, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
  495. iov[0].iov_base=(char*)&udp_hdr;
  496. iov[0].iov_len=sizeof(udp_hdr);
  497. iov[1].iov_base=buf;
  498. iov[1].iov_len=len;
  499. snd_msg.msg_iovlen=2;
  500. snd_msg.msg_control=msg_ctrl_snd_buf;
  501. snd_msg.msg_controllen=sizeof(msg_ctrl_snd_buf);
  502. /* init pktinfo cmsg */
  503. cmsg=CMSG_FIRSTHDR(&snd_msg);
  504. cmsg->cmsg_level=IPPROTO_IP;
  505. #ifdef IP_PKTINFO
  506. cmsg->cmsg_type=IP_PKTINFO;
  507. cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_pktinfo));
  508. snd_pktinfo=(struct in_pktinfo*)CMSG_DATA(cmsg);
  509. snd_pktinfo->ipi_ifindex=0;
  510. snd_pktinfo->ipi_spec_dst.s_addr=from->sin.sin_addr.s_addr;
  511. #elif defined (IP_SENDSRCADDR)
  512. cmsg->cmsg_type=IP_SENDSRCADDR;
  513. cmsg->cmsg_len=CMSG_LEN(sizeof(struct in_addr));
  514. memcpy(CMSG_DATA(cmsg), &from->sin.sin_addr.s_addr,
  515. sizeof(struct in_addr));
  516. #else
  517. #error "no method of setting the source ip supported"
  518. #endif /* IP_PKTINFO / IP_SENDSRCADDR */
  519. snd_msg.msg_controllen=cmsg->cmsg_len;
  520. snd_msg.msg_flags=0;
  521. ret=sendmsg(rsock, &snd_msg, 0);
  522. return ret;
  523. }
  524. /** send an udp packet over an IP_HDRINCL raw socket.
  525. * If needed, send several fragments.
  526. * @param rsock - raw socket
  527. * @param buf - data
  528. * @param len - data len
  529. * @param from - source address:port (_must_ be non-null, but the ip address
  530. * can be 0, in which case it will be filled by the kernel).
  531. * @param to - destination address:port
  532. * @param mtu - maximum datagram size (including the ip header, excluding
  533. * link layer headers). Minimum allowed size is 28
  534. * (sizeof(ip_header + udp_header)). If mtu is lower, it will
  535. * be ignored (the packet will be sent un-fragmented).
  536. * 0 can be used to disable fragmentation.
  537. * @return <0 on error (-2: datagram too big, -1: check errno),
  538. * number of bytes sent on success
  539. * (including the ip & udp headers =>
  540. * on success len + udpheader + ipheader size).
  541. */
  542. int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
  543. union sockaddr_union* from,
  544. union sockaddr_union* to, unsigned short mtu)
  545. {
  546. struct msghdr snd_msg;
  547. struct iovec iov[2];
  548. struct ip_udp_hdr {
  549. struct ip ip;
  550. struct udphdr udp;
  551. } hdr;
  552. unsigned int totlen;
  553. #ifndef RAW_IPHDR_INC_AUTO_FRAG
  554. unsigned int ip_frag_size; /* fragment size */
  555. unsigned int last_frag_extra; /* extra bytes possible in the last frag */
  556. unsigned int ip_payload;
  557. unsigned int last_frag_offs;
  558. void* last_frag_start;
  559. int frg_no;
  560. #endif /* RAW_IPHDR_INC_AUTO_FRAG */
  561. int ret;
  562. totlen = len + sizeof(hdr);
  563. if (unlikely(totlen) > 65535)
  564. return -2;
  565. memset(&snd_msg, 0, sizeof(snd_msg));
  566. snd_msg.msg_name=&to->sin;
  567. snd_msg.msg_namelen=sockaddru_len(*to);
  568. snd_msg.msg_iov=&iov[0];
  569. /* prepare the udp & ip headers */
  570. mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
  571. mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
  572. len + sizeof(hdr.udp), IPPROTO_UDP);
  573. iov[0].iov_base=(char*)&hdr;
  574. iov[0].iov_len=sizeof(hdr);
  575. snd_msg.msg_iovlen=2;
  576. snd_msg.msg_control=0;
  577. snd_msg.msg_controllen=0;
  578. snd_msg.msg_flags=0;
  579. /* this part changes for different fragments */
  580. /* packets are fragmented if mtu has a valid value (at least an
  581. IP header + UDP header fit in it) and if the total length is greater
  582. then the mtu */
  583. #ifndef RAW_IPHDR_INC_AUTO_FRAG
  584. if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
  585. #endif /* RAW_IPHDR_INC_AUTO_FRAG */
  586. iov[1].iov_base=buf;
  587. iov[1].iov_len=len;
  588. ret=sendmsg(rsock, &snd_msg, 0);
  589. #ifndef RAW_IPHDR_INC_AUTO_FRAG
  590. } else {
  591. ip_payload = len + sizeof(hdr.udp);
  592. /* a fragment offset must be a multiple of 8 => its size must
  593. also be a multiple of 8, except for the last fragment */
  594. ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
  595. last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
  596. frg_no = ip_payload / ip_frag_size +
  597. ((ip_payload % ip_frag_size) > last_frag_extra);
  598. /*ip_last_frag_size = ip_payload % frag_size +
  599. ((ip_payload % frag_size) <= last_frag_extra) *
  600. ip_frag_size; */
  601. last_frag_offs = (frg_no - 1) * ip_frag_size;
  602. /* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu
  603. => last_frag_offs >= sizeof(hdr.udp) */
  604. last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
  605. hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0
  606. (if 0 the kernel will fill it) */
  607. /* send the first fragment */
  608. iov[1].iov_base=buf;
  609. /* ip_frag_size >= sizeof(hdr.udp) because we are here only
  610. if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */
  611. iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
  612. hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
  613. hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
  614. ret=sendmsg(rsock, &snd_msg, 0);
  615. if (unlikely(ret < 0))
  616. goto end;
  617. /* all the other fragments, include only the ip header */
  618. iov[0].iov_len = sizeof(hdr.ip);
  619. iov[1].iov_base = (char*)iov[1].iov_base + iov[1].iov_len;
  620. /* fragments between the first and the last */
  621. while(unlikely(iov[1].iov_base < last_frag_start)) {
  622. iov[1].iov_len = ip_frag_size;
  623. hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
  624. /* set MF */
  625. hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
  626. (((char*)iov[1].iov_base - (char*)buf +
  627. sizeof(hdr.udp)) / 8) | 0x2000 );
  628. ret=sendmsg(rsock, &snd_msg, 0);
  629. if (unlikely(ret < 0))
  630. goto end;
  631. iov[1].iov_base = (char*)iov[1].iov_base + iov[1].iov_len;
  632. }
  633. /* last fragment */
  634. iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
  635. hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
  636. /* don't set MF (last fragment) */
  637. hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
  638. (((char*)iov[1].iov_base - (char*)buf +
  639. sizeof(hdr.udp)) / 8) );
  640. ret=sendmsg(rsock, &snd_msg, 0);
  641. if (unlikely(ret < 0))
  642. goto end;
  643. }
  644. end:
  645. #endif /* RAW_IPHDR_INC_AUTO_FRAG */
  646. return ret;
  647. }
  648. #endif /* USE_RAW_SOCKS */