* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) University of Cambridge 1995 - 2015 */
+/* Copyright (c) University of Cambridge 1995 - 2018 */
+/* Copyright (c) The Exim Maintainers 2020 - 2021 */
/* See the file NOTICE for conditions of use and distribution. */
/* Functions for doing things with sockets. With the advent of IPv6 this has
got messier, so that it's worth pulling out the code into separate functions
-that other parts of Exim can call, expecially as there are now several
+that other parts of Exim can call, especially as there are now several
different places in the code where sockets are used. */
#include "exim.h"
+#if defined(TCP_FASTOPEN)
+# if defined(MSG_FASTOPEN) || defined(EXIM_TFO_CONNECTX) || defined(EXIM_TFO_FREEBSD)
+# define EXIM_SUPPORT_TFO
+# endif
+#endif
+
/*************************************************
* Create a socket *
*************************************************/
return sizeof(sin->v6);
}
else
-#else /* HAVE_IPv6 */
-af = af; /* Avoid compiler warning */
#endif /* HAVE_IPV6 */
/* Setup code when using IPv4 socket. The wildcard address is "". */
address the remote address, in text form
port the remote port
timeout a timeout (zero for indefinite timeout)
+ fastopen_blob non-null iff TCP_FASTOPEN can be used; may indicate early-data to
+ be sent in SYN segment. Any such data must be idempotent.
Returns: 0 on success; -1 on failure, with errno set
*/
int
-ip_connect(int sock, int af, const uschar *address, int port, int timeout)
+ip_connect(int sock, int af, const uschar *address, int port, int timeout,
+ const blob * fastopen_blob)
{
struct sockaddr_in s_in4;
struct sockaddr *s_ptr;
s_len = sizeof(s_in6);
}
else
-#else /* HAVE_IPV6 */
-af = af; /* Avoid compiler warning */
#endif /* HAVE_IPV6 */
/* For an IPv4 address, use an IPv4 sockaddr structure, even on a system with
/* If no connection timeout is set, just call connect() without setting a
timer, thereby allowing the inbuilt OS timeout to operate. */
+callout_address = string_sprintf("[%s]:%d", address, port);
sigalrm_seen = FALSE;
-if (timeout > 0) alarm(timeout);
-rc = connect(sock, s_ptr, s_len);
+if (timeout > 0) ALARM(timeout);
+
+#ifdef EXIM_SUPPORT_TFO
+/* TCP Fast Open, if the system has a cookie from a previous call to
+this peer, can send data in the SYN packet. The peer can send data
+before it gets our ACK of its SYN,ACK - the latter is useful for
+the SMTP banner. Other (than SMTP) cases of TCP connections can
+possibly use the data-on-syn, so support that too. */
+
+if (fastopen_blob && f.tcp_fastopen_ok)
+ {
+# ifdef MSG_FASTOPEN
+ /* This is a Linux implementation. */
+
+ if ((rc = sendto(sock, fastopen_blob->data, fastopen_blob->len,
+ MSG_FASTOPEN | MSG_DONTWAIT, s_ptr, s_len)) >= 0)
+ /* seen for with-data, experimental TFO option, with-cookie case */
+ /* seen for with-data, proper TFO opt, with-cookie case */
+ {
+ DEBUG(D_transport|D_v)
+ debug_printf(" TFO mode connection attempt to %s, %lu data\n",
+ address, (unsigned long)fastopen_blob->len);
+ /*XXX also seen on successful TFO, sigh */
+ tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;
+ }
+ else switch (errno)
+ {
+ case EINPROGRESS: /* expected if we had no cookie for peer */
+ /* seen for no-data, proper TFO option, both cookie-request and with-cookie cases */
+ /* apparently no visibility of the difference at this point */
+ /* seen for with-data, proper TFO opt, cookie-req */
+ /* with network delay, post-conn tcp_info sees unacked 1 for R, 2 for C; code in smtp_out.c */
+ /* ? older Experimental TFO option behaviour ? */
+ DEBUG(D_transport|D_v) debug_printf(" TFO mode sendto, %s data: EINPROGRESS\n",
+ fastopen_blob->len > 0 ? "with" : "no");
+ if (!fastopen_blob->data)
+ {
+ tcp_out_fastopen = TFO_ATTEMPTED_NODATA; /* we tried; unknown if useful yet */
+ rc = 0;
+ }
+ else /* queue unsent data */
+ rc = send(sock, fastopen_blob->data, fastopen_blob->len, 0);
+ break;
+
+ case EOPNOTSUPP:
+ DEBUG(D_transport)
+ debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
+ goto legacy_connect;
+
+ case EPIPE:
+ DEBUG(D_transport)
+ debug_printf("Tried TCP Fast Open but kernel too old to support it\n");
+ goto legacy_connect;
+ }
+
+# elif defined(EXIM_TFO_FREEBSD)
+ /* Re: https://people.freebsd.org/~pkelsey/tfo-tools/tfo-client.c */
+
+ if (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN, &on, sizeof(on)) < 0)
+ {
+ DEBUG(D_transport)
+ debug_printf("Tried TCP Fast Open but apparently not enabled by sysctl\n");
+ goto legacy_connect;
+ }
+ if ((rc = sendto(sock, fastopen_blob->data, fastopen_blob->len, 0,
+ s_ptr, s_len)) >= 0)
+ {
+ DEBUG(D_transport|D_v)
+ debug_printf(" TFO mode connection attempt to %s, %lu data\n",
+ address, (unsigned long)fastopen_blob->len);
+ tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;
+ }
+
+# elif defined(EXIM_TFO_CONNECTX)
+ /* MacOS */
+ sa_endpoints_t ends = {
+ .sae_srcif = 0, .sae_srcaddr = NULL, .sae_srcaddrlen = 0,
+ .sae_dstaddr = s_ptr, .sae_dstaddrlen = s_len };
+ struct iovec iov = {
+ .iov_base = fastopen_blob->data, .iov_len = fastopen_blob->len };
+ size_t len;
+
+ if ((rc = connectx(sock, &ends, SAE_ASSOCID_ANY,
+ CONNECT_DATA_IDEMPOTENT, &iov, 1, &len, NULL)) == 0)
+ {
+ DEBUG(D_transport|D_v)
+ debug_printf(" TFO mode connection attempt to %s, %lu data\n",
+ address, (unsigned long)fastopen_blob->len);
+ tcp_out_fastopen = fastopen_blob->len > 0 ? TFO_ATTEMPTED_DATA : TFO_ATTEMPTED_NODATA;
+
+ if (len != fastopen_blob->len)
+ DEBUG(D_transport|D_v)
+ debug_printf(" only queued %lu data!\n", (unsigned long)len);
+ }
+ else if (errno == EINPROGRESS)
+ {
+ DEBUG(D_transport|D_v) debug_printf(" TFO mode connectx, %s data: EINPROGRESS\n",
+ fastopen_blob->len > 0 ? "with" : "no");
+ if (!fastopen_blob->data)
+ {
+ tcp_out_fastopen = TFO_ATTEMPTED_NODATA; /* we tried; unknown if useful yet */
+ rc = 0;
+ }
+ else /* assume that no data was queued; block in send */
+ rc = send(sock, fastopen_blob->data, fastopen_blob->len, 0);
+ }
+# endif
+ }
+else
+#endif /*EXIM_SUPPORT_TFO*/
+ {
+#if defined(EXIM_SUPPORT_TFO) && !defined(EXIM_TFO_CONNECTX)
+legacy_connect:
+#endif
+
+ DEBUG(D_transport|D_v) if (fastopen_blob)
+ debug_printf(" non-TFO mode connection attempt to %s, %lu data\n",
+ address, (unsigned long)fastopen_blob->len);
+ if ((rc = connect(sock, s_ptr, s_len)) >= 0)
+ if ( fastopen_blob && fastopen_blob->data && fastopen_blob->len
+ && send(sock, fastopen_blob->data, fastopen_blob->len, 0) < 0)
+ rc = -1;
+ }
+
save_errno = errno;
-alarm(0);
+ALARM_CLR(0);
/* There is a testing facility for simulating a connection timeout, as I
can't think of any other way of doing this. It converts a connection refused
into a timeout if the timeout is set to 999999. */
-if (running_in_test_harness && save_errno == ECONNREFUSED && timeout == 999999)
+if (f.running_in_test_harness && save_errno == ECONNREFUSED && timeout == 999999)
{
rc = -1;
save_errno = EINTR;
/* Success */
if (rc >= 0)
- {
- callout_address = string_sprintf("[%s]:%d", address, port);
return 0;
- }
/* A failure whose error code is "Interrupted system call" is in fact
an externally applied timeout if the signal handler has been run. */
Arguments:
type SOCK_DGRAM or SOCK_STREAM
af AF_INET6 or AF_INET for the socket type
- address the remote address, in text form
+ hostname host name, or ip address (as text)
portlo,porthi the remote port range
timeout a timeout
- connhost if not NULL, host_item filled in with connection details
+ connhost if not NULL, host_item to be filled in with connection details
errstr pointer for allocated string on error
+ fastopen_blob with SOCK_STREAM, if non-null, request TCP Fast Open.
+ Additionally, optional idempotent early-data to send
Return:
socket fd, or -1 on failure (having allocated an error string)
*/
int
ip_connectedsocket(int type, const uschar * hostname, int portlo, int porthi,
- int timeout, host_item * connhost, uschar ** errstr)
+ int timeout, host_item * connhost, uschar ** errstr, const blob * fastopen_blob)
{
-int namelen, port;
+int namelen;
host_item shost;
-host_item *h;
int af = 0, fd, fd4 = -1, fd6 = -1;
shost.next = NULL;
hostname[namelen - 1] == ']')
{
uschar * host = string_copyn(hostname+1, namelen-2);
-debug_printf("%s: 1\n", __FUNCTION__);
if (string_is_ip_address(host, NULL) == 0)
{
*errstr = string_sprintf("malformed IP address \"%s\"", hostname);
/* Otherwise check for an unadorned IP address */
else if (string_is_ip_address(hostname, NULL) != 0)
- {
-debug_printf("%s: 2\n", __FUNCTION__);
shost.name = shost.address = string_copyn(hostname, namelen);
- }
/* Otherwise lookup IP address(es) from the name */
else
{
-debug_printf("%s: 3\n", __FUNCTION__);
shost.name = string_copyn(hostname, namelen);
if (host_find_byname(&shost, NULL, HOST_FIND_QUALIFY_SINGLE,
NULL, FALSE) != HOST_FOUND)
/* Try to connect to the server - test each IP till one works */
-for (h = &shost; h; h = h->next)
+for (host_item * h = &shost; h; h = h->next)
{
-debug_printf("%s: 4 '%s'\n", __FUNCTION__, h->address);
fd = Ustrchr(h->address, ':') != 0
? fd6 < 0 ? (fd6 = ip_socket(type, af = AF_INET6)) : fd6
: fd4 < 0 ? (fd4 = ip_socket(type, af = AF_INET )) : fd4;
goto bad;
}
- for(port = portlo; port <= porthi; port++)
- if (ip_connect(fd, af, h->address, port, timeout) == 0)
+ for (int port = portlo; port <= porthi; port++)
+ if (ip_connect(fd, af, h->address, port, timeout, fastopen_blob) == 0)
{
- if (fd != fd6) close(fd6);
- if (fd != fd4) close(fd4);
+ if (fd6 >= 0 && fd != fd6) close(fd6);
+ if (fd4 >= 0 && fd != fd4) close(fd4);
if (connhost)
{
h->port = port;
}
+/*XXX TFO? Currently not requested: a NULL fastopen_blob is passed to ip_connectedsocket(). */
int
-ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo)
+ip_tcpsocket(const uschar * hostport, uschar ** errstr, int tmo,
+ host_item * connhost)
{
int scan;
uschar hostname[256];
}
return ip_connectedsocket(SOCK_STREAM, hostname, portlow, porthigh,
- tmo, NULL, errstr);
+ tmo, connhost, errstr, NULL);
}
int
return -1;
}
+callout_address = string_copy(path);
server.sun_family = AF_UNIX;
-Ustrncpy(server.sun_path, path, sizeof(server.sun_path)-1);
+Ustrncpy(US server.sun_path, path, sizeof(server.sun_path)-1);
server.sun_path[sizeof(server.sun_path)-1] = '\0';
if (connect(sock, (struct sockaddr *) &server, sizeof(server)) < 0)
{
path, strerror(err));
return -1;
}
-callout_address = string_copy(path);
return sock;
}
+/* spec is either an absolute path (with a leading /), or
+a host (name or IP) and port (whitespace-separated).
+The port can be a range, dash-separated, or a single number.
+
+For a TCP socket, optionally fill in a host_item.
+*/
int
-ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo)
+ip_streamsocket(const uschar * spec, uschar ** errstr, int tmo,
+ host_item * connhost)
{
return *spec == '/'
- ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo);
+ ? ip_unixsocket(spec, errstr) : ip_tcpsocket(spec, errstr, tmo, connhost);
}
/*************************************************
{
int fodder = 1;
if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (uschar *)(&fodder), sizeof(fodder)) != 0)
+ US (&fodder), sizeof(fodder)) != 0)
log_write(0, LOG_MAIN, "setsockopt(SO_KEEPALIVE) on connection %s %s "
"failed: %s", torf? "to":"from", address, strerror(errno));
}
/*
Arguments:
fd the file descriptor
- timeout the timeout, seconds
+ timelimit the timeout endpoint, seconds-since-epoch
Returns: TRUE => ready for i/o
FALSE => timed out, or other error
*/
BOOL
-fd_ready(int fd, int timeout)
+fd_ready(int fd, time_t timelimit)
{
-fd_set select_inset;
-time_t start_recv = time(NULL);
-int time_left = timeout;
-int rc;
+int rc, time_left = timelimit - time(NULL);
if (time_left <= 0)
{
do
{
- struct timeval tv = { time_left, 0 };
- FD_ZERO (&select_inset);
- FD_SET (fd, &select_inset);
-
/*DEBUG(D_transport) debug_printf("waiting for data on fd\n");*/
- rc = select(fd + 1, (SELECT_ARG2_TYPE *)&select_inset, NULL, NULL, &tv);
+ rc = poll_one_fd(fd, POLLIN, time_left * 1000);
/* If some interrupt arrived, just retry. We presume this to be rare,
but it can happen (e.g. the SIGUSR1 signal sent by exiwhat causes
DEBUG(D_transport) debug_printf("EINTR while waiting for socket data\n");
/* Watch out, 'continue' jumps to the condition, not to the loops top */
- time_left = timeout - (time(NULL) - start_recv);
- if (time_left > 0) continue;
+ if ((time_left = timelimit - time(NULL)) > 0) continue;
}
if (rc <= 0)
/* Checking the FD_ISSET is not enough, if we're interrupted, the
select_inset may still contain the 'input'. */
}
-while (rc < 0 || !FD_ISSET(fd, &select_inset));
+while (rc < 0);
return TRUE;
}
result but no ready descriptor. Is this in fact possible?
Arguments:
- sock the socket
+ cctx the connection context (socket fd, possibly TLS context)
buffer to read into
bufsize the buffer size
- timeout the timeout
+ timelimit the timeout endpoint, seconds-since-epoch
Returns: > 0 => that much data read
<= 0 on error or EOF; errno set - zero for EOF
*/
int
-ip_recv(int sock, uschar *buffer, int buffsize, int timeout)
+ip_recv(client_conn_ctx * cctx, uschar * buffer, int buffsize, time_t timelimit)
{
int rc;
-if (!fd_ready(sock, timeout))
+if (!fd_ready(cctx->sock, timelimit))
return -1;
/* The socket is ready, read from it (via TLS if it's active). On EOF (i.e.
close down of the connection), set errno to zero; otherwise leave it alone. */
-#ifdef SUPPORT_TLS
-if (tls_out.active == sock)
- rc = tls_read(FALSE, buffer, buffsize);
-else if (tls_in.active == sock)
- rc = tls_read(TRUE, buffer, buffsize);
+#ifndef DISABLE_TLS
+if (cctx->tls_ctx) /* client TLS */
+ rc = tls_read(cctx->tls_ctx, buffer, buffsize);
+else if (tls_in.active.sock == cctx->sock) /* server TLS */
+ rc = tls_read(NULL, buffer, buffsize);
else
#endif
- rc = recv(sock, buffer, buffsize, 0);
+ rc = recv(cctx->sock, buffer, buffsize, 0);
if (rc > 0) return rc;
if (rc == 0) errno = 0;
void
dscp_list_to_stream(FILE *stream)
{
-int i;
-for (i=0; i < dscp_table_size; ++i)
+for (int i = 0; i < dscp_table_size; ++i)
fprintf(stream, "%s\n", dscp_table[i].name);
}