X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/04c7af3d028fa168fb736ce0494436adb8e5940c..a85c067ba6c6940512cf57ec213277a370d87e70:/src/src/daemon.c diff --git a/src/src/daemon.c b/src/src/daemon.c index 78a4d8ec2..0afc7ca86 100644 --- a/src/src/daemon.c +++ b/src/src/daemon.c @@ -2,9 +2,10 @@ * Exim - an Internet mail transport agent * *************************************************/ +/* Copyright (c) The Exim Maintainers 2020 - 2022 */ /* Copyright (c) University of Cambridge 1995 - 2018 */ -/* Copyright (c) The Exim Maintainers 2020 */ /* See the file NOTICE for conditions of use and distribution. */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* Functions concerned with running Exim as a daemon */ @@ -87,7 +88,7 @@ sigchld_seen = TRUE; } -/* SIGTERM handler. Try to get the damon pif file removed +/* SIGTERM handler. Try to get the daemon pid file removed before exiting. */ static void @@ -128,12 +129,31 @@ if (smtp_out) smtp_printf("421 %s\r\n", FALSE, smtp_msg); /************************************************* *************************************************/ +#ifndef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS +static void +unlink_notifier_socket(void) +{ +uschar * s = expand_string(notifier_socket); +DEBUG(D_any) debug_printf("unlinking notifier socket %s\n", s); +Uunlink(s); +} +#endif + + static void close_daemon_sockets(int daemon_notifier_fd, - int * listen_sockets, int listen_socket_count) + struct pollfd * fd_polls, int listen_socket_count) { -if (daemon_notifier_fd >= 0) (void) close(daemon_notifier_fd); -for (int i = 0; i < listen_socket_count; i++) (void) close(listen_sockets[i]); +if (daemon_notifier_fd >= 0) + { + (void) close(daemon_notifier_fd); + daemon_notifier_fd = -1; +#ifndef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS + unlink_notifier_socket(); +#endif + } + +for (int i = 0; i < listen_socket_count; i++) (void) close(fd_polls[i].fd); } @@ -148,7 +168,7 @@ is required so that they can be closed in the sub-process. Take care not to leak store in this process - reset the stacking pool at the end. Arguments: - listen_sockets sockets which are listening for incoming calls + fd_polls sockets which are listening for incoming calls listen_socket_count count of listening sockets accept_socket socket of the current accepted call accepted socket information about the current call @@ -157,7 +177,7 @@ Returns: nothing */ static void -handle_smtp_call(int *listen_sockets, int listen_socket_count, +handle_smtp_call(struct pollfd *fd_polls, int listen_socket_count, int accept_socket, struct sockaddr *accepted) { pid_t pid; @@ -256,7 +276,7 @@ subprocess because it might take time. */ if (smtp_load_reserve >= 0) { load_average = OS_GETLOADAVG(); - if (smtp_reserve_hosts == NULL && load_average > smtp_load_reserve) + if (!smtp_reserve_hosts && load_average > smtp_load_reserve) { DEBUG(D_any) debug_printf("rejecting SMTP connection: load average = %.2f\n", (double)load_average/1000.0); @@ -355,7 +375,7 @@ if (LOGGING(smtp_connection)) { uschar *list = hosts_connection_nolog; memset(sender_host_cache, 0, sizeof(sender_host_cache)); - if (list != NULL && verify_check_host(&list) == OK) + if (list && verify_check_host(&list) == OK) save_log_selector &= ~L_smtp_connection; else log_write(L_smtp_connection, LOG_MAIN, "SMTP connection from %s " @@ -377,12 +397,18 @@ if (pid == 0) int save_debug_selector = debug_selector; BOOL local_queue_only; BOOL session_local_queue_only; - #ifdef SA_NOCLDWAIT +#ifdef SA_NOCLDWAIT struct sigaction act; - #endif +#endif smtp_accept_count++; /* So that it includes this process */ + /* If the listen backlog was over the monitoring level, log it. */ + + if (smtp_listen_backlog > smtp_backlog_monitor) + log_write(0, LOG_MAIN, "listen backlog %d I=[%s]:%d", + smtp_listen_backlog, interface_address, interface_port); + /* May have been modified for the subprocess */ *log_selector = save_log_selector; @@ -434,7 +460,7 @@ if (pid == 0) extensive comment before the reception loop in exim.c for a fuller explanation of this logic. */ - close_daemon_sockets(daemon_notifier_fd, listen_sockets, listen_socket_count); + close_daemon_sockets(daemon_notifier_fd, fd_polls, listen_socket_count); /* Set FD_CLOEXEC on the SMTP socket. We don't want any rogue child processes to be able to communicate with them, under any circumstances. */ @@ -443,15 +469,16 @@ if (pid == 0) (void)fcntl(dup_accept_socket, F_SETFD, fcntl(dup_accept_socket, F_GETFD) | FD_CLOEXEC); - #ifdef SA_NOCLDWAIT +#ifdef SA_NOCLDWAIT act.sa_handler = SIG_IGN; sigemptyset(&(act.sa_mask)); act.sa_flags = SA_NOCLDWAIT; sigaction(SIGCHLD, &act, NULL); - #else +#else signal(SIGCHLD, SIG_IGN); - #endif +#endif signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); /* Attempt to get an id from the sending machine via the RFC 1413 protocol. We do this in the sub-process in order not to hold up the @@ -535,7 +562,7 @@ if (pid == 0) } if (message_id[0] == 0) continue; /* No message was accepted */ } - else + else /* bad smtp_setup_msg() */ { if (smtp_out) { @@ -655,16 +682,17 @@ if (pid == 0) { pid_t dpid; - /* Before forking, ensure that the C output buffer is flushed. Otherwise - anything that it in it will get duplicated, leading to duplicate copies - of the pending output. */ - - mac_smtp_fflush(); + /* We used to flush smtp_out before forking so that buffered data was not + duplicated, but now we want to pipeline the responses for data and quit. + Instead, hard-close the fd underlying smtp_out right after fork to discard + the data buffer. */ if ((dpid = exim_fork(US"daemon-accept-delivery")) == 0) { (void)fclose(smtp_in); + (void)close(fileno(smtp_out)); (void)fclose(smtp_out); + smtp_in = smtp_out = NULL; /* Don't ever molest the parent's SSL connection, but do clean up the data structures if necessary. */ @@ -678,6 +706,7 @@ if (pid == 0) signal(SIGHUP, SIG_DFL); signal(SIGCHLD, SIG_DFL); signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); if (geteuid() != root_uid && !deliver_drop_privilege) { @@ -765,10 +794,8 @@ else (void)close(dup_accept_socket); the incoming host address and an expanded active_hostname. */ log_close_all(); -interface_address = -sender_host_address = NULL; +interface_address = sender_host_name = sender_host_address = NULL; store_reset(reset_point); -sender_host_address = NULL; } @@ -911,7 +938,6 @@ while ((pid = waitpid(-1, &status, WNOHANG)) > 0) } - static void set_pid_file_path(void) { @@ -920,37 +946,144 @@ if (override_pid_file_path) if (!*pid_file_path) pid_file_path = string_sprintf("%s/exim-daemon.pid", spool_directory); + +if (pid_file_path[0] != '/') + log_write(0, LOG_PANIC_DIE, "pid file path %s must be absolute\n", pid_file_path); } -/* Remove the daemon's pidfile. Note: runs with root privilege, -as a direct child of the daemon. Does not return. */ +enum pid_op { PID_WRITE, PID_CHECK, PID_DELETE }; -void -delete_pid_file(void) +/* Do various pid file operations as safe as possible. Ideally we'd just +drop the privileges for creation of the pid file and not care at all about removal of +the file. FIXME. +Returns: true on success, false + errno==EACCES otherwise +*/ + +static BOOL +operate_on_pid_file(const enum pid_op operation, const pid_t pid) { -uschar * daemon_pid = string_sprintf("%d\n", (int)getppid()); -FILE * f; +char pid_line[sizeof(int) * 3 + 2]; +const int pid_len = snprintf(pid_line, sizeof(pid_line), "%d\n", (int)pid); +BOOL lines_match = FALSE; +uschar * path, * base, * dir; + +const int dir_flags = O_RDONLY | O_NONBLOCK; +const int base_flags = O_NOFOLLOW | O_NONBLOCK; +const mode_t base_mode = 0644; +struct stat sb; +int cwd_fd = -1, dir_fd = -1, base_fd = -1; +BOOL success = FALSE; +errno = EACCES; set_pid_file_path(); -if ((f = Ufopen(pid_file_path, "rb"))) +if (!f.running_in_test_harness && real_uid != root_uid && real_uid != exim_uid) goto cleanup; +if (pid_len < 2 || pid_len >= (int)sizeof(pid_line)) goto cleanup; + +path = string_copy(pid_file_path); +if ((base = Ustrrchr(path, '/')) == NULL) /* should not happen, but who knows */ + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "pid file path \"%s\" does not contain a '/'", pid_file_path); + +dir = base != path ? path : US"/"; +*base++ = '\0'; + +if (!dir || !*dir || *dir != '/') goto cleanup; +if (!base || !*base || Ustrchr(base, '/') != NULL) goto cleanup; + +cwd_fd = open(".", dir_flags); +if (cwd_fd < 0 || fstat(cwd_fd, &sb) != 0 || !S_ISDIR(sb.st_mode)) goto cleanup; +dir_fd = open(CS dir, dir_flags); +if (dir_fd < 0 || fstat(dir_fd, &sb) != 0 || !S_ISDIR(sb.st_mode)) goto cleanup; + +/* emulate openat */ +if (fchdir(dir_fd) != 0) goto cleanup; +base_fd = open(CS base, O_RDONLY | base_flags); +if (fchdir(cwd_fd) != 0) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "can't return to previous working dir: %s", strerror(errno)); + +if (base_fd >= 0) { - if ( fgets(CS big_buffer, big_buffer_size, f) - && Ustrcmp(daemon_pid, big_buffer) == 0 - ) - if (Uunlink(pid_file_path) == 0) + char line[sizeof(pid_line)]; + ssize_t len = -1; + + if (fstat(base_fd, &sb) != 0 || !S_ISREG(sb.st_mode)) goto cleanup; + if ((sb.st_mode & 07777) != base_mode || sb.st_nlink != 1) goto cleanup; + if (sb.st_size < 2 || sb.st_size >= (off_t)sizeof(line)) goto cleanup; + + len = read(base_fd, line, sizeof(line)); + if (len != (ssize_t)sb.st_size) goto cleanup; + line[len] = '\0'; + + if (strspn(line, "0123456789") != (size_t)len-1) goto cleanup; + if (line[len-1] != '\n') goto cleanup; + lines_match = len == pid_len && strcmp(line, pid_line) == 0; + } + +if (operation == PID_WRITE) + { + if (!lines_match) + { + if (base_fd >= 0) { - DEBUG(D_any) - debug_printf("%s unlink: %s\n", pid_file_path, strerror(errno)); - } - else - DEBUG(D_any) - debug_printf("unlinked %s\n", pid_file_path); - fclose(f); + int error = -1; + /* emulate unlinkat */ + if (fchdir(dir_fd) != 0) goto cleanup; + error = unlink(CS base); + if (fchdir(cwd_fd) != 0) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "can't return to previous working dir: %s", strerror(errno)); + if (error) goto cleanup; + (void)close(base_fd); + base_fd = -1; + } + /* emulate openat */ + if (fchdir(dir_fd) != 0) goto cleanup; + base_fd = open(CS base, O_WRONLY | O_CREAT | O_EXCL | base_flags, base_mode); + if (fchdir(cwd_fd) != 0) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "can't return to previous working dir: %s", strerror(errno)); + if (base_fd < 0) goto cleanup; + if (fchmod(base_fd, base_mode) != 0) goto cleanup; + if (write(base_fd, pid_line, pid_len) != pid_len) goto cleanup; + DEBUG(D_any) debug_printf("pid written to %s\n", pid_file_path); + } } else - DEBUG(D_any) - debug_printf("%s\n", string_open_failed("pid file %s", pid_file_path)); + { + if (!lines_match) goto cleanup; + if (operation == PID_DELETE) + { + int error = -1; + /* emulate unlinkat */ + if (fchdir(dir_fd) != 0) goto cleanup; + error = unlink(CS base); + if (fchdir(cwd_fd) != 0) + log_write(0, LOG_MAIN|LOG_PANIC_DIE, "can't return to previous working dir: %s", strerror(errno)); + if (error) goto cleanup; + } + } + +success = TRUE; +errno = 0; + +cleanup: +if (cwd_fd >= 0) (void)close(cwd_fd); +if (dir_fd >= 0) (void)close(dir_fd); +if (base_fd >= 0) (void)close(base_fd); +return success; +} + + +/* Remove the daemon's pidfile. Note: runs with root privilege, +as a direct child of the daemon. Does not return. */ + +void +delete_pid_file(void) +{ +const BOOL success = operate_on_pid_file(PID_DELETE, getppid()); + +DEBUG(D_any) + debug_printf("delete pid file %s %s: %s\n", pid_file_path, + success ? "success" : "failure", strerror(errno)); + exim_exit(EXIT_SUCCESS); } @@ -963,6 +1096,7 @@ daemon_die(void) { int pid; +DEBUG(D_any) debug_printf("SIGTERM/SIGINT seen\n"); #if !defined(DISABLE_TLS) && (defined(EXIM_HAVE_INOTIFY) || defined(EXIM_HAVE_KEVENT)) tls_watch_invalidate(); #endif @@ -972,11 +1106,7 @@ if (daemon_notifier_fd >= 0) close(daemon_notifier_fd); daemon_notifier_fd = -1; #ifndef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS - { - uschar * s = expand_string(notifier_socket); - DEBUG(D_any) debug_printf("unlinking notifier socket %s\n", s); - Uunlink(s); - } + unlink_notifier_socket(); #endif } @@ -1003,13 +1133,43 @@ exim_exit(EXIT_SUCCESS); * Listener socket for local work prompts * *************************************************/ +ssize_t +daemon_client_sockname(struct sockaddr_un * sup, uschar ** sname) +{ +#ifdef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS +sup->sun_path[0] = 0; /* Abstract local socket addr - Linux-specific? */ +return offsetof(struct sockaddr_un, sun_path) + 1 + + snprintf(sup->sun_path+1, sizeof(sup->sun_path)-1, "exim_%d", getpid()); +#else +*sname = string_sprintf("%s/p_%d", spool_directory, getpid()); +return offsetof(struct sockaddr_un, sun_path) + + snprintf(sup->sun_path, sizeof(sup->sun_path), "%s", sname); +#endif +} + +ssize_t +daemon_notifier_sockname(struct sockaddr_un * sup) +{ +#ifdef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS +sup->sun_path[0] = 0; /* Abstract local socket addr - Linux-specific? */ +return offsetof(struct sockaddr_un, sun_path) + 1 + + snprintf(sup->sun_path+1, sizeof(sup->sun_path)-1, "%s", + expand_string(notifier_socket)); +#else +return offsetof(struct sockaddr_un, sun_path) + + snprintf(sup->sun_path, sizeof(sup->sun_path), "%s", + expand_string(notifier_socket)); +#endif +} + + static void daemon_notifier_socket(void) { int fd; const uschar * where; struct sockaddr_un sa_un = {.sun_family = AF_UNIX}; -int len; +ssize_t len; if (!notifier_socket || !*notifier_socket) { @@ -1034,20 +1194,15 @@ if ((fd = socket(PF_UNIX, SOCK_DGRAM, 0)) < 0) (void)fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); #endif +len = daemon_notifier_sockname(&sa_un); + #ifdef EXIM_HAVE_ABSTRACT_UNIX_SOCKETS -sa_un.sun_path[0] = 0; /* Abstract local socket addr - Linux-specific? */ -len = offsetof(struct sockaddr_un, sun_path) + 1 - + snprintf(sa_un.sun_path+1, sizeof(sa_un.sun_path)-1, "%s", - expand_string(notifier_socket)); DEBUG(D_any) debug_printf(" @%s\n", sa_un.sun_path+1); #else /* filesystem-visible and persistent; will neeed removal */ -len = offsetof(struct sockaddr_un, sun_path) - + snprintf(sa_un.sun_path, sizeof(sa_un.sun_path), "%s", - expand_string(notifier_socket)); DEBUG(D_any) debug_printf(" %s\n", sa_un.sun_path); #endif -if (bind(fd, (const struct sockaddr *)&sa_un, len) < 0) +if (bind(fd, (const struct sockaddr *)&sa_un, (socklen_t)len) < 0) { where = US"bind"; goto bad; } #ifdef SO_PASSCRED /* Linux */ @@ -1076,7 +1231,11 @@ bad: static uschar queuerun_msgid[MESSAGE_ID_LENGTH+1]; -/* Return TRUE if a sigalrm should be emulated */ +/* The notifier socket has something to read. Pull the message from it, decode +and do the action. + +Return TRUE if a sigalrm should be emulated */ + static BOOL daemon_notification(void) { @@ -1126,7 +1285,6 @@ for (struct cmsghdr * cp = CMSG_FIRSTHDR(&msg); { DEBUG(D_queue_run) debug_printf("%s: sender creds pid %d uid %d gid %d\n", __FUNCTION__, (int)cr->pid, (int)cr->uid, (int)cr->gid); - return FALSE; } # elif defined(LOCAL_CREDS) /* BSD-ish */ struct sockcred * cr = (struct sockcred *) CMSG_DATA(cp); @@ -1134,7 +1292,6 @@ for (struct cmsghdr * cp = CMSG_FIRSTHDR(&msg); { DEBUG(D_queue_run) debug_printf("%s: sender creds pid ??? uid %d gid %d\n", __FUNCTION__, (int)cr->sc_uid, (int)cr->sc_gid); - return FALSE; } # endif break; @@ -1165,13 +1322,18 @@ switch (buf[0]) (const struct sockaddr *)&sa_un, msg.msg_namelen) < 0) log_write(0, LOG_MAIN|LOG_PANIC, "%s: sendto: %s\n", __FUNCTION__, strerror(errno)); - return FALSE; + break; } + + case NOTIFY_REGEX: + regex_at_daemon(buf); + break; } return FALSE; } + /************************************************* * Exim Daemon Mainline * *************************************************/ @@ -1198,10 +1360,10 @@ There are no arguments to this function, and it never returns. */ void daemon_go(void) { -struct passwd *pw; -int *listen_sockets = NULL; -int listen_socket_count = 0; -ip_address_item *addresses = NULL; +struct passwd * pw; +struct pollfd * fd_polls, * tls_watch_poll = NULL, * dnotify_poll = NULL; +int listen_socket_count = 0, poll_fd_count; +ip_address_item * addresses = NULL; time_t last_connection_time = (time_t)0; int local_queue_run_max = atoi(CS expand_string(queue_run_max)); @@ -1212,16 +1374,21 @@ debugging lines get the pid added. */ DEBUG(D_any|D_v) debug_selector |= D_pid; +/* Allocate enough pollstructs for inetd mode plus the ancillary sockets; +also used when there are no listen sockets. */ + +fd_polls = store_get(sizeof(struct pollfd) * 3, GET_UNTAINTED); + if (f.inetd_wait_mode) { listen_socket_count = 1; - listen_sockets = store_get(sizeof(int), FALSE); (void) close(3); if (dup2(0, 3) == -1) log_write(0, LOG_MAIN|LOG_PANIC_DIE, "failed to dup inetd socket safely away: %s", strerror(errno)); - listen_sockets[0] = 3; + fd_polls[0].fd = 3; + fd_polls[0].events = POLLIN; (void) close(0); (void) close(1); (void) close(2); @@ -1258,11 +1425,11 @@ if (f.inetd_wait_mode || f.daemon_listen) for those OS for which this is necessary the first time it is called (in order to perform an "open" on the kernel memory file). */ - #ifdef LOAD_AVG_NEEDS_ROOT +#ifdef LOAD_AVG_NEEDS_ROOT if (queue_only_load >= 0 || smtp_load_reserve >= 0 || (deliver_queue_load_max >= 0 && deliver_drop_privilege)) (void)os_getloadavg(); - #endif +#endif } @@ -1398,7 +1565,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) sep = 0; while ((s = string_nextinlist(&list, &sep, NULL, 0))) pct++; - default_smtp_port = store_get((pct+1) * sizeof(int), FALSE); + default_smtp_port = store_get((pct+1) * sizeof(int), GET_UNTAINTED); list = daemon_smtp_port; sep = 0; for (pct = 0; @@ -1426,6 +1593,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) list = tls_in.on_connect_ports; sep = 0; + /* the list isn't expanded so cannot be tainted. If it ever is we will trap here */ while ((s = string_nextinlist(&list, &sep, big_buffer, big_buffer_size))) if (!isdigit(*s)) { @@ -1486,7 +1654,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) ipa->port = default_smtp_port[0]; for (int i = 1; default_smtp_port[i] > 0; i++) { - ip_address_item *new = store_get(sizeof(ip_address_item), FALSE); + ip_address_item * new = store_get(sizeof(ip_address_item), GET_UNTAINTED); memcpy(new->address, ipa->address, Ustrlen(ipa->address) + 1); new->port = default_smtp_port[i]; @@ -1540,11 +1708,16 @@ if (f.daemon_listen && !f.inetd_wait_mode) } } - /* Get a vector to remember all the sockets in */ + /* Get a vector to remember all the sockets in. + Two extra elements for the ancillary sockets */ for (ipa = addresses; ipa; ipa = ipa->next) listen_socket_count++; - listen_sockets = store_get(sizeof(int) * listen_socket_count, FALSE); + fd_polls = store_get(sizeof(struct pollfd) * (listen_socket_count + 2), + GET_UNTAINTED); + for (struct pollfd * p = fd_polls; p < fd_polls + listen_socket_count + 2; + p++) + { p->fd = -1; p->events = POLLIN; } } /* daemon_listen but not inetd_wait_mode */ @@ -1567,7 +1740,7 @@ if (f.daemon_listen) if (smtp_accept_max > 0) { - smtp_slots = store_get(smtp_accept_max * sizeof(smtp_slot), FALSE); + smtp_slots = store_get(smtp_accept_max * sizeof(smtp_slot), GET_UNTAINTED); for (int i = 0; i < smtp_accept_max; i++) smtp_slots[i] = empty_smtp_slot; } } @@ -1604,15 +1777,19 @@ if (f.background_daemon) daemon as the result of a SIGHUP. In this case, there is no need to do anything, because the controlling terminal has long gone. Otherwise, fork, in case current process is a process group leader (see 'man setsid' for an - explanation) before calling setsid(). */ + explanation) before calling setsid(). + All other forks want daemon_listen cleared. Rather than blow a register, jsut + restore it here. */ if (getppid() != 1) { + BOOL daemon_listen = f.daemon_listen; pid_t pid = exim_fork(US"daemon"); if (pid < 0) log_write(0, LOG_MAIN|LOG_PANIC_DIE, "fork() failed when starting daemon: %s", strerror(errno)); if (pid > 0) exit(EXIT_SUCCESS); /* in parent process, just exit */ (void)setsid(); /* release controlling terminal */ + f.daemon_listen = daemon_listen; } } @@ -1635,8 +1812,8 @@ if (f.daemon_listen && !f.inetd_wait_mode) for (ipa = addresses, sk = 0; sk < listen_socket_count; ipa = ipa->next, sk++) { BOOL wildcard; - ip_address_item *ipa2; - int af; + ip_address_item * ipa2; + int fd, af; if (Ustrchr(ipa->address, ':') != NULL) { @@ -1649,7 +1826,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) wildcard = ipa->address[0] == 0; } - if ((listen_sockets[sk] = ip_socket(SOCK_STREAM, af)) < 0) + if ((fd_polls[sk].fd = fd = ip_socket(SOCK_STREAM, af)) < 0) { if (check_special_case(0, addresses, ipa, FALSE)) { @@ -1658,7 +1835,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) goto SKIP_SOCKET; } log_write(0, LOG_PANIC_DIE, "IPv%c socket creation failed: %s", - (af == AF_INET6)? '6' : '4', strerror(errno)); + af == AF_INET6 ? '6' : '4', strerror(errno)); } /* If this is an IPv6 wildcard socket, set IPV6_V6ONLY if that option is @@ -1667,8 +1844,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) #ifdef IPV6_V6ONLY if (af == AF_INET6 && wildcard && - setsockopt(listen_sockets[sk], IPPROTO_IPV6, IPV6_V6ONLY, CS (&on), - sizeof(on)) < 0) + setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)) < 0) log_write(0, LOG_MAIN, "Setting IPV6_V6ONLY on daemon's IPv6 wildcard " "socket failed (%s): carrying on without it", strerror(errno)); #endif /* IPV6_V6ONLY */ @@ -1677,16 +1853,14 @@ if (f.daemon_listen && !f.inetd_wait_mode) is being handled. Without this, a connection will prevent reuse of the smtp port for listening. */ - if (setsockopt(listen_sockets[sk], SOL_SOCKET, SO_REUSEADDR, - US (&on), sizeof(on)) < 0) + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) log_write(0, LOG_MAIN|LOG_PANIC_DIE, "setting SO_REUSEADDR on socket " "failed when starting daemon: %s", strerror(errno)); /* Set TCP_NODELAY; Exim does its own buffering. There is a switch to disable this because it breaks some broken clients. */ - if (tcp_nodelay) setsockopt(listen_sockets[sk], IPPROTO_TCP, TCP_NODELAY, - US (&on), sizeof(on)); + if (tcp_nodelay) setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)); /* Now bind the socket to the required port; if Exim is being restarted it may not always be possible to bind immediately, even with SO_REUSEADDR @@ -1704,12 +1878,12 @@ if (f.daemon_listen && !f.inetd_wait_mode) for(;;) { uschar *msg, *addr; - if (ip_bind(listen_sockets[sk], af, ipa->address, ipa->port) >= 0) break; + if (ip_bind(fd, af, ipa->address, ipa->port) >= 0) break; if (check_special_case(errno, addresses, ipa, TRUE)) { DEBUG(D_any) debug_printf("wildcard IPv4 bind() failed after IPv6 " "listen() success; EADDRINUSE ignored\n"); - (void)close(listen_sockets[sk]); + (void)close(fd); goto SKIP_SOCKET; } msg = US strerror(errno); @@ -1737,30 +1911,30 @@ if (f.daemon_listen && !f.inetd_wait_mode) else debug_printf("listening on %s port %d\n", ipa->address, ipa->port); + /* Start listening on the bound socket, establishing the maximum backlog of + connections that is allowed. On success, add to the set of sockets for select + and continue to the next address. */ + #if defined(TCP_FASTOPEN) && !defined(__APPLE__) if ( f.tcp_fastopen_ok - && setsockopt(listen_sockets[sk], IPPROTO_TCP, TCP_FASTOPEN, + && setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &smtp_connect_backlog, sizeof(smtp_connect_backlog))) { DEBUG(D_any) debug_printf("setsockopt FASTOPEN: %s\n", strerror(errno)); f.tcp_fastopen_ok = FALSE; } #endif - - /* Start listening on the bound socket, establishing the maximum backlog of - connections that is allowed. On success, continue to the next address. */ - - if (listen(listen_sockets[sk], smtp_connect_backlog) >= 0) + if (listen(fd, smtp_connect_backlog) >= 0) { #if defined(TCP_FASTOPEN) && defined(__APPLE__) if ( f.tcp_fastopen_ok - && setsockopt(listen_sockets[sk], IPPROTO_TCP, TCP_FASTOPEN, - &on, sizeof(on))) + && setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &on, sizeof(on))) { DEBUG(D_any) debug_printf("setsockopt FASTOPEN: %s\n", strerror(errno)); f.tcp_fastopen_ok = FALSE; } #endif + fd_polls[sk].fd = fd; continue; } @@ -1778,7 +1952,7 @@ if (f.daemon_listen && !f.inetd_wait_mode) DEBUG(D_any) debug_printf("wildcard IPv4 listen() failed after IPv6 " "listen() success; EADDRINUSE ignored\n"); - (void)close(listen_sockets[sk]); + (void)close(fd); /* Come here if there has been a problem with the socket which we are going to ignore. We remove the address from the chain, and back up the @@ -1819,18 +1993,11 @@ The variable daemon_write_pid is used to control this. */ if (f.running_in_test_harness || write_pid) { - FILE *f; - - set_pid_file_path(); - if ((f = modefopen(pid_file_path, "wb", 0644))) - { - (void)fprintf(f, "%d\n", (int)getpid()); - (void)fclose(f); - DEBUG(D_any) debug_printf("pid written to %s\n", pid_file_path); - } - else - DEBUG(D_any) - debug_printf("%s\n", string_open_failed("pid file %s", pid_file_path)); + const enum pid_op operation = (f.running_in_test_harness + || real_uid == root_uid + || (real_uid == exim_uid && !override_pid_file_path)) ? PID_WRITE : PID_CHECK; + if (!operate_on_pid_file(operation, getpid())) + DEBUG(D_any) debug_printf("%s pid file %s: %s\n", (operation == PID_WRITE) ? "write" : "check", pid_file_path, strerror(errno)); } /* Set up the handler for SIGHUP, which causes a restart of the daemon. */ @@ -1859,7 +2026,7 @@ of them (and also if we are doing queue runs). */ if (queue_interval > 0 && local_queue_run_max > 0) { - queue_pid_slots = store_get(local_queue_run_max * sizeof(pid_t), FALSE); + queue_pid_slots = store_get(local_queue_run_max * sizeof(pid_t), GET_UNTAINTED); for (int i = 0; i < local_queue_run_max; i++) queue_pid_slots[i] = 0; } @@ -1871,6 +2038,7 @@ os_non_restarting_signal(SIGCHLD, main_sigchld_handler); sigterm_seen = FALSE; os_non_restarting_signal(SIGTERM, main_sigterm_handler); +os_non_restarting_signal(SIGINT, main_sigterm_handler); /* If we are to run the queue periodically, pretend the alarm has just gone off. This will cause the first queue-runner to get kicked off straight away. */ @@ -1959,7 +2127,7 @@ else if (f.daemon_listen) if (*--p == '}') *p = '\0'; /* drop EOL */ while (isdigit(*--p)) ; /* char before port */ - i2->log = *p == ':' /* no list yet? */ + i2->log = *p == ':' /* no list yet? { */ ? string_sprintf("%.*s{%s,%d}", (int)(p - i2->log + 1), i2->log, p+1, ipa->port) : string_sprintf("%s,%d}", i2->log, ipa->port); @@ -2047,6 +2215,24 @@ spf_init(); tls_daemon_init(); #endif +/* Add ancillary sockets to the set for select */ + +poll_fd_count = listen_socket_count; +#ifndef DISABLE_TLS +if (tls_watch_fd >= 0) + { + tls_watch_poll = &fd_polls[poll_fd_count++]; + tls_watch_poll->fd = tls_watch_fd; + tls_watch_poll->events = POLLIN; + } +#endif +if (daemon_notifier_fd >= 0) + { + dnotify_poll = &fd_polls[poll_fd_count++]; + dnotify_poll->fd = daemon_notifier_fd; + dnotify_poll->events = POLLIN; + } + /* Close the log so it can be renamed and moved. In the few cases below where this long-running process writes to the log (always exceptional conditions), it closes the log afterwards, for the same reason. */ @@ -2067,13 +2253,6 @@ report_time_since(×tamp_startup, US"daemon loop start"); /* testcase 0022 * for (;;) { - #if HAVE_IPV6 - struct sockaddr_in6 accepted; - #else - struct sockaddr_in accepted; - #endif - - EXIM_SOCKLEN_T len; pid_t pid; if (sigterm_seen) @@ -2151,13 +2330,14 @@ for (;;) /* Close any open listening sockets in the child */ close_daemon_sockets(daemon_notifier_fd, - listen_sockets, listen_socket_count); + fd_polls, listen_socket_count); /* Reset SIGHUP and SIGCHLD in the child in both cases. */ signal(SIGHUP, SIG_DFL); signal(SIGCHLD, SIG_DFL); signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); /* Re-exec if privilege has been given up, unless deliver_drop_ privilege is set. Reset SIGALRM before exec(). */ @@ -2278,29 +2458,8 @@ for (;;) if (f.daemon_listen) { - int lcount, select_errno; - int max_socket = 0; + int lcount; BOOL select_failed = FALSE; - fd_set select_listen; - - FD_ZERO(&select_listen); -#ifndef DISABLE_TLS - if (tls_watch_fd >= 0) - { - FD_SET(tls_watch_fd, &select_listen); - if (tls_watch_fd > max_socket) max_socket = tls_watch_fd; - } -#endif - if (daemon_notifier_fd >= 0) - { - FD_SET(daemon_notifier_fd, &select_listen); - if (daemon_notifier_fd > max_socket) max_socket = daemon_notifier_fd; - } - for (int sk = 0; sk < listen_socket_count; sk++) - { - FD_SET(listen_sockets[sk], &select_listen); - if (listen_sockets[sk] > max_socket) max_socket = listen_sockets[sk]; - } DEBUG(D_any) debug_printf("Listening...\n"); @@ -2317,8 +2476,7 @@ for (;;) errno = EINTR; } else - lcount = select(max_socket + 1, (SELECT_ARG2_TYPE *)&select_listen, - NULL, NULL, NULL); + lcount = poll(fd_polls, poll_fd_count, -1); if (lcount < 0) { @@ -2333,14 +2491,23 @@ for (;;) old one had just finished. Preserve the errno from any select() failure for the use of the common select/accept error processing below. */ - select_errno = errno; - handle_ending_processes(); - errno = select_errno; + { + int select_errno = errno; + handle_ending_processes(); #ifndef DISABLE_TLS - /* Create or rotate any required keys; handle (delayed) filewatch event */ - tls_daemon_tick(); + { + int old_tfd; + /* Create or rotate any required keys; handle (delayed) filewatch event */ + + if ((old_tfd = tls_daemon_tick()) >= 0) + for (struct pollfd * p = &fd_polls[listen_socket_count]; + p < fd_polls + poll_fd_count; p++) + if (p->fd == old_tfd) { p->fd = tls_watch_fd ; break; } + } #endif + errno = select_errno; + } /* Loop for all the sockets that are currently ready to go. If select actually failed, we have set the count to 1 and select_failed=TRUE, so as @@ -2349,32 +2516,57 @@ for (;;) while (lcount-- > 0) { int accept_socket = -1; +#if HAVE_IPV6 + struct sockaddr_in6 accepted; +#else + struct sockaddr_in accepted; +#endif if (!select_failed) { #if !defined(DISABLE_TLS) && (defined(EXIM_HAVE_INOTIFY) || defined(EXIM_HAVE_KEVENT)) - if (tls_watch_fd >= 0 && FD_ISSET(tls_watch_fd, &select_listen)) + if (tls_watch_poll && tls_watch_poll->revents & POLLIN) { - FD_CLR(tls_watch_fd, &select_listen); + tls_watch_poll->revents = 0; tls_watch_trigger_time = time(NULL); /* Set up delayed event */ tls_watch_discard_event(tls_watch_fd); break; /* to top of daemon loop */ } #endif - if ( daemon_notifier_fd >= 0 - && FD_ISSET(daemon_notifier_fd, &select_listen)) + if (dnotify_poll && dnotify_poll->revents & POLLIN) { - FD_CLR(daemon_notifier_fd, &select_listen); + dnotify_poll->revents = 0; sigalrm_seen = daemon_notification(); break; /* to top of daemon loop */ } - for (int sk = 0; sk < listen_socket_count; sk++) - if (FD_ISSET(listen_sockets[sk], &select_listen)) + for (struct pollfd * p = fd_polls; p < fd_polls + listen_socket_count; + p++) + if (p->revents & POLLIN) { - len = sizeof(accepted); - accept_socket = accept(listen_sockets[sk], - (struct sockaddr *)&accepted, &len); - FD_CLR(listen_sockets[sk], &select_listen); + EXIM_SOCKLEN_T alen = sizeof(accepted); +#ifdef TCP_INFO + struct tcp_info ti; + socklen_t tlen = sizeof(ti); + + /* If monitoring the backlog is wanted, grab for later logging */ + + smtp_listen_backlog = 0; + if ( smtp_backlog_monitor > 0 + && getsockopt(p->fd, IPPROTO_TCP, TCP_INFO, &ti, &tlen) == 0) + { +# ifdef EXIM_HAVE_TCPI_UNACKED + DEBUG(D_interface) debug_printf("listen fd %d queue max %u curr %u\n", + p->fd, ti.tcpi_sacked, ti.tcpi_unacked); + smtp_listen_backlog = ti.tcpi_unacked; +# elif defined(__FreeBSD__) /* This does not work. Investigate kernel sourcecode. */ + DEBUG(D_interface) debug_printf("listen fd %d queue max %u curr %u\n", + p->fd, ti.__tcpi_sacked, ti.__tcpi_unacked); + smtp_listen_backlog = ti.__tcpi_unacked; +# endif + } +#endif + p->revents = 0; + accept_socket = accept(p->fd, (struct sockaddr *)&accepted, &alen); break; } } @@ -2396,48 +2588,46 @@ for (;;) accept_retry_errno = errno; accept_retry_select_failed = select_failed; } - else - { - if (errno != accept_retry_errno || - select_failed != accept_retry_select_failed || - accept_retry_count >= 50) - { - log_write(0, LOG_MAIN | ((accept_retry_count >= 50)? LOG_PANIC : 0), - "%d %s() failure%s: %s", - accept_retry_count, - accept_retry_select_failed? "select" : "accept", - (accept_retry_count == 1)? "" : "s", - strerror(accept_retry_errno)); - log_close_all(); - accept_retry_count = 0; - accept_retry_errno = errno; - accept_retry_select_failed = select_failed; - } - } + else if ( errno != accept_retry_errno + || select_failed != accept_retry_select_failed + || accept_retry_count >= 50) + { + log_write(0, LOG_MAIN | (accept_retry_count >= 50 ? LOG_PANIC : 0), + "%d %s() failure%s: %s", + accept_retry_count, + accept_retry_select_failed ? "select" : "accept", + accept_retry_count == 1 ? "" : "s", + strerror(accept_retry_errno)); + log_close_all(); + accept_retry_count = 0; + accept_retry_errno = errno; + accept_retry_select_failed = select_failed; + } accept_retry_count++; } - - else - { - if (accept_retry_count > 0) - { - log_write(0, LOG_MAIN, "%d %s() failure%s: %s", - accept_retry_count, - accept_retry_select_failed? "select" : "accept", - (accept_retry_count == 1)? "" : "s", - strerror(accept_retry_errno)); - log_close_all(); - accept_retry_count = 0; - } - } + else if (accept_retry_count > 0) + { + log_write(0, LOG_MAIN, "%d %s() failure%s: %s", + accept_retry_count, + accept_retry_select_failed ? "select" : "accept", + accept_retry_count == 1 ? "" : "s", + strerror(accept_retry_errno)); + log_close_all(); + accept_retry_count = 0; + } /* If select/accept succeeded, deal with the connection. */ if (accept_socket >= 0) { +#ifdef TCP_QUICKACK /* Avoid pure-ACKs while in tls protocol pingpong phase */ + /* Unfortunately we cannot be certain to do this before a TLS-on-connect + Client Hello arrives and is acked. We do it as early as possible. */ + (void) setsockopt(accept_socket, IPPROTO_TCP, TCP_QUICKACK, US &off, sizeof(off)); +#endif if (inetd_wait_timeout) last_connection_time = time(NULL); - handle_smtp_call(listen_sockets, listen_socket_count, accept_socket, + handle_smtp_call(fd_polls, listen_socket_count, accept_socket, (struct sockaddr *)&accepted); } } @@ -2452,10 +2642,8 @@ for (;;) else { - struct timeval tv; - tv.tv_sec = queue_interval; - tv.tv_usec = 0; - select(0, NULL, NULL, NULL, &tv); + struct pollfd p; + poll(&p, 0, queue_interval * 1000); handle_ending_processes(); } @@ -2480,8 +2668,7 @@ for (;;) { log_write(0, LOG_MAIN, "pid %d: SIGHUP received: re-exec daemon", getpid()); - close_daemon_sockets(daemon_notifier_fd, - listen_sockets, listen_socket_count); + close_daemon_sockets(daemon_notifier_fd, fd_polls, listen_socket_count); ALARM_CLR(0); signal(SIGHUP, SIG_IGN); sighup_argv[0] = exim_path;