/* Copyright (c) 2007-2016 Contributors as noted in the AUTHORS file This file is part of libzmq, the ZeroMQ core engine in C++. libzmq is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License (LGPL) as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. As a special exception, the Contributors give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you must extend this exception to your version of the library. libzmq is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include "precompiled.hpp" #include "select.hpp" #if defined ZMQ_IOTHREAD_POLLER_USE_SELECT #if defined ZMQ_HAVE_WINDOWS #elif defined ZMQ_HAVE_HPUX #include <sys/param.h> #include <sys/types.h> #include <sys/time.h> #elif defined ZMQ_HAVE_OPENVMS #include <sys/types.h> #include <sys/time.h> #elif defined ZMQ_HAVE_VXWORKS #include <sys/types.h> #include <sys/time.h> #include <strings.h> #else #include <sys/select.h> #endif #include "err.hpp" #include "config.hpp" #include "i_poll_events.hpp" #include <algorithm> #include <limits> #include <climits> zmq::select_t::select_t (const zmq::thread_ctx_t &ctx_) : worker_poller_base_t (ctx_), #if defined ZMQ_HAVE_WINDOWS // Fine as long as map is not cleared. _current_family_entry_it (_family_entries.end ()) #else _max_fd (retired_fd) #endif { #if defined ZMQ_HAVE_WINDOWS for (size_t i = 0; i < fd_family_cache_size; ++i) _fd_family_cache[i] = std::make_pair (retired_fd, 0); #endif } zmq::select_t::~select_t () { stop_worker (); } zmq::select_t::handle_t zmq::select_t::add_fd (fd_t fd_, i_poll_events *events_) { check_thread (); zmq_assert (fd_ != retired_fd); fd_entry_t fd_entry; fd_entry.fd = fd_; fd_entry.events = events_; #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (fd_); wsa_assert (family != AF_UNSPEC); family_entry_t &family_entry = _family_entries[family]; #else family_entry_t &family_entry = _family_entry; #endif family_entry.fd_entries.push_back (fd_entry); FD_SET (fd_, &family_entry.fds_set.error); #if !defined ZMQ_HAVE_WINDOWS if (fd_ > _max_fd) _max_fd = fd_; #endif adjust_load (1); return fd_; } zmq::select_t::fd_entries_t::iterator zmq::select_t::find_fd_entry_by_handle (fd_entries_t &fd_entries_, handle_t handle_) { fd_entries_t::iterator fd_entry_it; for (fd_entry_it = fd_entries_.begin (); fd_entry_it != fd_entries_.end (); ++fd_entry_it) if (fd_entry_it->fd == handle_) break; return fd_entry_it; } void zmq::select_t::trigger_events (const fd_entries_t &fd_entries_, const fds_set_t &local_fds_set_, int event_count_) { // Size is cached to avoid iteration through recently added descriptors. for (fd_entries_t::size_type i = 0, size = fd_entries_.size (); i < size && event_count_ > 0; ++i) { // fd_entries_[i] may not be stored, since calls to // in_event/out_event may reallocate the vector if (is_retired_fd (fd_entries_[i])) continue; if (FD_ISSET (fd_entries_[i].fd, &local_fds_set_.read)) { fd_entries_[i].events->in_event (); --event_count_; } // TODO: can the is_retired_fd be true at this point? if it // was retired before, we would already have continued, and I // don't see where it might have been modified // And if rc == 0, we can break instead of continuing if (is_retired_fd (fd_entries_[i]) || event_count_ == 0) continue; if (FD_ISSET (fd_entries_[i].fd, &local_fds_set_.write)) { fd_entries_[i].events->out_event (); --event_count_; } // TODO: same as above if (is_retired_fd (fd_entries_[i]) || event_count_ == 0) continue; if (FD_ISSET (fd_entries_[i].fd, &local_fds_set_.error)) { fd_entries_[i].events->in_event (); --event_count_; } } } #if defined ZMQ_HAVE_WINDOWS int zmq::select_t::try_retire_fd_entry ( family_entries_t::iterator family_entry_it_, zmq::fd_t &handle_) { family_entry_t &family_entry = family_entry_it_->second; fd_entries_t::iterator fd_entry_it = find_fd_entry_by_handle (family_entry.fd_entries, handle_); if (fd_entry_it == family_entry.fd_entries.end ()) return 0; fd_entry_t &fd_entry = *fd_entry_it; zmq_assert (fd_entry.fd != retired_fd); if (family_entry_it_ != _current_family_entry_it) { // Family is not currently being iterated and can be safely // modified in-place. So later it can be skipped without // re-verifying its content. family_entry.fd_entries.erase (fd_entry_it); } else { // Otherwise mark removed entries as retired. It will be cleaned up // at the end of the iteration. See zmq::select_t::loop fd_entry.fd = retired_fd; family_entry.has_retired = true; } family_entry.fds_set.remove_fd (handle_); return 1; } #endif void zmq::select_t::rm_fd (handle_t handle_) { check_thread (); int retired = 0; #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (handle_); if (family != AF_UNSPEC) { family_entries_t::iterator family_entry_it = _family_entries.find (family); retired += try_retire_fd_entry (family_entry_it, handle_); } else { // get_fd_family may fail and return AF_UNSPEC if the socket was not // successfully connected. In that case, we need to look for the // socket in all family_entries. family_entries_t::iterator end = _family_entries.end (); for (family_entries_t::iterator family_entry_it = _family_entries.begin (); family_entry_it != end; ++family_entry_it) { if (retired += try_retire_fd_entry (family_entry_it, handle_)) { break; } } } #else fd_entries_t::iterator fd_entry_it = find_fd_entry_by_handle (_family_entry.fd_entries, handle_); assert (fd_entry_it != _family_entry.fd_entries.end ()); zmq_assert (fd_entry_it->fd != retired_fd); fd_entry_it->fd = retired_fd; _family_entry.fds_set.remove_fd (handle_); ++retired; if (handle_ == _max_fd) { _max_fd = retired_fd; for (fd_entry_it = _family_entry.fd_entries.begin (); fd_entry_it != _family_entry.fd_entries.end (); ++fd_entry_it) if (fd_entry_it->fd > _max_fd) _max_fd = fd_entry_it->fd; } _family_entry.has_retired = true; #endif zmq_assert (retired == 1); adjust_load (-1); } void zmq::select_t::set_pollin (handle_t handle_) { check_thread (); #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (handle_); wsa_assert (family != AF_UNSPEC); family_entry_t &family_entry = _family_entries[family]; #else family_entry_t &family_entry = _family_entry; #endif FD_SET (handle_, &family_entry.fds_set.read); } void zmq::select_t::reset_pollin (handle_t handle_) { check_thread (); #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (handle_); wsa_assert (family != AF_UNSPEC); family_entry_t &family_entry = _family_entries[family]; #else family_entry_t &family_entry = _family_entry; #endif FD_CLR (handle_, &family_entry.fds_set.read); } void zmq::select_t::set_pollout (handle_t handle_) { check_thread (); #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (handle_); wsa_assert (family != AF_UNSPEC); family_entry_t &family_entry = _family_entries[family]; #else family_entry_t &family_entry = _family_entry; #endif FD_SET (handle_, &family_entry.fds_set.write); } void zmq::select_t::reset_pollout (handle_t handle_) { check_thread (); #if defined ZMQ_HAVE_WINDOWS u_short family = get_fd_family (handle_); wsa_assert (family != AF_UNSPEC); family_entry_t &family_entry = _family_entries[family]; #else family_entry_t &family_entry = _family_entry; #endif FD_CLR (handle_, &family_entry.fds_set.write); } void zmq::select_t::stop () { check_thread (); // no-op... thread is stopped when no more fds or timers are registered } int zmq::select_t::max_fds () { return FD_SETSIZE; } void zmq::select_t::loop () { while (true) { // Execute any due timers. int timeout = static_cast<int> (execute_timers ()); cleanup_retired (); #ifdef _WIN32 if (_family_entries.empty ()) { #else if (_family_entry.fd_entries.empty ()) { #endif zmq_assert (get_load () == 0); if (timeout == 0) break; // TODO sleep for timeout continue; } #if defined ZMQ_HAVE_OSX struct timeval tv = {(long) (timeout / 1000), timeout % 1000 * 1000}; #else struct timeval tv = {static_cast<long> (timeout / 1000), static_cast<long> (timeout % 1000 * 1000)}; #endif #if defined ZMQ_HAVE_WINDOWS /* On Windows select does not allow to mix descriptors from different service providers. It seems to work for AF_INET and AF_INET6, but fails for AF_INET and VMCI. The workaround is to use WSAEventSelect and WSAWaitForMultipleEvents to wait, then use select to find out what actually changed. WSAWaitForMultipleEvents cannot be used alone, because it does not support more than 64 events which is not enough. To reduce unnecessary overhead, WSA is only used when there are more than one family. Moreover, AF_INET and AF_INET6 are considered the same family because Windows seems to handle them properly. See get_fd_family for details. */ // If there is just one family, there is no reason to use WSA events. int rc = 0; const bool use_wsa_events = _family_entries.size () > 1; if (use_wsa_events) { // TODO: I don't really understand why we are doing this. If any of // the events was signaled, we will call select for each fd_family // afterwards. The only benefit is if none of the events was // signaled, then we continue early. // IMHO, either WSAEventSelect/WSAWaitForMultipleEvents or select // should be used, but not both wsa_events_t wsa_events; for (family_entries_t::iterator family_entry_it = _family_entries.begin (); family_entry_it != _family_entries.end (); ++family_entry_it) { family_entry_t &family_entry = family_entry_it->second; for (fd_entries_t::iterator fd_entry_it = family_entry.fd_entries.begin (); fd_entry_it != family_entry.fd_entries.end (); ++fd_entry_it) { fd_t fd = fd_entry_it->fd; // http://stackoverflow.com/q/35043420/188530 if (FD_ISSET (fd, &family_entry.fds_set.read) && FD_ISSET (fd, &family_entry.fds_set.write)) rc = WSAEventSelect (fd, wsa_events.events[3], FD_READ | FD_ACCEPT | FD_CLOSE | FD_WRITE | FD_CONNECT | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.read)) rc = WSAEventSelect (fd, wsa_events.events[0], FD_READ | FD_ACCEPT | FD_CLOSE | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.write)) rc = WSAEventSelect (fd, wsa_events.events[1], FD_WRITE | FD_CONNECT | FD_OOB); else if (FD_ISSET (fd, &family_entry.fds_set.error)) rc = WSAEventSelect (fd, wsa_events.events[2], FD_OOB); else rc = 0; wsa_assert (rc != SOCKET_ERROR); } } rc = WSAWaitForMultipleEvents (4, wsa_events.events, FALSE, timeout ? timeout : INFINITE, FALSE); wsa_assert (rc != (int) WSA_WAIT_FAILED); zmq_assert (rc != WSA_WAIT_IO_COMPLETION); if (rc == WSA_WAIT_TIMEOUT) continue; } for (_current_family_entry_it = _family_entries.begin (); _current_family_entry_it != _family_entries.end (); ++_current_family_entry_it) { family_entry_t &family_entry = _current_family_entry_it->second; if (use_wsa_events) { // There is no reason to wait again after WSAWaitForMultipleEvents. // Simply collect what is ready. struct timeval tv_nodelay = {0, 0}; select_family_entry (family_entry, 0, true, tv_nodelay); } else { select_family_entry (family_entry, 0, timeout > 0, tv); } } #else select_family_entry (_family_entry, _max_fd + 1, timeout > 0, tv); #endif } } void zmq::select_t::select_family_entry (family_entry_t &family_entry_, const int max_fd_, const bool use_timeout_, struct timeval &tv_) { // select will fail when run with empty sets. fd_entries_t &fd_entries = family_entry_.fd_entries; if (fd_entries.empty ()) return; fds_set_t local_fds_set = family_entry_.fds_set; int rc = select (max_fd_, &local_fds_set.read, &local_fds_set.write, &local_fds_set.error, use_timeout_ ? &tv_ : NULL); #if defined ZMQ_HAVE_WINDOWS wsa_assert (rc != SOCKET_ERROR); #else if (rc == -1) { errno_assert (errno == EINTR); return; } #endif trigger_events (fd_entries, local_fds_set, rc); cleanup_retired (family_entry_); } zmq::select_t::fds_set_t::fds_set_t () { FD_ZERO (&read); FD_ZERO (&write); FD_ZERO (&error); } zmq::select_t::fds_set_t::fds_set_t (const fds_set_t &other_) { #if defined ZMQ_HAVE_WINDOWS // On Windows we don't need to copy the whole fd_set. // SOCKETS are continuous from the beginning of fd_array in fd_set. // We just need to copy fd_count elements of fd_array. // We gain huge memcpy() improvement if number of used SOCKETs is much lower than FD_SETSIZE. memcpy (&read, &other_.read, (char *) (other_.read.fd_array + other_.read.fd_count) - (char *) &other_.read); memcpy (&write, &other_.write, (char *) (other_.write.fd_array + other_.write.fd_count) - (char *) &other_.write); memcpy (&error, &other_.error, (char *) (other_.error.fd_array + other_.error.fd_count) - (char *) &other_.error); #else memcpy (&read, &other_.read, sizeof other_.read); memcpy (&write, &other_.write, sizeof other_.write); memcpy (&error, &other_.error, sizeof other_.error); #endif } zmq::select_t::fds_set_t &zmq::select_t::fds_set_t:: operator= (const fds_set_t &other_) { #if defined ZMQ_HAVE_WINDOWS // On Windows we don't need to copy the whole fd_set. // SOCKETS are continuous from the beginning of fd_array in fd_set. // We just need to copy fd_count elements of fd_array. // We gain huge memcpy() improvement if number of used SOCKETs is much lower than FD_SETSIZE. memcpy (&read, &other_.read, (char *) (other_.read.fd_array + other_.read.fd_count) - (char *) &other_.read); memcpy (&write, &other_.write, (char *) (other_.write.fd_array + other_.write.fd_count) - (char *) &other_.write); memcpy (&error, &other_.error, (char *) (other_.error.fd_array + other_.error.fd_count) - (char *) &other_.error); #else memcpy (&read, &other_.read, sizeof other_.read); memcpy (&write, &other_.write, sizeof other_.write); memcpy (&error, &other_.error, sizeof other_.error); #endif return *this; } void zmq::select_t::fds_set_t::remove_fd (const fd_t &fd_) { FD_CLR (fd_, &read); FD_CLR (fd_, &write); FD_CLR (fd_, &error); } bool zmq::select_t::cleanup_retired (family_entry_t &family_entry_) { if (family_entry_.has_retired) { family_entry_.has_retired = false; family_entry_.fd_entries.erase ( std::remove_if (family_entry_.fd_entries.begin (), family_entry_.fd_entries.end (), is_retired_fd), family_entry_.fd_entries.end ()); } return family_entry_.fd_entries.empty (); } void zmq::select_t::cleanup_retired () { #ifdef _WIN32 for (family_entries_t::iterator it = _family_entries.begin (); it != _family_entries.end ();) { if (cleanup_retired (it->second)) it = _family_entries.erase (it); else ++it; } #else cleanup_retired (_family_entry); #endif } bool zmq::select_t::is_retired_fd (const fd_entry_t &entry_) { return entry_.fd == retired_fd; } zmq::select_t::family_entry_t::family_entry_t () : has_retired (false) { } #if defined ZMQ_HAVE_WINDOWS u_short zmq::select_t::get_fd_family (fd_t fd_) { // cache the results of determine_fd_family, as this is frequently called // for the same sockets, and determine_fd_family is expensive size_t i; for (i = 0; i < fd_family_cache_size; ++i) { const std::pair<fd_t, u_short> &entry = _fd_family_cache[i]; if (entry.first == fd_) { return entry.second; } if (entry.first == retired_fd) break; } std::pair<fd_t, u_short> res = std::make_pair (fd_, determine_fd_family (fd_)); if (i < fd_family_cache_size) { _fd_family_cache[i] = res; } else { // just overwrite a random entry // could be optimized by some LRU strategy _fd_family_cache[rand () % fd_family_cache_size] = res; } return res.second; } u_short zmq::select_t::determine_fd_family (fd_t fd_) { // Use sockaddr_storage instead of sockaddr to accommodate different structure sizes sockaddr_storage addr = {0}; int addr_size = sizeof addr; int type; int type_length = sizeof (int); int rc = getsockopt (fd_, SOL_SOCKET, SO_TYPE, reinterpret_cast<char *> (&type), &type_length); if (rc == 0) { if (type == SOCK_DGRAM) return AF_INET; rc = getsockname (fd_, reinterpret_cast<sockaddr *> (&addr), &addr_size); // AF_INET and AF_INET6 can be mixed in select // TODO: If proven otherwise, should simply return addr.sa_family if (rc != SOCKET_ERROR) return addr.ss_family == AF_INET6 ? AF_INET : addr.ss_family; } return AF_UNSPEC; } zmq::select_t::wsa_events_t::wsa_events_t () { events[0] = WSACreateEvent (); wsa_assert (events[0] != WSA_INVALID_EVENT); events[1] = WSACreateEvent (); wsa_assert (events[1] != WSA_INVALID_EVENT); events[2] = WSACreateEvent (); wsa_assert (events[2] != WSA_INVALID_EVENT); events[3] = WSACreateEvent (); wsa_assert (events[3] != WSA_INVALID_EVENT); } zmq::select_t::wsa_events_t::~wsa_events_t () { wsa_assert (WSACloseEvent (events[0])); wsa_assert (WSACloseEvent (events[1])); wsa_assert (WSACloseEvent (events[2])); wsa_assert (WSACloseEvent (events[3])); } #endif #endif