/* Path: blob/master/src/java.base/aix/native/libnet/aix_close.c */
/* 41120 views */
/*1* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.2* Copyright (c) 2016, 2019 SAP SE and/or its affiliates. All rights reserved.3* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.4*5* This code is free software; you can redistribute it and/or modify it6* under the terms of the GNU General Public License version 2 only, as7* published by the Free Software Foundation. Oracle designates this8* particular file as subject to the "Classpath" exception as provided9* by Oracle in the LICENSE file that accompanied this code.10*11* This code is distributed in the hope that it will be useful, but WITHOUT12* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or13* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License14* version 2 for more details (a copy is included in the LICENSE file that15* accompanied this code).16*17* You should have received a copy of the GNU General Public License version18* 2 along with this work; if not, write to the Free Software Foundation,19* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.20*21* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA22* or visit www.oracle.com if you need additional information or have any23* questions.24*/2526/*27* This file contains implementations of NET_... functions. The NET_.. functions are28* wrappers for common file- and socket functions plus provisions for non-blocking IO.29*30* (basically, the layers remember all file descriptors waiting for a particular fd;31* all threads waiting on a certain fd can be woken up by sending them a signal; this32* is done e.g. when the fd is closed.)33*34* This was originally copied from the linux_close.c implementation.35*36* Side Note: This coding needs initialization. 
Under Linux this is done37* automatically via __attribute((constructor)), on AIX this is done manually38* (see aix_close_init).39*40*/4142/*43AIX needs a workaround for I/O cancellation, see:44http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/close.htm45...46The close subroutine is blocked until all subroutines which use the file47descriptor return to usr space. For example, when a thread is calling close48and another thread is calling select with the same file descriptor, the49close subroutine does not return until the select call returns.50...51*/5253#include <assert.h>54#include <limits.h>55#include <stdio.h>56#include <stdlib.h>57#include <signal.h>58#include <pthread.h>59#include <sys/types.h>60#include <sys/socket.h>61#include <sys/time.h>62#include <sys/resource.h>63#include <sys/uio.h>64#include <unistd.h>65#include <errno.h>66#include <poll.h>67#include "jvm.h"68#include "net_util.h"6970/*71* Stack allocated by thread when doing blocking operation72*/73typedef struct threadEntry {74pthread_t thr; /* this thread */75struct threadEntry *next; /* next thread */76int intr; /* interrupted */77} threadEntry_t;7879/*80* Heap allocated during initialized - one entry per fd81*/82typedef struct {83pthread_mutex_t lock; /* fd lock */84threadEntry_t *threads; /* threads blocked on fd */85} fdEntry_t;8687/*88* Signal to unblock thread89*/90static int sigWakeup = (SIGRTMAX - 1);9192/*93* fdTable holds one entry per file descriptor, up to a certain94* maximum.95* Theoretically, the number of possible file descriptors can get96* large, though usually it does not. Entries for small value file97* descriptors are kept in a simple table, which covers most scenarios.98* Entries for large value file descriptors are kept in an overflow99* table, which is organized as a sparse two dimensional array whose100* slabs are allocated on demand. 
This covers all corner cases while101* keeping memory consumption reasonable.102*/103104/* Base table for low value file descriptors */105static fdEntry_t* fdTable = NULL;106/* Maximum size of base table (in number of entries). */107static const int fdTableMaxSize = 0x1000; /* 4K */108/* Actual size of base table (in number of entries) */109static int fdTableLen = 0;110/* Max. theoretical number of file descriptors on system. */111static int fdLimit = 0;112113/* Overflow table, should base table not be large enough. Organized as114* an array of n slabs, each holding 64k entries.115*/116static fdEntry_t** fdOverflowTable = NULL;117/* Number of slabs in the overflow table */118static int fdOverflowTableLen = 0;119/* Number of entries in one slab */120static const int fdOverflowTableSlabSize = 0x10000; /* 64k */121pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;122123/*124* Null signal handler125*/126static void sig_wakeup(int sig) {127}128129/*130* Initialization routine (executed when library is loaded)131* Allocate fd tables and sets up signal handler.132*133* On AIX we don't have __attribute((constructor)) so we need to initialize134* manually (from JNI_OnLoad() in 'src/share/native/java/net/net_util.c')135*/136void aix_close_init() {137struct rlimit nbr_files;138sigset_t sigset;139struct sigaction sa;140int i = 0;141142/* Determine the maximum number of possible file descriptors. */143if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {144fprintf(stderr, "library initialization failed - "145"unable to get max # of allocated fds\n");146abort();147}148if (nbr_files.rlim_max != RLIM_INFINITY) {149fdLimit = nbr_files.rlim_max;150} else {151/* We just do not know. */152fdLimit = INT_MAX;153}154155/* Allocate table for low value file descriptors. */156fdTableLen = fdLimit < fdTableMaxSize ? 
fdLimit : fdTableMaxSize;157fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));158if (fdTable == NULL) {159fprintf(stderr, "library initialization failed - "160"unable to allocate file descriptor table - out of memory");161abort();162} else {163for (i = 0; i < fdTableLen; i ++) {164pthread_mutex_init(&fdTable[i].lock, NULL);165}166}167168/* Allocate overflow table, if needed */169if (fdLimit > fdTableMaxSize) {170fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;171fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));172if (fdOverflowTable == NULL) {173fprintf(stderr, "library initialization failed - "174"unable to allocate file descriptor overflow table - out of memory");175abort();176}177}178179/*180* Setup the signal handler181*/182sa.sa_handler = sig_wakeup;183sa.sa_flags = 0;184sigemptyset(&sa.sa_mask);185sigaction(sigWakeup, &sa, NULL);186187sigemptyset(&sigset);188sigaddset(&sigset, sigWakeup);189sigprocmask(SIG_UNBLOCK, &sigset, NULL);190}191192/*193* Return the fd table for this fd.194*/195static inline fdEntry_t *getFdEntry(int fd)196{197fdEntry_t* result = NULL;198199if (fd < 0) {200return NULL;201}202203/* This should not happen. If it does, our assumption about204* max. fd value was wrong. */205assert(fd < fdLimit);206207if (fd < fdTableMaxSize) {208/* fd is in base table. */209assert(fd < fdTableLen);210result = &fdTable[fd];211} else {212/* fd is in overflow table. 
*/213const int indexInOverflowTable = fd - fdTableMaxSize;214const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;215const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;216fdEntry_t* slab = NULL;217assert(rootindex < fdOverflowTableLen);218assert(slabindex < fdOverflowTableSlabSize);219pthread_mutex_lock(&fdOverflowTableLock);220/* Allocate new slab in overflow table if needed */221if (fdOverflowTable[rootindex] == NULL) {222fdEntry_t* const newSlab =223(fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));224if (newSlab == NULL) {225fprintf(stderr, "Unable to allocate file descriptor overflow"226" table slab - out of memory");227pthread_mutex_unlock(&fdOverflowTableLock);228abort();229} else {230int i;231for (i = 0; i < fdOverflowTableSlabSize; i ++) {232pthread_mutex_init(&newSlab[i].lock, NULL);233}234fdOverflowTable[rootindex] = newSlab;235}236}237pthread_mutex_unlock(&fdOverflowTableLock);238slab = fdOverflowTable[rootindex];239result = &slab[slabindex];240}241242return result;243244}245246247/*248* Start a blocking operation :-249* Insert thread onto thread list for the fd.250*/251static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)252{253self->thr = pthread_self();254self->intr = 0;255256pthread_mutex_lock(&(fdEntry->lock));257{258self->next = fdEntry->threads;259fdEntry->threads = self;260}261pthread_mutex_unlock(&(fdEntry->lock));262}263264/*265* End a blocking operation :-266* Remove thread from thread list for the fd267* If fd has been interrupted then set errno to EBADF268*/269static inline void endOp270(fdEntry_t *fdEntry, threadEntry_t *self)271{272int orig_errno = errno;273pthread_mutex_lock(&(fdEntry->lock));274{275threadEntry_t *curr, *prev=NULL;276curr = fdEntry->threads;277while (curr != NULL) {278if (curr == self) {279if (curr->intr) {280orig_errno = EBADF;281}282if (prev == NULL) {283fdEntry->threads = curr->next;284} else {285prev->next = curr->next;286}287break;288}289prev = 
curr;290curr = curr->next;291}292}293pthread_mutex_unlock(&(fdEntry->lock));294errno = orig_errno;295}296297/*298* Close or dup2 a file descriptor ensuring that all threads blocked on299* the file descriptor are notified via a wakeup signal.300*301* fd1 < 0 => close(fd2)302* fd1 >= 0 => dup2(fd1, fd2)303*304* Returns -1 with errno set if operation fails.305*/306static int closefd(int fd1, int fd2) {307int rv, orig_errno;308fdEntry_t *fdEntry = getFdEntry(fd2);309if (fdEntry == NULL) {310errno = EBADF;311return -1;312}313314/*315* Lock the fd to hold-off additional I/O on this fd.316*/317pthread_mutex_lock(&(fdEntry->lock));318319{320/* On fast machines we see that we enter dup2 before the321* accepting thread had a chance to get and process the signal.322* So in case we woke a thread up, give it some time to cope.323* Also see https://bugs.openjdk.java.net/browse/JDK-8006395 */324int num_woken = 0;325326/*327* Send a wakeup signal to all threads blocked on this328* file descriptor.329*/330threadEntry_t *curr = fdEntry->threads;331while (curr != NULL) {332curr->intr = 1;333pthread_kill( curr->thr, sigWakeup );334num_woken ++;335curr = curr->next;336}337338if (num_woken > 0) {339usleep(num_woken * 50);340}341342/*343* And close/dup the file descriptor344* (restart if interrupted by signal)345*/346do {347if (fd1 < 0) {348rv = close(fd2);349} else {350rv = dup2(fd1, fd2);351}352} while (rv == -1 && errno == EINTR);353}354355/*356* Unlock without destroying errno357*/358orig_errno = errno;359pthread_mutex_unlock(&(fdEntry->lock));360errno = orig_errno;361362return rv;363}364365/*366* Wrapper for dup2 - same semantics as dup2 system call except367* that any threads blocked in an I/O system call on fd2 will be368* preempted and return -1/EBADF;369*/370int NET_Dup2(int fd, int fd2) {371if (fd < 0) {372errno = EBADF;373return -1;374}375return closefd(fd, fd2);376}377378/*379* Wrapper for close - same semantics as close system call380* except that any threads blocked in an 
I/O on fd will be381* preempted and the I/O system call will return -1/EBADF.382*/383int NET_SocketClose(int fd) {384return closefd(-1, fd);385}386387/************** Basic I/O operations here ***************/388389/*390* Macro to perform a blocking IO operation. Restarts391* automatically if interrupted by signal (other than392* our wakeup signal)393*/394#define BLOCKING_IO_RETURN_INT(FD, FUNC) { \395int ret; \396threadEntry_t self; \397fdEntry_t *fdEntry = getFdEntry(FD); \398if (fdEntry == NULL) { \399errno = EBADF; \400return -1; \401} \402do { \403startOp(fdEntry, &self); \404ret = FUNC; \405endOp(fdEntry, &self); \406} while (ret == -1 && errno == EINTR); \407return ret; \408}409410int NET_Read(int s, void* buf, size_t len) {411BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );412}413414int NET_NonBlockingRead(int s, void* buf, size_t len) {415BLOCKING_IO_RETURN_INT(s, recv(s, buf, len, MSG_NONBLOCK));416}417418int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,419struct sockaddr *from, socklen_t *fromlen) {420BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );421}422423int NET_Send(int s, void *msg, int len, unsigned int flags) {424BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );425}426427int NET_SendTo(int s, const void *msg, int len, unsigned int428flags, const struct sockaddr *to, int tolen) {429BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );430}431432int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {433BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );434}435436int NET_Connect(int s, struct sockaddr *addr, int addrlen) {437int crc = -1, prc = -1;438threadEntry_t self;439fdEntry_t* fdEntry = getFdEntry(s);440441if (fdEntry == NULL) {442errno = EBADF;443return -1;444}445446/* On AIX, when the system call connect() is interrupted, the connection447* is not aborted and it will be established asynchronously by the kernel.448* Hence, no need to restart connect() when EINTR is 
received449*/450startOp(fdEntry, &self);451crc = connect(s, addr, addrlen);452endOp(fdEntry, &self);453454if (crc == -1 && errno == EINTR) {455struct pollfd s_pollfd;456int sockopt_arg = 0;457socklen_t len;458459s_pollfd.fd = s;460s_pollfd.events = POLLOUT | POLLERR;461462/* poll the file descriptor */463do {464startOp(fdEntry, &self);465prc = poll(&s_pollfd, 1, -1);466endOp(fdEntry, &self);467} while (prc == -1 && errno == EINTR);468469if (prc < 0)470return prc;471472len = sizeof(sockopt_arg);473474/* Check whether the connection has been established */475if (getsockopt(s, SOL_SOCKET, SO_ERROR, &sockopt_arg, &len) == -1)476return -1;477478if (sockopt_arg != 0 ) {479errno = sockopt_arg;480return -1;481}482} else {483return crc;484}485486/* At this point, fd is connected. Set successful return code */487return 0;488}489490int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {491BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );492}493494/*495* Wrapper for poll(s, timeout).496* Auto restarts with adjusted timeout if interrupted by497* signal other than our wakeup signal.498*/499int NET_Timeout(JNIEnv *env, int s, long timeout, jlong nanoTimeStamp) {500jlong prevNanoTime = nanoTimeStamp;501jlong nanoTimeout = (jlong) timeout * NET_NSEC_PER_MSEC;502fdEntry_t *fdEntry = getFdEntry(s);503504/*505* Check that fd hasn't been closed.506*/507if (fdEntry == NULL) {508errno = EBADF;509return -1;510}511512for(;;) {513struct pollfd pfd;514int rv;515threadEntry_t self;516517/*518* Poll the fd. If interrupted by our wakeup signal519* errno will be set to EBADF.520*/521pfd.fd = s;522pfd.events = POLLIN | POLLERR;523524startOp(fdEntry, &self);525rv = poll(&pfd, 1, nanoTimeout / NET_NSEC_PER_MSEC);526endOp(fdEntry, &self);527528/*529* If interrupted then adjust timeout. 
If timeout530* has expired return 0 (indicating timeout expired).531*/532if (rv < 0 && errno == EINTR) {533if (timeout > 0) {534jlong newNanoTime = JVM_NanoTime(env, 0);535nanoTimeout -= newNanoTime - prevNanoTime;536if (nanoTimeout < NET_NSEC_PER_MSEC) {537return 0;538}539prevNanoTime = newNanoTime;540} else {541continue; // timeout is -1, so loop again.542}543} else {544return rv;545}546}547}548549550