/*
 * Path: blob/master/src/java.base/linux/native/libnet/linux_close.c
 * 41119 views
 */
/*1* Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/2425#include <assert.h>26#include <limits.h>27#include <stdio.h>28#include <stdlib.h>29#include <signal.h>30#include <pthread.h>31#include <sys/types.h>32#include <sys/socket.h>33#include <sys/time.h>34#include <sys/resource.h>35#include <sys/uio.h>36#include <unistd.h>37#include <errno.h>38#include <poll.h>39#include "jvm.h"40#include "net_util.h"4142/*43* Stack allocated by thread when doing blocking operation44*/45typedef struct threadEntry {46pthread_t thr; /* this thread */47struct threadEntry *next; /* next thread */48int intr; /* interrupted */49} threadEntry_t;5051/*52* Heap allocated during initialized - one entry per fd53*/54typedef struct {55pthread_mutex_t lock; /* fd lock */56threadEntry_t *threads; /* threads blocked on fd */57} fdEntry_t;5859/*60* Signal to unblock 
thread61*/62#define WAKEUP_SIGNAL (SIGRTMAX - 2)6364/*65* fdTable holds one entry per file descriptor, up to a certain66* maximum.67* Theoretically, the number of possible file descriptors can get68* large, though usually it does not. Entries for small value file69* descriptors are kept in a simple table, which covers most scenarios.70* Entries for large value file descriptors are kept in an overflow71* table, which is organized as a sparse two dimensional array whose72* slabs are allocated on demand. This covers all corner cases while73* keeping memory consumption reasonable.74*/7576/* Base table for low value file descriptors */77static fdEntry_t* fdTable = NULL;78/* Maximum size of base table (in number of entries). */79static const int fdTableMaxSize = 0x1000; /* 4K */80/* Actual size of base table (in number of entries) */81static int fdTableLen = 0;82/* Max. theoretical number of file descriptors on system. */83static int fdLimit = 0;8485/* Overflow table, should base table not be large enough. Organized as86* an array of n slabs, each holding 64k entries.87*/88static fdEntry_t** fdOverflowTable = NULL;89/* Number of slabs in the overflow table */90static int fdOverflowTableLen = 0;91/* Number of entries in one slab */92static const int fdOverflowTableSlabSize = 0x10000; /* 64k */93pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;9495/*96* Null signal handler97*/98static void sig_wakeup(int sig) {99}100101/*102* Initialization routine (executed when library is loaded)103* Allocate fd tables and sets up signal handler.104*/105static void __attribute((constructor)) init() {106struct rlimit nbr_files;107sigset_t sigset;108struct sigaction sa;109int i = 0;110111/* Determine the maximum number of possible file descriptors. 
*/112if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {113fprintf(stderr, "library initialization failed - "114"unable to get max # of allocated fds\n");115abort();116}117if (nbr_files.rlim_max != RLIM_INFINITY) {118fdLimit = nbr_files.rlim_max;119} else {120/* We just do not know. */121fdLimit = INT_MAX;122}123124/* Allocate table for low value file descriptors. */125fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;126fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));127if (fdTable == NULL) {128fprintf(stderr, "library initialization failed - "129"unable to allocate file descriptor table - out of memory");130abort();131} else {132for (i = 0; i < fdTableLen; i ++) {133pthread_mutex_init(&fdTable[i].lock, NULL);134}135}136137/* Allocate overflow table, if needed */138if (fdLimit > fdTableMaxSize) {139fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;140fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));141if (fdOverflowTable == NULL) {142fprintf(stderr, "library initialization failed - "143"unable to allocate file descriptor overflow table - out of memory");144abort();145}146}147148/*149* Setup the signal handler150*/151sa.sa_handler = sig_wakeup;152sa.sa_flags = 0;153sigemptyset(&sa.sa_mask);154sigaction(WAKEUP_SIGNAL, &sa, NULL);155156sigemptyset(&sigset);157sigaddset(&sigset, WAKEUP_SIGNAL);158sigprocmask(SIG_UNBLOCK, &sigset, NULL);159}160161/*162* Return the fd table for this fd.163*/164static inline fdEntry_t *getFdEntry(int fd)165{166fdEntry_t* result = NULL;167168if (fd < 0) {169return NULL;170}171172/* This should not happen. If it does, our assumption about173* max. fd value was wrong. */174assert(fd < fdLimit);175176if (fd < fdTableMaxSize) {177/* fd is in base table. */178assert(fd < fdTableLen);179result = &fdTable[fd];180} else {181/* fd is in overflow table. 
*/182const int indexInOverflowTable = fd - fdTableMaxSize;183const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;184const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;185fdEntry_t* slab = NULL;186assert(rootindex < fdOverflowTableLen);187assert(slabindex < fdOverflowTableSlabSize);188pthread_mutex_lock(&fdOverflowTableLock);189/* Allocate new slab in overflow table if needed */190if (fdOverflowTable[rootindex] == NULL) {191fdEntry_t* const newSlab =192(fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));193if (newSlab == NULL) {194fprintf(stderr, "Unable to allocate file descriptor overflow"195" table slab - out of memory");196pthread_mutex_unlock(&fdOverflowTableLock);197abort();198} else {199int i;200for (i = 0; i < fdOverflowTableSlabSize; i ++) {201pthread_mutex_init(&newSlab[i].lock, NULL);202}203fdOverflowTable[rootindex] = newSlab;204}205}206pthread_mutex_unlock(&fdOverflowTableLock);207slab = fdOverflowTable[rootindex];208result = &slab[slabindex];209}210211return result;212213}214215/*216* Start a blocking operation :-217* Insert thread onto thread list for the fd.218*/219static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)220{221self->thr = pthread_self();222self->intr = 0;223224pthread_mutex_lock(&(fdEntry->lock));225{226self->next = fdEntry->threads;227fdEntry->threads = self;228}229pthread_mutex_unlock(&(fdEntry->lock));230}231232/*233* End a blocking operation :-234* Remove thread from thread list for the fd235* If fd has been interrupted then set errno to EBADF236*/237static inline void endOp238(fdEntry_t *fdEntry, threadEntry_t *self)239{240int orig_errno = errno;241pthread_mutex_lock(&(fdEntry->lock));242{243threadEntry_t *curr, *prev=NULL;244curr = fdEntry->threads;245while (curr != NULL) {246if (curr == self) {247if (curr->intr) {248orig_errno = EBADF;249}250if (prev == NULL) {251fdEntry->threads = curr->next;252} else {253prev->next = curr->next;254}255break;256}257prev = curr;258curr = 
curr->next;259}260}261pthread_mutex_unlock(&(fdEntry->lock));262errno = orig_errno;263}264265/*266* Close or dup2 a file descriptor ensuring that all threads blocked on267* the file descriptor are notified via a wakeup signal.268*269* fd1 < 0 => close(fd2)270* fd1 >= 0 => dup2(fd1, fd2)271*272* Returns -1 with errno set if operation fails.273*/274static int closefd(int fd1, int fd2) {275int rv, orig_errno;276fdEntry_t *fdEntry = getFdEntry(fd2);277if (fdEntry == NULL) {278errno = EBADF;279return -1;280}281282/*283* Lock the fd to hold-off additional I/O on this fd.284*/285pthread_mutex_lock(&(fdEntry->lock));286287{288/*289* And close/dup the file descriptor290* (restart if interrupted by signal)291*/292if (fd1 < 0) {293rv = close(fd2);294} else {295do {296rv = dup2(fd1, fd2);297} while (rv == -1 && errno == EINTR);298}299300/*301* Send a wakeup signal to all threads blocked on this302* file descriptor.303*/304threadEntry_t *curr = fdEntry->threads;305while (curr != NULL) {306curr->intr = 1;307pthread_kill( curr->thr, WAKEUP_SIGNAL);308curr = curr->next;309}310}311312/*313* Unlock without destroying errno314*/315orig_errno = errno;316pthread_mutex_unlock(&(fdEntry->lock));317errno = orig_errno;318319return rv;320}321322/*323* Wrapper for dup2 - same semantics as dup2 system call except324* that any threads blocked in an I/O system call on fd2 will be325* preempted and return -1/EBADF;326*/327int NET_Dup2(int fd, int fd2) {328if (fd < 0) {329errno = EBADF;330return -1;331}332return closefd(fd, fd2);333}334335/*336* Wrapper for close - same semantics as close system call337* except that any threads blocked in an I/O on fd will be338* preempted and the I/O system call will return -1/EBADF.339*/340int NET_SocketClose(int fd) {341return closefd(-1, fd);342}343344/************** Basic I/O operations here ***************/345346/*347* Macro to perform a blocking IO operation. 
Restarts348* automatically if interrupted by signal (other than349* our wakeup signal)350*/351#define BLOCKING_IO_RETURN_INT(FD, FUNC) { \352int ret; \353threadEntry_t self; \354fdEntry_t *fdEntry = getFdEntry(FD); \355if (fdEntry == NULL) { \356errno = EBADF; \357return -1; \358} \359do { \360startOp(fdEntry, &self); \361ret = FUNC; \362endOp(fdEntry, &self); \363} while (ret == -1 && errno == EINTR); \364return ret; \365}366367int NET_Read(int s, void* buf, size_t len) {368BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );369}370371int NET_NonBlockingRead(int s, void* buf, size_t len) {372BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );373}374375int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,376struct sockaddr *from, socklen_t *fromlen) {377BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );378}379380int NET_Send(int s, void *msg, int len, unsigned int flags) {381BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );382}383384int NET_SendTo(int s, const void *msg, int len, unsigned int385flags, const struct sockaddr *to, int tolen) {386BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );387}388389int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {390BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );391}392393int NET_Connect(int s, struct sockaddr *addr, int addrlen) {394BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );395}396397int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {398BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );399}400401/*402* Wrapper for poll(s, timeout).403* Auto restarts with adjusted timeout if interrupted by404* signal other than our wakeup signal.405*/406int NET_Timeout(JNIEnv *env, int s, long timeout, jlong nanoTimeStamp) {407jlong prevNanoTime = nanoTimeStamp;408jlong nanoTimeout = (jlong)timeout * NET_NSEC_PER_MSEC;409fdEntry_t *fdEntry = getFdEntry(s);410411/*412* Check that fd hasn't been closed.413*/414if 
(fdEntry == NULL) {415errno = EBADF;416return -1;417}418419for(;;) {420struct pollfd pfd;421int rv;422threadEntry_t self;423424/*425* Poll the fd. If interrupted by our wakeup signal426* errno will be set to EBADF.427*/428pfd.fd = s;429pfd.events = POLLIN | POLLERR;430431startOp(fdEntry, &self);432rv = poll(&pfd, 1, nanoTimeout / NET_NSEC_PER_MSEC);433endOp(fdEntry, &self);434/*435* If interrupted then adjust timeout. If timeout436* has expired return 0 (indicating timeout expired).437*/438if (rv < 0 && errno == EINTR) {439if (timeout > 0) {440jlong newNanoTime = JVM_NanoTime(env, 0);441nanoTimeout -= newNanoTime - prevNanoTime;442if (nanoTimeout < NET_NSEC_PER_MSEC) {443return 0;444}445prevNanoTime = newNanoTime;446} else {447continue; // timeout is -1, so loop again.448}449} else {450return rv;451}452}453}454455456