Path: blob/aarch64-shenandoah-jdk8u272-b10/hotspot/src/os/linux/vm/osContainer_linux.cpp
32285 views
/*1* Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#include <string.h>25#include <math.h>26#include <errno.h>27#include "utilities/globalDefinitions.hpp"28#include "memory/allocation.hpp"29#include "runtime/os.hpp"30#include "osContainer_linux.hpp"3132#define PER_CPU_SHARES 10243334bool OSContainer::_is_initialized = false;35bool OSContainer::_is_containerized = false;36int OSContainer::_active_processor_count = 1;37julong _unlimited_memory;3839class CgroupSubsystem: CHeapObj<mtInternal> {40friend class OSContainer;4142private:43volatile jlong _next_check_counter;4445/* mountinfo contents */46char *_root;47char *_mount_point;4849/* Constructed subsystem directory */50char *_path;5152public:53CgroupSubsystem(char *root, char *mountpoint) {54_root = os::strdup(root);55_mount_point = os::strdup(mountpoint);56_path = NULL;57_next_check_counter = min_jlong;58}5960/*61* Set directory to subsystem specific files based62* on the contents of the mountinfo and cgroup files.63*/64void set_subsystem_path(char *cgroup_path) {65char buf[MAXPATHLEN+1];66if (_root != NULL && cgroup_path != NULL) {67if (strcmp(_root, "/") == 0) {68int buflen;69strncpy(buf, _mount_point, MAXPATHLEN);70buf[MAXPATHLEN-1] = '\0';71if (strcmp(cgroup_path,"/") != 0) {72buflen = strlen(buf);73if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) {74return;75}76strncat(buf, cgroup_path, MAXPATHLEN-buflen);77buf[MAXPATHLEN-1] = '\0';78}79_path = os::strdup(buf);80} else {81if (strcmp(_root, cgroup_path) == 0) {82strncpy(buf, _mount_point, MAXPATHLEN);83buf[MAXPATHLEN-1] = '\0';84_path = os::strdup(buf);85} else {86char *p = strstr(cgroup_path, _root);87if (p != NULL && p == _root) {88if (strlen(cgroup_path) > strlen(_root)) {89int buflen;90strncpy(buf, _mount_point, MAXPATHLEN);91buf[MAXPATHLEN-1] = '\0';92buflen = strlen(buf);93if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) {94return;95}96strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen);97buf[MAXPATHLEN-1] = '\0';98_path = os::strdup(buf);99}100}101}102}103}104}105106char *subsystem_path() { return _path; }107108bool cache_has_expired() {109return os::elapsed_counter() > _next_check_counter;110}111112void set_cache_expiry_time(jlong timeout) {113_next_check_counter = os::elapsed_counter() + timeout;114}115};116117class CgroupMemorySubsystem: CgroupSubsystem {118friend class OSContainer;119120private:121/* Some container runtimes set limits via cgroup122* hierarchy. If set to true consider also memory.stat123* file if everything else seems unlimited */124bool _uses_mem_hierarchy;125126public:127CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) {128_uses_mem_hierarchy = false;129}130131bool is_hierarchical() { return _uses_mem_hierarchy; }132void set_hierarchical(bool value) { _uses_mem_hierarchy = value; }133};134135CgroupMemorySubsystem* memory = NULL;136CgroupSubsystem* cpuset = NULL;137CgroupSubsystem* cpu = NULL;138CgroupSubsystem* cpuacct = NULL;139140typedef char * cptr;141142PRAGMA_DIAG_PUSH143PRAGMA_FORMAT_NONLITERAL_IGNORED144template <typename T> int subsystem_file_line_contents(CgroupSubsystem* c,145const char *filename,146const char *matchline,147const char *scan_fmt,148T returnval) {149FILE *fp = NULL;150char *p;151char file[MAXPATHLEN+1];152char buf[MAXPATHLEN+1];153char discard[MAXPATHLEN+1];154bool found_match = false;155156if (c == NULL) {157if (PrintContainerInfo) {158tty->print_cr("subsystem_file_line_contents: CgroupSubsytem* is NULL");159}160return OSCONTAINER_ERROR;161}162if (c->subsystem_path() == NULL) {163if (PrintContainerInfo) {164tty->print_cr("subsystem_file_line_contents: subsystem path is NULL");165}166return OSCONTAINER_ERROR;167}168169strncpy(file, c->subsystem_path(), MAXPATHLEN);170file[MAXPATHLEN-1] = '\0';171int filelen = strlen(file);172if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {173if (PrintContainerInfo) {174tty->print_cr("File path too long %s, %s", file, filename);175}176return OSCONTAINER_ERROR;177}178strncat(file, filename, MAXPATHLEN-filelen);179if (PrintContainerInfo) {180tty->print_cr("Path to %s is %s", filename, file);181}182fp = fopen(file, "r");183if (fp != NULL) {184int err = 0;185while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {186found_match = false;187if (matchline == NULL) {188// single-line file case189int matched = sscanf(p, scan_fmt, returnval);190found_match = (matched == 1);191} else {192// multi-line file case193if (strstr(p, matchline) != NULL) {194// discard matchline string prefix195int matched = sscanf(p, scan_fmt, discard, returnval);196found_match = (matched == 2);197} else {198continue; // substring not found199}200}201if (found_match) {202fclose(fp);203return 0;204} else {205err = 1;206if (PrintContainerInfo) {207tty->print_cr("Type %s not found in file %s", scan_fmt, file);208}209}210if (err == 0 && PrintContainerInfo) {211tty->print_cr("Empty file %s", file);212}213}214} else {215if (PrintContainerInfo) {216tty->print_cr("Open of file %s failed, %s", file, strerror(errno));217}218}219if (fp != NULL)220fclose(fp);221return OSCONTAINER_ERROR;222}223PRAGMA_DIAG_POP224225#define GET_CONTAINER_INFO(return_type, subsystem, filename, \226logstring, scan_fmt, variable) \227return_type variable; \228{ \229int err; \230err = subsystem_file_line_contents(subsystem, \231filename, \232NULL, \233scan_fmt, \234&variable); \235if (err != 0) \236return (return_type) OSCONTAINER_ERROR; \237\238if (PrintContainerInfo) \239tty->print_cr(logstring, variable); \240}241242#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \243logstring, scan_fmt, variable, bufsize) \244char variable[bufsize]; \245{ \246int err; \247err = subsystem_file_line_contents(subsystem, \248filename, \249NULL, \250scan_fmt, \251variable); \252if (err != 0) \253return (return_type) NULL; \254\255if (PrintContainerInfo) \256tty->print_cr(logstring, variable); \257}258259#define GET_CONTAINER_INFO_LINE(return_type, subsystem, filename, \260matchline, logstring, scan_fmt, variable) \261return_type variable; \262{ \263int err; \264err = subsystem_file_line_contents(subsystem, \265filename, \266matchline, \267scan_fmt, \268&variable); \269if (err != 0) \270return (return_type) OSCONTAINER_ERROR; \271\272if (PrintContainerInfo) \273tty->print_cr(logstring, variable); \274}275276277/* init278*279* Initialize the container support and determine if280* we are running under cgroup control.281*/282void OSContainer::init() {283FILE *mntinfo = NULL;284FILE *cgroup = NULL;285char buf[MAXPATHLEN+1];286char tmproot[MAXPATHLEN+1];287char tmpmount[MAXPATHLEN+1];288char *p;289jlong mem_limit;290291assert(!_is_initialized, "Initializing OSContainer more than once");292293_is_initialized = true;294_is_containerized = false;295296_unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size();297298if (PrintContainerInfo) {299tty->print_cr("OSContainer::init: Initializing Container Support");300}301if (!UseContainerSupport) {302if (PrintContainerInfo) {303tty->print_cr("Container Support not enabled");304}305return;306}307308/*309* Find the cgroup mount point for memory and cpuset310* by reading /proc/self/mountinfo311*312* Example for docker:313* 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory314*315* Example for host:316* 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory317*/318mntinfo = fopen("/proc/self/mountinfo", "r");319if (mntinfo == NULL) {320if (PrintContainerInfo) {321tty->print_cr("Can't open /proc/self/mountinfo, %s",322strerror(errno));323}324return;325}326327while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {328char tmpcgroups[MAXPATHLEN+1];329char *cptr = tmpcgroups;330char *token;331332// mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt333if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {334continue;335}336while ((token = strsep(&cptr, ",")) != NULL) {337if (strcmp(token, "memory") == 0) {338memory = new CgroupMemorySubsystem(tmproot, tmpmount);339} else if (strcmp(token, "cpuset") == 0) {340cpuset = new CgroupSubsystem(tmproot, tmpmount);341} else if (strcmp(token, "cpu") == 0) {342cpu = new CgroupSubsystem(tmproot, tmpmount);343} else if (strcmp(token, "cpuacct") == 0) {344cpuacct= new CgroupSubsystem(tmproot, tmpmount);345}346}347}348fclose(mntinfo);349350if (memory == NULL) {351if (PrintContainerInfo) {352tty->print_cr("Required cgroup memory subsystem not found");353}354return;355}356if (cpuset == NULL) {357if (PrintContainerInfo) {358tty->print_cr("Required cgroup cpuset subsystem not found");359}360return;361}362if (cpu == NULL) {363if (PrintContainerInfo) {364tty->print_cr("Required cgroup cpu subsystem not found");365}366return;367}368if (cpuacct == NULL) {369if (PrintContainerInfo) {370tty->print_cr("Required cgroup cpuacct subsystem not found");371}372return;373}374375/*376* Read /proc/self/cgroup and map host mount point to377* local one via /proc/self/mountinfo content above378*379* Docker example:380* 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044381*382* Host example:383* 5:memory:/user.slice384*385* Construct a path to the process specific memory and cpuset386* cgroup directory.387*388* For a container running under Docker from memory example above389* the paths would be:390*391* /sys/fs/cgroup/memory392*393* For a Host from memory example above the path would be:394*395* /sys/fs/cgroup/memory/user.slice396*397*/398cgroup = fopen("/proc/self/cgroup", "r");399if (cgroup == NULL) {400if (PrintContainerInfo) {401tty->print_cr("Can't open /proc/self/cgroup, %s",402strerror(errno));403}404return;405}406407while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {408char *controllers;409char *token;410char *base;411412/* Skip cgroup number */413strsep(&p, ":");414/* Get controllers and base */415controllers = strsep(&p, ":");416base = strsep(&p, "\n");417418if (controllers == NULL) {419continue;420}421422while ((token = strsep(&controllers, ",")) != NULL) {423if (strcmp(token, "memory") == 0) {424memory->set_subsystem_path(base);425jlong hierarchy = uses_mem_hierarchy();426if (hierarchy > 0) {427memory->set_hierarchical(true);428}429} else if (strcmp(token, "cpuset") == 0) {430cpuset->set_subsystem_path(base);431} else if (strcmp(token, "cpu") == 0) {432cpu->set_subsystem_path(base);433} else if (strcmp(token, "cpuacct") == 0) {434cpuacct->set_subsystem_path(base);435}436}437}438439fclose(cgroup);440441// We need to update the amount of physical memory now that442// command line arguments have been processed.443if ((mem_limit = memory_limit_in_bytes()) > 0) {444os::Linux::set_physical_memory(mem_limit);445if (PrintContainerInfo) {446tty->print_cr("Memory Limit is: " JLONG_FORMAT, mem_limit);447}448}449450_is_containerized = true;451452}453454const char * OSContainer::container_type() {455if (is_containerized()) {456return "cgroupv1";457} else {458return NULL;459}460}461462/* uses_mem_hierarchy463*464* Return whether or not hierarchical cgroup accounting is being465* done.466*467* return:468* A number > 0 if true, or469* OSCONTAINER_ERROR for not supported470*/471jlong OSContainer::uses_mem_hierarchy() {472GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy",473"Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy);474return use_hierarchy;475}476477478/* memory_limit_in_bytes479*480* Return the limit of available memory for this process.481*482* return:483* memory limit in bytes or484* -1 for unlimited485* OSCONTAINER_ERROR for not supported486*/487jlong OSContainer::memory_limit_in_bytes() {488GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes",489"Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit);490491if (memlimit >= _unlimited_memory) {492if (PrintContainerInfo) {493tty->print_cr("Non-Hierarchical Memory Limit is: Unlimited");494}495if (memory->is_hierarchical()) {496const char* matchline = "hierarchical_memory_limit";497const char* format = "%s " JULONG_FORMAT;498GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,499"Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit)500if (hier_memlimit >= _unlimited_memory) {501if (PrintContainerInfo) {502tty->print_cr("Hierarchical Memory Limit is: Unlimited");503}504} else {505return (jlong)hier_memlimit;506}507}508return (jlong)-1;509}510else {511return (jlong)memlimit;512}513}514515jlong OSContainer::memory_and_swap_limit_in_bytes() {516GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes",517"Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit);518if (memswlimit >= _unlimited_memory) {519if (PrintContainerInfo) {520tty->print_cr("Non-Hierarchical Memory and Swap Limit is: Unlimited");521}522if (memory->is_hierarchical()) {523const char* matchline = "hierarchical_memsw_limit";524const char* format = "%s " JULONG_FORMAT;525GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline,526"Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit)527if (hier_memlimit >= _unlimited_memory) {528if (PrintContainerInfo) {529tty->print_cr("Hierarchical Memory and Swap Limit is: Unlimited");530}531} else {532return (jlong)hier_memlimit;533}534}535return (jlong)-1;536} else {537return (jlong)memswlimit;538}539}540541jlong OSContainer::memory_soft_limit_in_bytes() {542GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes",543"Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit);544if (memsoftlimit >= _unlimited_memory) {545if (PrintContainerInfo) {546tty->print_cr("Memory Soft Limit is: Unlimited");547}548return (jlong)-1;549} else {550return (jlong)memsoftlimit;551}552}553554/* memory_usage_in_bytes555*556* Return the amount of used memory for this process.557*558* return:559* memory usage in bytes or560* -1 for unlimited561* OSCONTAINER_ERROR for not supported562*/563jlong OSContainer::memory_usage_in_bytes() {564GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes",565"Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage);566return memusage;567}568569/* memory_max_usage_in_bytes570*571* Return the maximum amount of used memory for this process.572*573* return:574* max memory usage in bytes or575* OSCONTAINER_ERROR for not supported576*/577jlong OSContainer::memory_max_usage_in_bytes() {578GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes",579"Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage);580return memmaxusage;581}582583/* active_processor_count584*585* Calculate an appropriate number of active processors for the586* VM to use based on these three inputs.587*588* cpu affinity589* cgroup cpu quota & cpu period590* cgroup cpu shares591*592* Algorithm:593*594* Determine the number of available CPUs from sched_getaffinity595*596* If user specified a quota (quota != -1), calculate the number of597* required CPUs by dividing quota by period.598*599* If shares are in effect (shares != -1), calculate the number600* of CPUs required for the shares by dividing the share value601* by PER_CPU_SHARES.602*603* All results of division are rounded up to the next whole number.604*605* If neither shares or quotas have been specified, return the606* number of active processors in the system.607*608* If both shares and quotas have been specified, the results are609* based on the flag PreferContainerQuotaForCPUCount. If true,610* return the quota value. If false return the smallest value611* between shares or quotas.612*613* If shares and/or quotas have been specified, the resulting number614* returned will never exceed the number of active processors.615*616* return:617* number of CPUs618*/619int OSContainer::active_processor_count() {620int quota_count = 0, share_count = 0;621int cpu_count, limit_count;622int result;623624// We use a cache with a timeout to avoid performing expensive625// computations in the event this function is called frequently.626// [See 8227006].627if (!cpu->cache_has_expired()) {628if (PrintContainerInfo) {629tty->print_cr("OSContainer::active_processor_count (cached): %d", OSContainer::_active_processor_count);630}631632return OSContainer::_active_processor_count;633}634635cpu_count = limit_count = os::Linux::active_processor_count();636int quota = cpu_quota();637int period = cpu_period();638int share = cpu_shares();639640if (quota > -1 && period > 0) {641quota_count = ceilf((float)quota / (float)period);642if (PrintContainerInfo) {643tty->print_cr("CPU Quota count based on quota/period: %d", quota_count);644}645}646if (share > -1) {647share_count = ceilf((float)share / (float)PER_CPU_SHARES);648if (PrintContainerInfo) {649tty->print_cr("CPU Share count based on shares: %d", share_count);650}651}652653// If both shares and quotas are setup results depend654// on flag PreferContainerQuotaForCPUCount.655// If true, limit CPU count to quota656// If false, use minimum of shares and quotas657if (quota_count !=0 && share_count != 0) {658if (PreferContainerQuotaForCPUCount) {659limit_count = quota_count;660} else {661limit_count = MIN2(quota_count, share_count);662}663} else if (quota_count != 0) {664limit_count = quota_count;665} else if (share_count != 0) {666limit_count = share_count;667}668669result = MIN2(cpu_count, limit_count);670if (PrintContainerInfo) {671tty->print_cr("OSContainer::active_processor_count: %d", result);672}673674// Update the value and reset the cache timeout675OSContainer::_active_processor_count = result;676cpu->set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT);677678return result;679}680681char * OSContainer::cpu_cpuset_cpus() {682GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus",683"cpuset.cpus is: %s", "%1023s", cpus, 1024);684return os::strdup(cpus);685}686687char * OSContainer::cpu_cpuset_memory_nodes() {688GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems",689"cpuset.mems is: %s", "%1023s", mems, 1024);690return os::strdup(mems);691}692693/* cpu_quota694*695* Return the number of milliseconds per period696* process is guaranteed to run.697*698* return:699* quota time in milliseconds700* -1 for no quota701* OSCONTAINER_ERROR for not supported702*/703int OSContainer::cpu_quota() {704GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us",705"CPU Quota is: %d", "%d", quota);706return quota;707}708709int OSContainer::cpu_period() {710GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us",711"CPU Period is: %d", "%d", period);712return period;713}714715/* cpu_shares716*717* Return the amount of cpu shares available to the process718*719* return:720* Share number (typically a number relative to 1024)721* (2048 typically expresses 2 CPUs worth of processing)722* -1 for no share setup723* OSCONTAINER_ERROR for not supported724*/725int OSContainer::cpu_shares() {726GET_CONTAINER_INFO(int, cpu, "/cpu.shares",727"CPU Shares is: %d", "%d", shares);728// Convert 1024 to no shares setup729if (shares == 1024) return -1;730731return shares;732}733734735736