Path: blob/master/src/hotspot/os/linux/cgroupSubsystem_linux.hpp
64440 views
/*1* Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#ifndef CGROUP_SUBSYSTEM_LINUX_HPP25#define CGROUP_SUBSYSTEM_LINUX_HPP2627#include "memory/allocation.hpp"28#include "runtime/os.hpp"29#include "logging/log.hpp"30#include "utilities/globalDefinitions.hpp"31#include "utilities/macros.hpp"32#include "osContainer_linux.hpp"3334// Shared cgroups code (used by cgroup version 1 and version 2)3536/*37* PER_CPU_SHARES has been set to 1024 because CPU shares' quota38* is commonly used in cloud frameworks like Kubernetes[1],39* AWS[2] and Mesos[3] in a similar way. They spawn containers with40* --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do41* the inverse for determining the number of possible available42* CPUs to the JVM inside a container. See JDK-8216366.43*44* [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu45* In particular:46* When using Docker:47* The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially48* fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the49* --cpu-shares flag in the docker run command.50* [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html51* [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L64852* https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L3053*/54#define PER_CPU_SHARES 10245556#define CGROUPS_V1 157#define CGROUPS_V2 258#define INVALID_CGROUPS_V2 359#define INVALID_CGROUPS_V1 460#define INVALID_CGROUPS_NO_MOUNT 561#define INVALID_CGROUPS_GENERIC 66263// Five controllers: cpu, cpuset, cpuacct, memory, pids64#define CG_INFO_LENGTH 565#define CPUSET_IDX 066#define CPU_IDX 167#define CPUACCT_IDX 268#define MEMORY_IDX 369#define PIDS_IDX 47071typedef char * cptr;7273class CgroupController: public CHeapObj<mtInternal> {74public:75virtual char *subsystem_path() = 0;76};7778PRAGMA_DIAG_PUSH79PRAGMA_FORMAT_NONLITERAL_IGNORED80template <typename T> int subsystem_file_line_contents(CgroupController* c,81const char *filename,82const char *matchline,83const char *scan_fmt,84T returnval) {85FILE *fp = NULL;86char *p;87char file[MAXPATHLEN+1];88char buf[MAXPATHLEN+1];89char discard[MAXPATHLEN+1];90bool found_match = false;9192if (c == NULL) {93log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL");94return OSCONTAINER_ERROR;95}96if (c->subsystem_path() == NULL) {97log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL");98return OSCONTAINER_ERROR;99}100101strncpy(file, c->subsystem_path(), MAXPATHLEN);102file[MAXPATHLEN-1] = '\0';103int filelen = strlen(file);104if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) {105log_debug(os, container)("File path too long %s, %s", file, filename);106return OSCONTAINER_ERROR;107}108strncat(file, filename, MAXPATHLEN-filelen);109log_trace(os, container)("Path to %s is %s", filename, file);110fp = fopen(file, "r");111if (fp != NULL) {112int err = 0;113while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) {114found_match = false;115if (matchline == NULL) {116// single-line file case117int matched = sscanf(p, scan_fmt, returnval);118found_match = (matched == 1);119} else {120// multi-line file case121if (strstr(p, matchline) != NULL) {122// discard matchline string prefix123int matched = sscanf(p, scan_fmt, discard, returnval);124found_match = (matched == 2);125} else {126continue; // substring not found127}128}129if (found_match) {130fclose(fp);131return 0;132} else {133err = 1;134log_debug(os, container)("Type %s not found in file %s", scan_fmt, file);135}136}137if (err == 0) {138log_debug(os, container)("Empty file %s", file);139}140} else {141log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno));142}143if (fp != NULL)144fclose(fp);145return OSCONTAINER_ERROR;146}147PRAGMA_DIAG_POP148149#define GET_CONTAINER_INFO(return_type, subsystem, filename, \150logstring, scan_fmt, variable) \151return_type variable; \152{ \153int err; \154err = subsystem_file_line_contents(subsystem, \155filename, \156NULL, \157scan_fmt, \158&variable); \159if (err != 0) { \160log_trace(os, container)(logstring, (return_type) OSCONTAINER_ERROR); \161return (return_type) OSCONTAINER_ERROR; \162} \163\164log_trace(os, container)(logstring, variable); \165}166167#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \168logstring, scan_fmt, variable, bufsize) \169char variable[bufsize]; \170{ \171int err; \172err = subsystem_file_line_contents(subsystem, \173filename, \174NULL, \175scan_fmt, \176variable); \177if (err != 0) \178return (return_type) NULL; \179\180log_trace(os, container)(logstring, variable); \181}182183#define GET_CONTAINER_INFO_LINE(return_type, controller, filename, \184matchline, logstring, scan_fmt, variable) \185return_type variable; \186{ \187int err; \188err = subsystem_file_line_contents(controller, \189filename, \190matchline, \191scan_fmt, \192&variable); \193if (err != 0) \194return (return_type) OSCONTAINER_ERROR; \195\196log_trace(os, container)(logstring, variable); \197}198199200class CachedMetric : public CHeapObj<mtInternal>{201private:202volatile jlong _metric;203volatile jlong _next_check_counter;204public:205CachedMetric() {206_metric = -1;207_next_check_counter = min_jlong;208}209bool should_check_metric() {210return os::elapsed_counter() > _next_check_counter;211}212jlong value() { return _metric; }213void set_value(jlong value, jlong timeout) {214_metric = value;215// Metric is unlikely to change, but we want to remain216// responsive to configuration changes. A very short grace time217// between re-read avoids excessive overhead during startup without218// significantly reducing the VMs ability to promptly react to changed219// metric config220_next_check_counter = os::elapsed_counter() + timeout;221}222};223224class CachingCgroupController : public CHeapObj<mtInternal> {225private:226CgroupController* _controller;227CachedMetric* _metrics_cache;228229public:230CachingCgroupController(CgroupController* cont) {231_controller = cont;232_metrics_cache = new CachedMetric();233}234235CachedMetric* metrics_cache() { return _metrics_cache; }236CgroupController* controller() { return _controller; }237};238239class CgroupSubsystem: public CHeapObj<mtInternal> {240public:241jlong memory_limit_in_bytes();242int active_processor_count();243jlong limit_from_str(char* limit_str);244245virtual int cpu_quota() = 0;246virtual int cpu_period() = 0;247virtual int cpu_shares() = 0;248virtual jlong pids_max() = 0;249virtual jlong pids_current() = 0;250virtual jlong memory_usage_in_bytes() = 0;251virtual jlong memory_and_swap_limit_in_bytes() = 0;252virtual jlong memory_soft_limit_in_bytes() = 0;253virtual jlong memory_max_usage_in_bytes() = 0;254virtual char * cpu_cpuset_cpus() = 0;255virtual char * cpu_cpuset_memory_nodes() = 0;256virtual jlong read_memory_limit_in_bytes() = 0;257virtual const char * container_type() = 0;258virtual CachingCgroupController* memory_controller() = 0;259virtual CachingCgroupController* cpu_controller() = 0;260};261262// Utility class for storing info retrieved from /proc/cgroups,263// /proc/self/cgroup and /proc/self/mountinfo264// For reference see man 7 cgroups and CgroupSubsystemFactory265class CgroupInfo : public StackObj {266friend class CgroupSubsystemFactory;267friend class WhiteBox;268269private:270char* _name;271int _hierarchy_id;272bool _enabled;273bool _data_complete; // indicating cgroup v1 data is complete for this controller274char* _cgroup_path; // cgroup controller path from /proc/self/cgroup275char* _root_mount_path; // root mount path from /proc/self/mountinfo. Unused for cgroup v2276char* _mount_path; // mount path from /proc/self/mountinfo.277278public:279CgroupInfo() {280_name = NULL;281_hierarchy_id = -1;282_enabled = false;283_data_complete = false;284_cgroup_path = NULL;285_root_mount_path = NULL;286_mount_path = NULL;287}288289};290291class CgroupSubsystemFactory: AllStatic {292friend class WhiteBox;293294public:295static CgroupSubsystem* create();296private:297static inline bool is_cgroup_v2(u1* flags) {298return *flags == CGROUPS_V2;299}300301#ifdef ASSERT302static inline bool is_valid_cgroup(u1* flags) {303return *flags == CGROUPS_V1 || *flags == CGROUPS_V2;304}305static inline bool is_cgroup_v1(u1* flags) {306return *flags == CGROUPS_V1;307}308#endif309310// Determine the cgroup type (version 1 or version 2), given311// relevant paths to files. Sets 'flags' accordingly.312static bool determine_type(CgroupInfo* cg_infos,313const char* proc_cgroups,314const char* proc_self_cgroup,315const char* proc_self_mountinfo,316u1* flags);317static void cleanup(CgroupInfo* cg_infos);318};319320#endif // CGROUP_SUBSYSTEM_LINUX_HPP321322323