Path: blob/main/operations/observability/mixins/cross-teams/dashboards/gitpod-cluster-autoscaler-k3s.json
2500 views
{
  "__elements": [],
  "__requires": [
    {
      "type": "panel",
      "id": "gauge",
      "name": "Gauge",
      "version": ""
    },
    {
      "type": "grafana",
      "id": "grafana",
      "name": "Grafana",
      "version": "8.3.1"
    },
    {
      "type": "panel",
      "id": "graph",
      "name": "Graph (old)",
      "version": ""
    },
    {
      "type": "datasource",
      "id": "prometheus",
      "name": "Prometheus",
      "version": "1.0.0"
    },
    {
      "type": "panel",
      "id": "stat",
      "name": "Stat",
      "version": ""
    }
  ],
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "target": {
          "limit": 100,
          "matchAny": false,
          "tags": [],
          "type": "dashboard"
        },
        "type": "dashboard"
      }
    ]
  },
  "description": "Super simple dashboard showing an overview of kubernetes cluster autoscaling activity and status, using metrics reported by the autoscaler to prometheus.\r\n\r\nhttps://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "gnetId": null,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": "$datasource",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "match": "null",
                "result": {
                  "text": "N/A"
                }
              },
              "type": "special"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 0,
        "y": 0
      },
      "id": 4,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "none",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\"})\n",
          "format": "time_series",
          "intervalFactor": 2,
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Total nodes",
      "type": "stat"
    },
    {
      "datasource": "$datasource",
      "description": "Shows the nodes which are ready as a percent of the total nodes",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "match": "null",
                "result": {
                  "text": "N/A"
                }
              },
              "type": "special"
            }
          ],
          "max": 100,
          "min": 0,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "rgba(245, 54, 54, 0.9)",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 95
              },
              {
                "color": "rgba(50, 172, 45, 0.97)",
                "value": 100
              }
            ]
          },
          "unit": "percent"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 4,
        "y": 0
      },
      "id": 6,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "showThresholdLabels": false,
        "showThresholdMarkers": false
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\", state=\"ready\"})/sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\"})*100",
          "format": "time_series",
          "intervalFactor": 2,
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Nodes available",
      "type": "gauge"
    },
    {
      "datasource": "$datasource",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "0": {
                  "text": "No"
                },
                "1": {
                  "text": "Yes"
                }
              },
              "type": "value"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "rgba(245, 54, 54, 0.9)",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 0
              },
              {
                "color": "rgba(50, 172, 45, 0.97)",
                "value": 1
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 8,
        "y":
        0
      },
      "id": 9,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "datasource": "$datasource",
          "exemplar": true,
          "expr": "sum(cluster_autoscaler_cluster_safe_to_autoscale{cluster=~\"$cluster\"})",
          "format": "time_series",
          "interval": "",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Is cluster safe to scale?",
      "type": "stat"
    },
    {
      "datasource": "$datasource",
      "description": "Tells you if there are unscheduled pods",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "rgba(50, 172, 45, 0.97)",
                "value": null
              },
              {
                "color": "rgba(237, 129, 40, 0.89)",
                "value": 1
              },
              {
                "color": "rgba(245, 54, 54, 0.9)"
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 12,
        "y": 0
      },
      "id": 12,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "background",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_unschedulable_pods_count{cluster=~\"$cluster\"})",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": " pods",
          "refId": "A",
          "step": 300
        }
      ],
      "title": "Number of unscheduled pods",
      "type": "stat"
    },
    {
      "datasource": "$datasource",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "match": "null",
                "result": {
                  "text": "N/A"
                }
              },
              "type": "special"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 16,
        "y": 0
      },
      "id": 7,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "none",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(time()-cluster_autoscaler_last_activity{cluster=~\"$cluster\", activity=\"scaleDown\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Last scaleDown activity",
      "type": "stat"
    },
    {
      "datasource": "$datasource",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings": [
            {
              "options": {
                "match": "null",
                "result": {
                  "text": "N/A"
                }
              },
              "type": "special"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 4,
        "x": 20,
        "y": 0
      },
      "id": 8,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "none",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(time()-cluster_autoscaler_last_activity{cluster=~\"$cluster\", activity=\"autoscaling\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Last autoscale activity",
      "type": "stat"
    },
    {
      "alerting": {},
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "description": "Shows the evicted and unscheduled pods",
      "editable": true,
      "error": false,
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 0,
        "y": 4
      },
      "height": "250px",
      "hiddenSeries": false,
      "id": 11,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "null as zero",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_evicted_pods_total{cluster=~\"$cluster\"})",
          "format": "time_series",
          "hide": false,
          "intervalFactor": 10,
          "legendFormat": "evicted pods",
          "metric": "",
          "refId": "A",
          "step": 300
        },
        {
          "expr": "sum(cluster_autoscaler_unschedulable_pods_count{cluster=~\"$cluster\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "unscheduled pods",
          "refId": "B",
          "step": 60
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Pod activity",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 0,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "none",
          "label": "Num Pods",
          "logBase": 1,
          "min": 0,
          "show": true
        },
        {
          "format": "short",
          "logBase": 1,
          "show": true
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "alerting": {},
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "description": "Shows the state of the nodes as scaling happens",
      "editable": true,
      "error": false,
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 12,
        "x": 12,
        "y": 4
      },
      "height": "250px",
      "hiddenSeries": false,
      "id": 10,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "null as zero",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\", state=\"ready\"})",
          "format": "time_series",
          "hide": false,
          "intervalFactor": 10,
          "legendFormat": "ready",
          "metric": "",
          "refId": "A",
          "step": 300
        },
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\", state=\"unready\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "unready",
          "refId": "B",
          "step": 60
        },
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\", state=\"notStarted\"})\n",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "not started",
          "refId": "C",
          "step": 60
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Node activity",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 0,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "none",
          "label": "Num Nodes",
          "logBase": 1,
          "min": 0,
          "show": true
        },
        {
          "format": "short",
          "logBase": 1,
          "show": true
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "alerting": {},
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "$datasource",
      "editable":
      true,
      "error": false,
      "fill": 1,
      "fillGradient": 0,
      "grid": {},
      "gridPos": {
        "h": 7,
        "w": 16,
        "x": 0,
        "y": 11
      },
      "height": "250px",
      "hiddenSeries": false,
      "id": 3,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 2,
      "links": [],
      "nullPointMode": "connected",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "8.3.1",
      "pointradius": 5,
      "points": false,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_scaled_up_nodes_total{cluster=~\"$cluster\"})",
          "format": "time_series",
          "hide": false,
          "intervalFactor": 10,
          "legendFormat": "scaled up total",
          "metric": "",
          "refId": "A",
          "step": 200
        },
        {
          "expr": "sum(cluster_autoscaler_unneeded_nodes_count{cluster=~\"$cluster\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "unneeded nodes",
          "refId": "B",
          "step": 40
        },
        {
          "expr": "sum(cluster_autoscaler_nodes_count{cluster=~\"$cluster\"})\n",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "total nodes",
          "refId": "C",
          "step": 40
        }
      ],
      "thresholds": [],
      "timeRegions": [],
      "title": "Autoscaling activity",
      "tooltip": {
        "msResolution": false,
        "shared": true,
        "sort": 0,
        "value_type": "cumulative"
      },
      "type": "graph",
      "xaxis": {
        "mode": "time",
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "none",
          "label": "Num nodes",
          "logBase": 1,
          "min": 0,
          "show": true
        },
        {
          "format": "short",
          "logBase": 1,
          "show": true
        }
      ],
      "yaxis": {
        "align": false
      }
    },
    {
      "datasource": "$datasource",
      "description": "Is the cluster scaling up, down or ticking along okay?",
      "fieldConfig": {
        "defaults": {
          "color": {
            "mode": "thresholds"
          },
          "mappings":
          [
            {
              "options": {
                "from": -1000,
                "result": {
                  "text": "Down"
                },
                "to": -1
              },
              "type": "range"
            },
            {
              "options": {
                "from": 1,
                "result": {
                  "text": "Up"
                },
                "to": 1000
              },
              "type": "range"
            },
            {
              "options": {
                "from": 0,
                "result": {
                  "text": "Nowhere"
                },
                "to": 0
              },
              "type": "range"
            }
          ],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "red",
                "value": 80
              }
            ]
          },
          "unit": "none"
        },
        "overrides": []
      },
      "gridPos": {
        "h": 4,
        "w": 8,
        "x": 16,
        "y": 11
      },
      "id": 13,
      "links": [],
      "maxDataPoints": 100,
      "options": {
        "colorMode": "none",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "horizontal",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "pluginVersion": "8.3.1",
      "targets": [
        {
          "expr": "sum(cluster_autoscaler_scaled_up_nodes_total{cluster=~\"$cluster\"})-sum(cluster_autoscaler_scaled_down_nodes_total{cluster=~\"$cluster\"})",
          "format": "time_series",
          "intervalFactor": 2,
          "legendFormat": "",
          "refId": "A",
          "step": 600
        }
      ],
      "title": "Cluster direction?",
      "type": "stat"
    }
  ],
  "refresh": "30s",
  "schemaVersion": 33,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "current": {
          "selected": true,
          "text": "VictoriaMetrics",
          "value": "VictoriaMetrics"
        },
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "datasource",
        "options": [],
        "query": "prometheus",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      },
      {
        "current": {},
        "datasource": "$datasource",
        "definition": "label_values(cluster_autoscaler_nodes_count, cluster)",
        "hide": 0,
        "includeAll": false,
        "multi": false,
        "name": "cluster",
        "options": [],
        "query": {
          "query": "label_values(cluster_autoscaler_nodes_count, cluster)",
          "refId": "StandardVariableQuery"
        },
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "sort": 0,
        "type": "query"
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ],
    "time_options": [
      "5m",
      "15m",
      "1h",
      "6h",
      "12h",
      "24h",
      "2d",
      "7d",
      "30d"
    ]
  },
  "timezone": "browser",
  "title": "Kubernetes Cluster Autoscaler Multicluster",
  "uid": "multiautoscaler",
  "version": 1,
  "weekStart": ""
}