CoCalc -- calculate-cache.sh

GitHub Repository: lima-vm/lima
Path: blob/master/hack/calculate-cache.sh
²⁶⁰⁹ views
1
#!/usr/bin/env bash
2

3
# SPDX-FileCopyrightText: Copyright The Lima Authors
4
# SPDX-License-Identifier: Apache-2.0
5

6
# This script calculates the expected content size, actual cached size, and cache-keys used in caching method before and after
7
# implementation in https://github.com/lima-vm/lima/pull/2508
8
#
9
# Answer to the question in https://github.com/lima-vm/lima/pull/2508#discussion_r1699798651
10

11
# directory containing this script; the shared helper library lives next to it
scriptdir=$(dirname "${BASH_SOURCE[0]}")
# stop early with one clear message instead of a stream of "command not found"
# errors from every helper call further down
if [[ ! -r "${scriptdir}/cache-common-inc.sh" ]]; then
	echo "${scriptdir}/cache-common-inc.sh is missing or not readable" >&2
	exit 1
fi
# shellcheck source=./cache-common-inc.sh
. "${scriptdir}/cache-common-inc.sh"
14

15
# usage: [DEBUG=1] ./hack/calculate-cache.sh
16
# DEBUG=1 will save the collected information in .calculate-cache-collected-info-{before,after}.yaml
17
#
18
# This script does:
19
# 1. extracts `runs_on` and `template` from workflow file (.github/workflows/test.yml)
20
# 2. check each template for image, kernel, initrd, and nerdctl
21
# 3. detect size of image, kernel, initrd, and nerdctl (responses from remote are cached for faster iteration)
22
#    save the response in .check_location-response-cache.yaml
23
# 4. print content size, actual cache size (if available), by cache key
24
#
25
# The major differences for reducing cache usage are as follows:
26
# - each download is now cached individually under `~/.cache/lima/download/by-url-sha256/$sha256` instead of caching the whole `~/.cache/lima/download` directory
27
# - the cache keys are now based on the image, kernel, initrd, and nerdctl digest instead of the template file's hash
28
# - enables the use of cache regardless of the operating system used to execute CI.
29
#
30
# The script requires the following commands:
31
# - gh: GitHub CLI.
32
#   Used to get the cache information
33
# - jq: Command-line JSON processor
34
#   Parse the workflow file and print runs-on and template.
35
#   Parse output from gh cache list
36
#   Calculate the expected content size, actual cached size, and cache-keys used.
37
# - limactl: lima CLI.
38
#   Used to validate the template file for getting nerdctl location and digest.
39
# - sha256sum: Print or check SHA256 (256-bit) checksums
40
# - xxd: make a hexdump or do the reverse.
41
#   Used to simulate the 'hashFiles()' function in the workflow.
42
# - yq: Command-line YAML processor.
43
#   Parse the template file for image and nerdctl location, digest, and size.
44
#   Parse the cache response file for the cache.
45
#   Convert the collected information to JSON.
46

47
# -u: expanding an unset variable is an error; pipefail: a pipeline fails when any stage fails.
# errexit (-e) is not enabled; the main loop handles failures per template with `|| continue`.
set -u -o pipefail

# fail fast when one of the tools listed in the header comment is not installed
required_commands=(gh jq limactl sha256sum xxd yq)
for cmd in "${required_commands[@]}"; do
	if ! command -v "${cmd}" &>/dev/null; then
		echo "${cmd} is required. Please install it" >&2
		exit 1
	fi
done

# current workflow uses x86_64 only
arch=x86_64

# run limactl against a throwaway LIMA_HOME
LIMA_HOME=$(mktemp -d) || {
	echo "failed to create a temporary directory for LIMA_HOME" >&2
	exit 1
}
export LIMA_HOME
# remove the temporary LIMA_HOME on exit. The path is expanded and shell-quoted
# now, so a later change of LIMA_HOME cannot redirect the `rm -rf`.
# NOTE(review): this replaces any EXIT trap installed earlier (e.g. by the sourced helper file) - confirm there is none.
# shellcheck disable=SC2064
trap "rm -rf -- $(printf '%q' "${LIMA_HOME}")" EXIT
62

63
# parse the workflow file and print runs-on and template
# $1: path to the workflow file (YAML)
# prints one "<runs-on><TAB><template>" line per template of every job that has a step using
# ./.github/actions/setup_cache_for_template; a template written as `${{ matrix.xxx }}` is
# expanded once for each value of that matrix entry
# e.g.
# ```console
# $ print_runs_on_template_from_workflow .github/workflows/test.yml
# macos-12        templates/default.yaml
# ubuntu-24.04    templates/alpine.yaml
# ubuntu-24.04    templates/debian.yaml
# ubuntu-24.04    templates/fedora.yaml
# ubuntu-24.04    templates/archlinux.yaml
# ubuntu-24.04    templates/opensuse.yaml
# ubuntu-24.04    templates/experimental/net-user-v2.yaml
# ubuntu-24.04    templates/experimental/9p.yaml
# ubuntu-24.04    templates/docker.yaml
# ubuntu-24.04    templates/../hack/test-templates/alpine-iso-9p-writable.yaml
# ubuntu-24.04    templates/../hack/test-templates/test-misc.yaml
# macos-12        templates/vmnet.yaml
# macos-12        https://raw.githubusercontent.com/lima-vm/lima/v0.15.1/examples/ubuntu-lts.yaml
# macos-13        templates/experimental/vz.yaml
# macos-13        templates/fedora.yaml
# ```
function print_runs_on_template_from_workflow() {
	yq -o=j "$1" | jq -r '
		"./.github/actions/setup_cache_for_template" as $action |
		"\\$\\{\\{\\s*(?<path>\\S*)\\s*\\}\\}" as $pattern |
		.jobs[] | select(.steps) |
		."runs-on" as $runs_on |
		# .steps is an array, so filter it with map(select(...)):
		# map_values(select(...)) is ".[] |= empty", which skips elements on jq releases before 1.7
		{
			template: (.steps | map(select(.uses == $action)) | first | .with.template),
			matrix: .strategy.matrix
		} | select(.template) |
		# when the template is an expression such as "${{ matrix.template }}",
		# .path becomes ["matrix","template"] so that getpath can look the values up below
		. + { path: .template | (if test($pattern) then sub(".*\($pattern).*";"\(.path)")|split(".") else null end) } |
		(
			.template as $template|
			if .path then
				getpath(.path)|map(. as $item|$template|sub($pattern;$item))
			else
				[$template]
			end
		) | .[] | "\($runs_on)\t\(.)"
	'
}
106

107
# returns the OS name from the runner equivalent to the expression `${{ runner.os }}` in the workflow
# $1: runner label taken from `runs-on`
# when the label is not recognized, nothing is printed to stdout, a message is
# written to stderr, and the function returns 1
# e.g.
# ```console
# $ runner_os_from_runner "macos-12"
# macOS
# $ runner_os_from_runner "ubuntu-24.04"
# Linux
# $ runner_os_from_runner "windows-2022"
# Windows
# ```
function runner_os_from_runner() {
	case "$1" in
	macos*)
		echo macOS
		;;
	ubuntu*)
		echo Linux
		;;
	windows*)
		echo Windows
		;;
	*)
		echo "unknown runner: $1" >&2
		return 1
		;;
	esac
}
126

127
# format the size columns read from stdin as MiB
# - the first field (content size in bytes) is formatted when it is a positive number
# - the second field (actual cached size in bytes) is only touched when the line has more
#   than one field: it is formatted when positive and replaced with "<<missing>>" otherwise
# lines with a reformatted field are printed with their fields joined by a single space
# e.g.
# ```console
# $ echo 585498624 | size_to_mib
#   558.38 MiB
# $ printf '1048576\t0\tsome-key\n' | size_to_mib
#     1.00 MiB <<missing>> some-key
# ```
function size_to_mib() {
	awk '
		function mib(size) { return sprintf("%7.2f MiB", size / 1024 / 1024) }
		{
			if (int($1) > 0) $1 = " " mib($1)
			# decide on the raw byte count, before it is replaced by formatted text:
			# testing the formatted text would report sizes below 1 MiB as "<<missing>>"
			if (NF > 1) $2 = (int($2) > 0) ? mib($2) : "<<missing>>"
			print
		}
	'
}
142

143
# actual_cache_sizes=$(gh cache list --json key,createdAt,sizeInBytes|jq '[.[]|{"key":.key,"value":.sizeInBytes}]|from_entries')
144
# e.g.
145
# ```console
146
# $ echo "${actual_cache_sizes}"
147
# {
148
#   "Linux-1c3b2791d52735d916dc44767c745c2319eb7cae74af71bbf45ddb268f42fc1d": 810758533,
149
#   "Linux-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633036717,
150
#   "Linux-3b906d46fa532e3bc348c35fc8e7ede6c69f0b27032046ee2cbb56d4022d1146": 574242367,
151
#   "Linux-69a547b760dbf1650007ed541408474237bc611704077214adcac292de556444": 70310855,
152
#   "Linux-7782f8b4ff8cd378377eb79f8d61c9559b94bbd0c11d19eb380ee7bda19af04e": 494141177,
153
#   "Linux-8812aedfe81b4456d421645928b493b1f2f88aff04b7f3171207492fd44cd189": 812730766,
154
#   "Linux-caa7d8af214d55ad8902e82d5918e61573f3d6795d2b5ad9a35305e26fa0e6a9": 754723892,
155
#   "Linux-colima-v0.6.5": 226350335,
156
#   "Linux-de83bce0608d787e3c68c7a31c5fab2b6d054320fd7bf633a031845e2ee03414": 810691197,
157
#   "Linux-eb88a19dfcf2fb98278e7c7e941c143737c6d7cd8950a88f58e04b4ee7cef1bc": 570625794,
158
#   "Linux-f88f0b3b678ff6432386a42bdd27661133c84a36ad29f393da407c871b0143eb": 68490954,
159
#   "golangci-lint.cache-Linux-2850-74615231540133417fd618c72e37be92c5d3b3ad": 2434144,
160
#   "macOS-231c66957fc2cdb18ea10e63f60770049026e29051ecd6598fc390b60d6a4fa6": 633020464,
161
#   "macOS-49aa50a4872ded07ebf657c0eaf9e44ecc0c174d033a97c537ecd270f35b462f": 813179462,
162
#   "macOS-8f37f663956af5f743f0f99ab973729b6a02f200ebfac7a3a036eff296550732": 810756770,
163
#   "macOS-ef5509b5d4495c8c3590442ee912ad1c9a33f872dc4a29421c524fc1e2103b59": 813179476,
164
#   "macOS-upgrade-v0.15.1": 1157814690,
165
#   "setup-go-Linux-ubuntu20-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1015518352,
166
#   "setup-go-Linux-ubuntu20-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 936433302,
167
#   "setup-go-Linux-ubuntu24-go-1.22.6-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1090001859,
168
#   "setup-go-Linux-ubuntu24-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 526146768,
169
#   "setup-go-Windows-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1155374040,
170
#   "setup-go-Windows-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 1056433137,
171
#   "setup-go-macOS-go-1.23.0-02756877dbcc9669bb904e42e894c63aa9801138db94426a90a2d554f2705c52": 1060919942,
172
#   "setup-go-macOS-go-1.23.0-6bce2eefc6111ace836de8bb322432c072805737d5f3c5a3d47d2207a05f50df": 982139209
173
# }
174
# JSON object mapping each cache key to its sizeInBytes, as reported by `gh cache list`
actual_cache_sizes=$(
	gh cache list --json key,createdAt,sizeInBytes \
		--jq 'sort_by(.createdAt)|reverse|unique_by(.key)|sort_by(.key)|map({"key":.key,"value":.sizeInBytes})|from_entries'
)
# the value is later passed to `jq --argjson`, which requires valid JSON; when gh fails
# or prints nothing, fall back to an empty object so every actual size is shown as missing
if [[ -z ${actual_cache_sizes} ]]; then
	echo "warning: could not get the cache list from gh; actual cache sizes will be reported as missing" >&2
	actual_cache_sizes='{}'
fi

# workflow files to scan for jobs that use the setup_cache_for_template action
workflows=(
	.github/workflows/test.yml
)
182

183
# shellcheck disable=SC2016
echo "=> compare expected content size, actual cached size, and cache-keys used before and after the change in https://github.com/lima-vm/lima/pull/2508"
# iterate over before and after
for cache_method in before after; do
	echo "==> ${cache_method}"
	echo "content-size actual-size cache-key"
	# collect one YAML list entry (key, template, location, digest, size) per cache entry;
	# a template that fails at any lookup step is skipped with `|| continue`
	output_yaml=$(
		for workflow in "${workflows[@]}"; do
			print_runs_on_template_from_workflow "${workflow}"
		done | while IFS=$'\t' read -r runner template; do
			template=$(download_template_if_needed "${template}") || continue
			arch=$(detect_arch "${template}" "${arch}") || continue
			index=$(print_image_locations_for_arch_from_template "${template}" "${arch}" | print_valid_image_index) || continue
			image_kernel_initrd_info=$(print_image_kernel_initrd_locations_with_digest_for_arch_from_template_at_index "${template}" "${index}" "${arch}") || continue
			# shellcheck disable=SC2034 # shellcheck does not detect dynamic variables usage
			read -r image_location image_digest kernel_location kernel_digest initrd_location initrd_digest <<<"${image_kernel_initrd_info}"
			# pass the detected arch explicitly: this is top-level code, so "${@:2}" would expand to the
			# positional parameters of the script itself (normally none) rather than to the arch
			# NOTE(review): assumes the helper takes the arch as its second argument, as its name suggests - confirm in cache-common-inc.sh
			containerd_info=$(print_containerd_config_for_arch_from_template "${template}" "${arch}") || continue
			# shellcheck disable=SC2034 # shellcheck does not detect dynamic variables usage
			read -r _containerd_enabled containerd_location containerd_digest <<<"${containerd_info}"

			if [[ ${cache_method} != after ]]; then
				# before: the key is "<runner OS>-<hash of the template file>"
				key=$(runner_os_from_runner "${runner}" || true)-$(hash_file "${template}")
			else
				# after: the key is derived from the image location and digest
				key=$(cache_key_from_prefix_location_and_digest image "${image_location}" "${image_digest}")
			fi
			size=$(size_from_location "${image_location}")
			for prefix in containerd kernel initrd; do
				# indirect expansion: ${!location} / ${!digest} read <prefix>_location / <prefix>_digest
				location="${prefix}_location"
				digest="${prefix}_digest"
				[[ ${!location} != null ]] || continue
				if [[ ${cache_method} != after ]]; then
					# previous caching method packages all files in download to a single cache key
					size=$((size + $(size_from_location "${!location}")))
				else
					# new caching method caches each file separately
					key_for_prefix=$(cache_key_from_prefix_location_and_digest "${prefix}" "${!location}" "${!digest}")
					size_for_prefix=$(size_from_location "${!location}")
					printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
						"${key_for_prefix}" "${template}" "${!location}" "${!digest}" "${size_for_prefix}"
				fi
			done
			printf -- "- key: %s\n  template: %s\n  location: %s\n  digest: %s\n  size: %s\n" \
				"${key}" "${template}" "${image_location}" "${image_digest}" "${size}"
		done
	)
	output_json=$(yq -o=j . <<<"${output_yaml}")

	# print size key
	jq --argjson actual_size "${actual_cache_sizes}" -r 'unique_by(.key)|sort_by(.key)|.[]|[.size, $actual_size[.key] // 0, .key]|@tsv' <<<"${output_json}" | size_to_mib
	# total
	echo "------------"
	jq '[unique_by(.key)|.[]|.size]|add' <<<"${output_json}" | size_to_mib
	# save the collected information as yaml if DEBUG is set
	if [[ -n ${DEBUG:+1} ]]; then
		cat <<<"${output_yaml}" >".calculate-cache-collected-info-${cache_method}.yaml"
		echo "Saved the collected information in .calculate-cache-collected-info-${cache_method}.yaml"
	fi
	echo ""
done
242

243
Product

Resources

Company