Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/scripts/spdxcheck.pl
48262 views
1
#!/usr/bin/env perl
2
3
# SPDX-License-Identifier: MIT
4
#
5
# Copyright (c) 2025, Rob Norris <[email protected]>
6
#
7
# Permission is hereby granted, free of charge, to any person obtaining a copy
8
# of this software and associated documentation files (the "Software"), to
9
# deal in the Software without restriction, including without limitation the
10
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11
# sell copies of the Software, and to permit persons to whom the Software is
12
# furnished to do so, subject to the following conditions:
13
#
14
# The above copyright notice and this permission notice shall be included in
15
# all copies or substantial portions of the Software.
16
#
17
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
# IN THE SOFTWARE.
24
25
use 5.010;
26
use warnings;
27
use strict;
28
29
# All files known to git are either "tagged" or "untagged". Tagged files are
30
# expected to have a license tag, while untagged files are expected to _not_
31
# have a license tag. There is no "optional" tag; all files are either "tagged"
32
# or "untagged".
33
#
34
# Whether or not a file is tagged or untagged is determined using the patterns
35
# in $tagged_patterns and $untagged_patterns and the following sequence:
36
#
37
# - if the file's full path is explicitly listed in $tagged_patterns, then the
38
# file is tagged.
39
#
40
# - if the file's full path is explicitly listed in $untagged_patterns, then
41
# the file is untagged.
42
#
43
# - if the filename matches a pattern in $tagged_patterns, and does not match a
44
# pattern in $untagged_patterns, then the file is tagged
45
#
46
# - otherwise, the file is untagged.
47
#
48
# The patterns do a simple glob-like match over the entire path relative to the
49
# root of the git repo (no leading /). '*' matches anything at that point,
50
# across path fragments. '?' matches a single character.
51
52
my $tagged_patterns = q(
53
# Compiled source files
54
*.c
55
*.h
56
*.S
57
58
# Python files, eg test suite drivers, libzfs bindings
59
*.py
60
*.py.in
61
62
# Various support scripts
63
*.sh
64
*.pl
65
66
# Test suite
67
*.ksh
68
*.ksh.in
69
*.kshlib
70
*.kshlib.in
71
*.shlib
72
73
# Test suite data files
74
*.run
75
*.cfg
76
*.cfg.in
77
*.fio
78
*.lua
79
*.zcp
80
81
# Manpages
82
man/man?/*.?
83
man/man?/*.?.in
84
85
# Unsuffixed programs (or generated of same)
86
cmd/zarcstat.in
87
cmd/zarcsummary
88
cmd/dbufstat.in
89
cmd/zilstat.in
90
cmd/zpool/zpool.d/*
91
etc/init.d/zfs-import.in
92
etc/init.d/zfs-load-key.in
93
etc/init.d/zfs-mount.in
94
etc/init.d/zfs-share.in
95
etc/init.d/zfs-zed.in
96
etc/zfs/zfs-functions.in
97
scripts/objtool-wrapper.in
98
99
# Misc items that have clear licensing info but aren't easily matched,
100
# or are the first of a class that we aren't ready to match yet.
101
config/ax_code_coverage.m4
102
configure.ac
103
module/lua/README.zfs
104
scripts/kmodtool
105
tests/zfs-tests/tests/functional/inheritance/README.config
106
tests/zfs-tests/tests/functional/inheritance/README.state
107
cmd/zed/zed.d/statechange-notify.sh
108
);
109
110
my $untagged_patterns = q(
111
# Exclude CI tooling as it's not interesting for overall project
112
# licensing.
113
.github/*
114
115
# Everything below this has unclear licensing. Work is happening to
116
# identify and update them. Once one gains a tag it should be removed
117
# from this list.
118
119
cmd/zed/zed.d/*.sh
120
cmd/zpool/zpool.d/*
121
122
contrib/coverity/model.c
123
include/libzdb.h
124
include/os/freebsd/spl/sys/inttypes.h
125
include/os/freebsd/spl/sys/mode.h
126
include/os/freebsd/spl/sys/trace.h
127
include/os/freebsd/spl/sys/trace_zfs.h
128
include/os/freebsd/zfs/sys/zpl.h
129
include/os/linux/kernel/linux/page_compat.h
130
lib/libspl/include/os/freebsd/sys/sysmacros.h
131
lib/libspl/include/sys/string.h
132
lib/libspl/include/sys/trace_spl.h
133
lib/libspl/include/sys/trace_zfs.h
134
lib/libzdb/libzdb.c
135
module/lua/setjmp/setjmp.S
136
module/lua/setjmp/setjmp_ppc.S
137
module/zstd/include/sparc_compat.h
138
module/zstd/zstd_sparc.c
139
tests/zfs-tests/cmd/cp_files.c
140
tests/zfs-tests/cmd/zed_fd_spill-zedlet.c
141
tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
142
tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c
143
tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c
144
tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c
145
146
autogen.sh
147
contrib/bpftrace/zfs-trace.sh
148
contrib/pyzfs/docs/source/conf.py
149
contrib/pyzfs/libzfs_core/test/__init__.py
150
contrib/pyzfs/setup.py.in
151
contrib/zcp/autosnap.lua
152
scripts/commitcheck.sh
153
scripts/man-dates.sh
154
scripts/mancheck.sh
155
scripts/paxcheck.sh
156
scripts/zfs-helpers.sh
157
scripts/zfs-tests-color.sh
158
scripts/zfs.sh
159
scripts/zimport.sh
160
tests/zfs-tests/callbacks/zfs_failsafe.ksh
161
tests/zfs-tests/include/commands.cfg
162
tests/zfs-tests/include/tunables.cfg
163
tests/zfs-tests/include/zpool_script.shlib
164
tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
165
);
166
167
# For files expected to have a license tag, these are the acceptable tags by
168
# path. A file in one of these paths with a tag not listed here must be in the
169
# override list below. If the file is not in any of these paths, then
170
# $default_license_tags is used.
171
my $default_license_tags = [
172
'CDDL-1.0', '0BSD', 'BSD-2-Clause', 'BSD-3-Clause', 'MIT'
173
];
174
175
my @path_license_tags = (
176
# Conventional wisdom is that the Linux SPL must be GPL2+ for
177
# kernel compatibility.
178
'module/os/linux/spl' => ['GPL-2.0-or-later'],
179
'include/os/linux/spl' => ['GPL-2.0-or-later'],
180
181
# Third-party code should keep its original license
182
'module/zstd/lib' => ['BSD-3-Clause OR GPL-2.0-only'],
183
'module/lua' => ['MIT'],
184
185
# lua/setjmp is platform-specific code sourced from various places
186
'module/lua/setjmp' => $default_license_tags,
187
188
# Some of the fletcher modules are dual-licensed
189
'module/zcommon/zfs_fletcher' =>
190
['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'],
191
192
'module/icp' => ['Apache-2.0', 'CDDL-1.0'],
193
'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'],
194
195
# Python bindings are always Apache-2.0
196
'contrib/pyzfs' => ['Apache-2.0'],
197
);
198
199
# This is a list of "special case" license tags that are in use in the tree,
200
# and the files where they occur. These exist for a variety of reasons, and
201
# generally should not be used for new code. If you need to bring in code that
202
# has a different license from the acceptable ones listed above, then you will
203
# also need to add it here, with rationale provided and approval given in your
204
# PR.
205
my %override_file_license_tags = (
206
207
# SPDX have repeatedly rejected the creation of a tag for a public
208
# domain dedication, as not all dedications are clear and unambiguous
209
# in their meaning and not all jurisdictions permit relinquishing a
210
# copyright anyway.
211
#
212
# A reasonably common workaround appears to be to create a local
213
# (project-specific) identifier to convey whatever meaning the project
214
# wishes it to. To cover OpenZFS' use of third-party code with a
215
# public domain dedication, we use this custom tag.
216
#
217
# Further reading:
218
# https://github.com/spdx/old-wiki/blob/main/Pages/Legal%20Team/Decisions/Dealing%20with%20Public%20Domain%20within%20SPDX%20Files.md
219
# https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
220
# https://cr.yp.to/spdx.html
221
#
222
'LicenseRef-OpenZFS-ThirdParty-PublicDomain' => [qw(
223
include/sys/skein.h
224
module/icp/algs/skein/skein_block.c
225
module/icp/algs/skein/skein.c
226
module/icp/algs/skein/skein_impl.h
227
module/icp/algs/skein/skein_iv.c
228
module/icp/algs/skein/skein_port.h
229
module/zfs/vdev_draid_rand.c
230
)],
231
232
# Legacy inclusions
233
'Brian-Gladman-3-Clause' => [qw(
234
module/icp/asm-x86_64/aes/aestab.h
235
module/icp/asm-x86_64/aes/aesopt.h
236
module/icp/asm-x86_64/aes/aeskey.c
237
module/icp/asm-x86_64/aes/aes_amd64.S
238
)],
239
'OpenSSL-standalone' => [qw(
240
module/icp/asm-x86_64/aes/aes_aesni.S
241
)],
242
'LGPL-2.1-or-later' => [qw(
243
config/ax_code_coverage.m4
244
)],
245
246
# Legacy inclusions of BSD-2-Clause files in Linux SPL.
247
'BSD-2-Clause' => [qw(
248
include/os/linux/spl/sys/debug.h
249
module/os/linux/spl/spl-zone.c
250
)],
251
252
# Temporary overrides for things that have the wrong license for
253
# their path. Work is underway to understand and resolve these.
254
'GPL-2.0-or-later' => [qw(
255
include/os/freebsd/spl/sys/kstat.h
256
include/os/freebsd/spl/sys/sunddi.h
257
)],
258
'CDDL-1.0' => [qw(
259
include/os/linux/spl/sys/errno.h
260
include/os/linux/spl/sys/ia32/asm_linkage.h
261
include/os/linux/spl/sys/misc.h
262
include/os/linux/spl/sys/procfs_list.h
263
include/os/linux/spl/sys/trace.h
264
include/os/linux/spl/sys/trace_spl.h
265
include/os/linux/spl/sys/trace_taskq.h
266
include/os/linux/spl/sys/wmsum.h
267
module/os/linux/spl/spl-procfs-list.c
268
module/os/linux/spl/spl-trace.c
269
module/lua/README.zfs
270
)],
271
);
272
273
##########
274
275
# Parse a newline-separated pattern list into something we can match paths
# against.
#
# Each line of $patterns has leading/trailing whitespace and any trailing
# '#' comment removed; lines that end up empty are ignored. A remaining line
# is then one of:
#
#  - a literal path (contains neither '*' nor '?'), which is recorded as-is;
#  - a glob, where '*' matches any run of characters (including '/') and
#    '?' matches exactly one character. Every other character is matched
#    literally.
#
# Returns a two-element list:
#
#  - a compiled regex that matches a complete path against any of the globs;
#  - a hashref whose keys are the literal paths (values are all 1).
sub setup_patterns {
	my ($patterns) = @_;

	my @globs;
	my %literal;

	for my $pat (split "\n", $patterns) {
		# remove leading/trailing whitespace and comments
		$pat =~ s/(?:^\s*|\s*(?:#.*)?$)//g;

		# skip (now-)empty lines
		next if $pat eq '';

		# if the "pattern" has no metachars, then it's a literal file
		# path and gets matched a bit more strongly
		unless ($pat =~ m/[?*]/) {
			$literal{$pat} = 1;
			next;
		}

		# Translate one character at a time so that only the two glob
		# metachars gain regex meaning; anything else (including '+',
		# ')', '{', '|' and friends) is quoted and matches itself.
		push @globs, join '', map {
			$_ eq '*' ? '.*' :
			$_ eq '?' ? '.'  :
			quotemeta($_)
		} split //, $pat;
	}

	# With no globs at all, use a pattern that can never match rather
	# than an empty alternation.
	my $alternation = @globs ? join('|', @globs) : '(?!)';

	return (qr/\A(?:$alternation)\z/, \%literal);
}
308
309
my ($tagged_re, $tagged_files) = setup_patterns($tagged_patterns);
310
my ($untagged_re, $untagged_files) = setup_patterns($untagged_patterns);
311
312
# Decide whether $file is expected to carry a license tag.
#
# Literal listings take priority over globs, and the tagged list is consulted
# before the untagged one. A literal entry is removed from its list once it
# has been seen, so whatever is left in the lists afterwards was never
# encountered.
#
# Returns true if the file should be tagged, false otherwise.
sub file_is_tagged {
	my ($file) = @_;

	# Listed by full path as tagged. delete hands back the stored true
	# value when the entry exists, and undef when it does not.
	return 1 if delete $tagged_files->{$file};

	# Listed by full path as untagged.
	return 0 if delete $untagged_files->{$file};

	# Otherwise fall back to the globs: the path has to hit a tagged
	# pattern without also hitting an untagged one.
	my $wanted = $file =~ $tagged_re;
	return $wanted && $file !~ $untagged_re;
}
330
331
# Flip the tag => [files] override table around into a flat file => tag
# lookup, which is the direction the scan needs.
my %override_tags;
for my $tag (keys %override_file_license_tags) {
	for my $path (@{ $override_file_license_tags{$tag} }) {
		$override_tags{$path} = $tag;
	}
}
335
336
##########
337
338
# Exit status for the whole run; flipped to 1 by any reported problem.
my $rc = 0;

# Get a list of all files known to git. This is a crude way of avoiding any
# build artifacts that have tags embedded in them.
my @git_files = qx(git ls-tree --name-only -r HEAD);

# If git could not be run, or exited non-zero, stop here. Carrying on would
# treat the failure as an empty repository and report every explicitly
# listed file as missing.
die "$0: 'git ls-tree' failed (\$? = $?)\n" if $? != 0;

# Strip the line terminators, then put the names in a stable order.
chomp @git_files;
@git_files = sort @git_files;
343
344
# Scan all files and work out if their tags are correct.
345
for my $file (@git_files) {
	# Only regular files are of interest. git also tracks other kinds of
	# object (submodule dirs, symlinks, etc) that have no bearing on
	# licensing.
	next if !(-f $file) || -l $file;

	# Pull in just the leading 4K of the file. Plenty of tracked files
	# are large, binary, or both, and for a typical source file the tag
	# will sit within the first ~50 lines.
	open my $fh, '<', $file or die "$0: couldn't open $file: $!\n";
	my $nbytes = read $fh, my $buf, 4096;
	die "$0: couldn't read $file: $!\n" unless defined $nbytes;
	close $fh;

	# The first tag found in the buffer, or undef if there isn't one.
	my ($tag) =
	    $buf =~ m/\bSPDX-License-Identifier: ([A-Za-z0-9_\-\. ]+)$/smg;

	# Files that are not supposed to carry a tag only need checking for
	# one that has crept in; that means the patterns need updating.
	if (!file_is_tagged($file)) {
		if (defined $tag) {
			say "unexpected license tag: $file";
			$rc = 1;
		}
		next;
	}

	# From here on a tag is mandatory.
	unless (defined $tag) {
		say "missing license tag: $file";
		$rc = 1;
		next;
	}

	# Work out which tags are acceptable for this file.
	my $tags;
	if ($override_tags{$file}) {
		# An explicit per-file override is the only acceptable tag.
		# Removing the entry marks the override as having been seen.
		$tags = [delete $override_tags{$file}];
	}
	else {
		# Begin with the defaults, then let the longest matching
		# path prefix replace them. On equal lengths the earliest
		# entry in the list stays in place.
		$tags = $default_license_tags;
		my $best = 0;
		for my $n (grep { $_ % 2 == 0 } 0 .. $#path_license_tags) {
			my ($prefix, $set) = @path_license_tags[$n, $n + 1];
			next unless index($file, $prefix) == 0;
			next unless length($prefix) > $best;
			($tags, $best) = ($set, length($prefix));
		}
	}

	# All good if the file's tag is one of the acceptable ones.
	next if grep { $_ eq $tag } @$tags;

	say "invalid license tag: $file";
	say " (got $tag; expected: @$tags)";
	$rc = 1;
}
414
415
##########
416
417
# List any files explicitly listed as tagged or untagged that we didn't see.
418
# Likely the file was removed from the repo but not from our lists.
419
420
# Anything still present in these three tables was listed explicitly but
# never came up during the scan. Gather the complaints per table, each in
# sorted order, and print them in one go.
my @leftovers = (
	(map { "explicitly tagged file not on disk: $_" }
	    sort keys %$tagged_files),
	(map { "explicitly untagged file not on disk: $_" }
	    sort keys %$untagged_files),
	(map { "explicitly overridden file not on disk: $_" }
	    sort keys %override_tags),
);

if (@leftovers) {
	say for @leftovers;
	$rc = 1;
}

exit $rc;
434
435