Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/scripts/spdxcheck.pl
105240 views
1
#!/usr/bin/env perl
2
3
# SPDX-License-Identifier: MIT
4
#
5
# Copyright (c) 2025, Rob Norris <[email protected]>
6
#
7
# Permission is hereby granted, free of charge, to any person obtaining a copy
8
# of this software and associated documentation files (the "Software"), to
9
# deal in the Software without restriction, including without limitation the
10
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
11
# sell copies of the Software, and to permit persons to whom the Software is
12
# furnished to do so, subject to the following conditions:
13
#
14
# The above copyright notice and this permission notice shall be included in
15
# all copies or substantial portions of the Software.
16
#
17
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23
# IN THE SOFTWARE.
24
25
use 5.010;
26
use warnings;
27
use strict;
28
29
# All files known to git are either "tagged" or "untagged". Tagged files are
30
# expected to have a license tag, while untagged files are expected to _not_
31
# have a license tag. There is no "optional" tag; all files are either "tagged"
32
# or "untagged".
33
#
34
# Whether or not a file is tagged or untagged is determined using the patterns
35
# in $tagged_patterns and $untagged_patterns and the following sequence:
36
#
37
# - if the file's full path is explicitly listed in $tagged_patterns, then the
38
# file is tagged.
39
#
40
# - if the file's full path is explicitly listed in $untagged_patterns, then
41
# the file is untagged.
42
#
43
# - if the filename matches a pattern in $tagged_patterns, and does not match a
44
# pattern in $untagged_patterns, then the file is tagged
45
#
46
# - otherwise, the file is untagged.
47
#
48
# The patterns do a simple glob-like match over the entire path relative to the
49
# root of the git repo (no leading /). '*' matches anything at that point,
50
# across path fragments. '?' matches a single character.
51
52
my $tagged_patterns = q(
53
# Compiled source files
54
*.c
55
*.h
56
*.S
57
58
# Python files, eg test suite drivers, libzfs bindings
59
*.py
60
*.py.in
61
62
# Various support scripts
63
*.sh
64
*.pl
65
66
# Test suite
67
*.ksh
68
*.ksh.in
69
*.kshlib
70
*.kshlib.in
71
*.shlib
72
73
# Test suite data files
74
*.run
75
*.cfg
76
*.cfg.in
77
*.fio
78
*.lua
79
*.zcp
80
81
# Manpages
82
man/man?/*.?
83
man/man?/*.?.in
84
85
# Unsuffixed programs (or generated of same)
86
cmd/zarcstat.in
87
cmd/zarcsummary
88
cmd/dbufstat.in
89
cmd/zilstat.in
90
cmd/zpool/zpool.d/*
91
etc/init.d/zfs-import.in
92
etc/init.d/zfs-load-key.in
93
etc/init.d/zfs-mount.in
94
etc/init.d/zfs-share.in
95
etc/init.d/zfs-zed.in
96
etc/zfs/zfs-functions.in
97
scripts/objtool-wrapper.in
98
99
# Misc items that have clear licensing info but aren't easily matched,
100
# or are the first of a class that we aren't ready to match yet.
101
config/ax_code_coverage.m4
102
configure.ac
103
module/lua/README.zfs
104
scripts/kmodtool
105
tests/zfs-tests/tests/functional/inheritance/README.config
106
tests/zfs-tests/tests/functional/inheritance/README.state
107
cmd/zed/zed.d/statechange-notify.sh
108
);
109
110
my $untagged_patterns = q(
111
# Exclude CI tooling as it's not interesting for overall project
112
# licensing.
113
.github/*
114
115
# Everything below this has unclear licensing. Work is happening to
116
# identify and update them. Once one gains a tag it should be removed
117
# from this list.
118
119
cmd/zed/zed.d/*.sh
120
cmd/zpool/zpool.d/*
121
122
contrib/coverity/model.c
123
include/libzdb.h
124
include/os/freebsd/spl/sys/inttypes.h
125
include/os/freebsd/spl/sys/mode.h
126
include/os/freebsd/spl/sys/trace.h
127
include/os/freebsd/zfs/sys/trace_zfs.h
128
include/os/freebsd/zfs/sys/zpl.h
129
include/os/linux/kernel/linux/page_compat.h
130
lib/libspl/include/sys/string.h
131
lib/libzdb/libzdb.c
132
lib/libzpool/include/sys/trace_zfs.h
133
module/lua/setjmp/setjmp.S
134
module/lua/setjmp/setjmp_ppc.S
135
module/zstd/include/sparc_compat.h
136
module/zstd/zstd_sparc.c
137
tests/zfs-tests/cmd/cp_files.c
138
tests/zfs-tests/cmd/zed_fd_spill-zedlet.c
139
tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
140
tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c
141
tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c
142
tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c
143
144
autogen.sh
145
contrib/bpftrace/zfs-trace.sh
146
contrib/pyzfs/docs/source/conf.py
147
contrib/pyzfs/libzfs_core/test/__init__.py
148
contrib/pyzfs/setup.py.in
149
contrib/zcp/autosnap.lua
150
scripts/commitcheck.sh
151
scripts/man-dates.sh
152
scripts/mancheck.sh
153
scripts/paxcheck.sh
154
scripts/zfs-helpers.sh
155
scripts/zfs-tests-color.sh
156
scripts/zfs.sh
157
scripts/zimport.sh
158
tests/zfs-tests/callbacks/zfs_failsafe.ksh
159
tests/zfs-tests/include/commands.cfg
160
tests/zfs-tests/include/tunables.cfg
161
tests/zfs-tests/include/zpool_script.shlib
162
tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
163
);
164
165
# For files expected to have a license tag, these are the acceptable tags by
166
# path. A file in one of these paths with a tag not listed here must be in the
167
# override list below. If the file is not in any of these paths, then
168
# $default_license_tags is used.
169
my $default_license_tags = [
170
'CDDL-1.0', '0BSD', 'BSD-2-Clause', 'BSD-3-Clause', 'MIT'
171
];
172
173
my @path_license_tags = (
174
# Conventional wisdom is that the Linux SPL must be GPL2+ for
175
# kernel compatibility.
176
'module/os/linux/spl' => ['GPL-2.0-or-later'],
177
'include/os/linux/spl' => ['GPL-2.0-or-later'],
178
179
# Third-party code should keep its original license
180
'module/zstd/lib' => ['BSD-3-Clause OR GPL-2.0-only'],
181
'module/lua' => ['MIT'],
182
183
# lua/setjmp is platform-specific code sourced from various places
184
'module/lua/setjmp' => $default_license_tags,
185
186
# Some of the fletcher modules are dual-licensed
187
'module/zcommon/zfs_fletcher' =>
188
['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'],
189
190
'module/icp' => ['Apache-2.0', 'CDDL-1.0'],
191
'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'],
192
193
# Python bindings are always Apache-2.0
194
'contrib/pyzfs' => ['Apache-2.0'],
195
);
196
197
# This is a list of "special case" license tags that are in use in the tree,
198
# and the files where they occur. These exist for a variety of reasons, and
199
# generally should not be used for new code. If you need to bring in code that
200
# has a different license from the acceptable ones listed above, then you will
201
# also need to add it here, with rationale provided and approval given in your
202
# PR.
203
my %override_file_license_tags = (
204
205
# SPDX have repeatedly rejected the creation of a tag for a public
206
# domain dedication, as not all dedications are clear and unambiguous
207
# in their meaning and not all jurisdictions permit relinquishing a
208
# copyright anyway.
209
#
210
# A reasonably common workaround appears to be to create a local
211
# (project-specific) identifier to convey whatever meaning the project
212
# wishes it to. To cover OpenZFS' use of third-party code with a
213
# public domain dedication, we use this custom tag.
214
#
215
# Further reading:
216
# https://github.com/spdx/old-wiki/blob/main/Pages/Legal%20Team/Decisions/Dealing%20with%20Public%20Domain%20within%20SPDX%20Files.md
217
# https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
218
# https://cr.yp.to/spdx.html
219
#
220
'LicenseRef-OpenZFS-ThirdParty-PublicDomain' => [qw(
221
include/sys/skein.h
222
module/icp/algs/skein/skein_block.c
223
module/icp/algs/skein/skein.c
224
module/icp/algs/skein/skein_impl.h
225
module/icp/algs/skein/skein_iv.c
226
module/icp/algs/skein/skein_port.h
227
module/zfs/vdev_draid_rand.c
228
)],
229
230
# Legacy inclusions
231
'Brian-Gladman-3-Clause' => [qw(
232
module/icp/asm-x86_64/aes/aestab.h
233
module/icp/asm-x86_64/aes/aesopt.h
234
module/icp/asm-x86_64/aes/aeskey.c
235
module/icp/asm-x86_64/aes/aes_amd64.S
236
)],
237
'OpenSSL-standalone' => [qw(
238
module/icp/asm-x86_64/aes/aes_aesni.S
239
)],
240
'LGPL-2.1-or-later' => [qw(
241
config/ax_code_coverage.m4
242
)],
243
244
# Legacy inclusions of BSD-2-Clause files in Linux SPL.
245
'BSD-2-Clause' => [qw(
246
include/os/linux/spl/sys/debug.h
247
module/os/linux/spl/spl-zone.c
248
)],
249
250
# Temporary overrides for things that have the wrong license for
251
# their path. Work is underway to understand and resolve these.
252
'GPL-2.0-or-later' => [qw(
253
include/os/freebsd/spl/sys/kstat.h
254
include/os/freebsd/spl/sys/sunddi.h
255
)],
256
'CDDL-1.0' => [qw(
257
include/os/linux/spl/sys/errno.h
258
include/os/linux/spl/sys/ia32/asm_linkage.h
259
include/os/linux/spl/sys/misc.h
260
include/os/linux/spl/sys/procfs_list.h
261
include/os/linux/spl/sys/trace.h
262
include/os/linux/spl/sys/trace_spl.h
263
include/os/linux/spl/sys/trace_taskq.h
264
include/os/linux/spl/sys/wmsum.h
265
module/os/linux/spl/spl-procfs-list.c
266
module/os/linux/spl/spl-trace.c
267
module/lua/README.zfs
268
)],
269
);
270
271
##########
272
273
# Parse a newline-separated pattern list into something we can match against.
#
# Each line of $patterns may have leading/trailing whitespace and a trailing
# '#' comment; both are stripped, and lines that end up empty are ignored.
# A line with no glob metachars ('?' or '*') is a literal file path. Any other
# line is a glob pattern, where '?' matches any single character and '*'
# matches any run of characters; every other character matches itself.
#
# Returns a two-element list:
#   - a compiled regex that matches a whole path against any glob pattern
#   - a hashref whose keys are the literal file paths (values are all 1)
sub setup_patterns {
    my ($patterns) = @_;

    my @re;
    my @files;

    for my $pat (split "\n", $patterns) {
        # remove leading/trailing whitespace and comments
        $pat =~ s/(?:^\s*|\s*(?:#.*)?$)//g;

        # skip (now-)empty lines
        next if $pat eq '';

        # if the "pattern" has no metachars, then it's a literal file
        # path and gets matched a bit more strongly
        unless ($pat =~ m/[?*]/) {
            push @files, $pat;
            next;
        }

        # Pattern to regex conversion. Split on the glob metachars,
        # keeping them as separate pieces, then translate those and quote
        # everything in between so no other character can be mistaken
        # for regex syntax.
        push @re, join '', map {
            $_ eq '?' ? '.'  :      # glob ? -> regex .
            $_ eq '*' ? '.*' :      # glob * -> regex .*
            quotemeta($_)
        } split /([?*])/, $pat;
    }

    # With no glob patterns at all, use a regex that can never match
    # rather than one that matches the empty string.
    my $alternation = @re ? join('|', @re) : '(?!)';

    # \A and \z anchor to the true start and end of the path; unlike $,
    # \z will not tolerate a trailing newline.
    return (qr/\A(?:$alternation)\z/, { map { $_ => 1 } @files });
}
306
307
# Compile both pattern lists once, up front. For each list we get the regex
# covering its glob patterns, and a hash keyed by its literal file paths.
my ($tagged_re, $tagged_files) = setup_patterns($tagged_patterns);
my ($untagged_re, $untagged_files) = setup_patterns($untagged_patterns);

# Decide whether $file is expected to carry a license tag. Returns true if
# it is, false if it is not.
#
# Literal path entries are consulted first (tagged, then untagged) and win
# over glob patterns. A literal entry is removed from its hash when it is
# hit, so entries still present after all files have been checked are ones
# that were never looked up.
sub file_is_tagged {
    my ($file) = @_;

    # explicitly tagged
    return 1 if delete $tagged_files->{$file};

    # explicitly untagged
    return 0 if delete $untagged_files->{$file};

    # otherwise: tagged only if a tagged pattern matches and no untagged
    # pattern does
    return $file =~ $tagged_re && $file !~ $untagged_re;
}
328
329
# Invert the tag => [files] override table into a flat file => tag lookup.
my %override_tags;
for my $tag (keys %override_file_license_tags) {
    $override_tags{$_} = $tag for @{ $override_file_license_tags{$tag} };
}
333
334
##########
335
336
# Exit status for the whole run; set to 1 as soon as any problem is reported.
my $rc = 0;

# Get a list of all files known to git. This is a crude way of avoiding any
# build artifacts that have tags embedded in them. -z makes git emit
# NUL-terminated names without quoting, so paths with unusual characters
# arrive exactly as they are on disk.
my $git_output = qx(git ls-tree -z --name-only -r HEAD);

# If git itself failed we have no file list; stop here rather than go on to
# report every explicitly-listed file as missing.
die "$0: couldn't list files with git ls-tree\n" if $? != 0;

my @git_files = sort split /\0/, $git_output;

# Scan all files and work out if their tags are correct.
for my $file (@git_files) {
    # Ignore non-files. git can store other types of objects (submodule
    # dirs, symlinks, etc) that aren't interesting for licensing.
    next unless -f $file && ! -l $file;

    # Open the file, and extract its license tag. We only check the first
    # 4K of each file because many of these files are large, binary, or
    # both. For a typical source file that means the tag should be found
    # within the first ~50 lines.
    open my $fh, '<', $file or die "$0: couldn't open $file: $!\n";
    my $nbytes = read $fh, my $buf, 4096;
    die "$0: couldn't read $file: $!\n" if !defined $nbytes;
    close $fh;

    # The tag must run to the end of its line (/m makes $ match there).
    # Only the first occurrence in the buffer is considered; $tag is left
    # undefined if there is none.
    my ($tag) =
        $buf =~ m/\bSPDX-License-Identifier: ([A-Za-z0-9_\-\. ]+)$/m;

    # Decide if the file should have a tag at all
    my $tagged = file_is_tagged($file);

    # If no license tag is wanted, there's not much left to do
    if (!$tagged) {
        if (defined $tag) {
            # untagged file has a tag, pattern change required
            say "unexpected license tag: $file";
            $rc = 1;
        }
        next;
    }

    # If a tag is required, but doesn't have one, warn and loop.
    if (!defined $tag) {
        say "missing license tag: $file";
        $rc = 1;
        next;
    }

    # Determine the set of valid license tags for this file. Start with
    # the defaults.
    my $tags = $default_license_tags;

    if (exists $override_tags{$file}) {
        # File has an explicit override, use it. The entry is removed
        # so that overrides for files we never saw are still present
        # once the scan is finished.
        $tags = [delete $override_tags{$file}];
    } else {
        # Work through the path tag sets, taking the set with the
        # most precise (longest) prefix match. If no sets match, we
        # fall through and are left with the default set.
        # @path_license_tags is a flat list of (path, tags) pairs,
        # hence the step of two.
        my $matchlen = 0;
        for (my $n = 0; $n < @path_license_tags; $n += 2) {
            my ($path, $t) = @path_license_tags[$n,$n+1];
            if (substr($file, 0, length($path)) eq $path &&
                length($path) > $matchlen) {
                $tags = $t;
                $matchlen = length($path);
            }
        }
    }

    # Confirm the file's tag is in the set, and warn if not.
    unless (grep { $_ eq $tag } @$tags) {
        say "invalid license tag: $file";
        say "    (got $tag; expected: @$tags)";
        $rc = 1;
    }
}
412
413
##########
414
415
# Report every explicitly listed path (tagged, untagged or overridden) that
# is still present in its lookup table at this point. Entries are removed
# from these tables as files are checked, so anything left was never seen;
# most likely the file was removed from the repo but not from our lists.

say "explicitly tagged file not on disk: $_" for sort keys %$tagged_files;
say "explicitly untagged file not on disk: $_" for sort keys %$untagged_files;
say "explicitly overridden file not on disk: $_" for sort keys %override_tags;

# Any leftover entry at all makes the run a failure.
$rc = 1 if %$tagged_files || %$untagged_files || %override_tags;

exit $rc;
432
433