Path: blob/main/sys/contrib/openzfs/scripts/spdxcheck.pl
48262 views
#!/usr/bin/env perl

# SPDX-License-Identifier: MIT
#
# Copyright (c) 2025, Rob Norris <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

use 5.010;
use warnings;
use strict;

# All files known to git are either "tagged" or "untagged". Tagged files are
# expected to have a license tag, while untagged files are expected to _not_
# have a license tag. There is no "optional" tag; all files are either
# "tagged" or "untagged".
#
# Whether or not a file is tagged or untagged is determined using the patterns
# in $tagged_patterns and $untagged_patterns and the following sequence:
#
# - if the file's full path is explicitly listed in $tagged_patterns, then the
#   file is tagged.
#
# - if the file's full path is explicitly listed in $untagged_patterns, then
#   the file is untagged.
#
# - if the filename matches a pattern in $tagged_patterns, and does not match
#   a pattern in $untagged_patterns, then the file is tagged.
#
# - otherwise, the file is untagged.
#
# The patterns do a simple glob-like match over the entire path relative to
# the root of the git repo (no leading /). '*' matches anything at that point,
# across path fragments. '?' matches a single character.

my $tagged_patterns = q(
    # Compiled source files
    *.c
    *.h
    *.S

    # Python files, eg test suite drivers, libzfs bindings
    *.py
    *.py.in

    # Various support scripts
    *.sh
    *.pl

    # Test suite
    *.ksh
    *.ksh.in
    *.kshlib
    *.kshlib.in
    *.shlib

    # Test suite data files
    *.run
    *.cfg
    *.cfg.in
    *.fio
    *.lua
    *.zcp

    # Manpages
    man/man?/*.?
    man/man?/*.?.in

    # Unsuffixed programs (or generated of same)
    cmd/zarcstat.in
    cmd/zarcsummary
    cmd/dbufstat.in
    cmd/zilstat.in
    cmd/zpool/zpool.d/*
    etc/init.d/zfs-import.in
    etc/init.d/zfs-load-key.in
    etc/init.d/zfs-mount.in
    etc/init.d/zfs-share.in
    etc/init.d/zfs-zed.in
    etc/zfs/zfs-functions.in
    scripts/objtool-wrapper.in

    # Misc items that have clear licensing info but aren't easily matched,
    # or are the first of a class that we aren't ready to match yet.
    config/ax_code_coverage.m4
    configure.ac
    module/lua/README.zfs
    scripts/kmodtool
    tests/zfs-tests/tests/functional/inheritance/README.config
    tests/zfs-tests/tests/functional/inheritance/README.state
    cmd/zed/zed.d/statechange-notify.sh
);

my $untagged_patterns = q(
    # Exclude CI tooling as it's not interesting for overall project
    # licensing.
    .github/*

    # Everything below this has unclear licensing. Work is happening to
    # identify and update them. Once one gains a tag it should be removed
    # from this list.

    cmd/zed/zed.d/*.sh
    cmd/zpool/zpool.d/*

    contrib/coverity/model.c
    include/libzdb.h
    include/os/freebsd/spl/sys/inttypes.h
    include/os/freebsd/spl/sys/mode.h
    include/os/freebsd/spl/sys/trace.h
    include/os/freebsd/spl/sys/trace_zfs.h
    include/os/freebsd/zfs/sys/zpl.h
    include/os/linux/kernel/linux/page_compat.h
    lib/libspl/include/os/freebsd/sys/sysmacros.h
    lib/libspl/include/sys/string.h
    lib/libspl/include/sys/trace_spl.h
    lib/libspl/include/sys/trace_zfs.h
    lib/libzdb/libzdb.c
    module/lua/setjmp/setjmp.S
    module/lua/setjmp/setjmp_ppc.S
    module/zstd/include/sparc_compat.h
    module/zstd/zstd_sparc.c
    tests/zfs-tests/cmd/cp_files.c
    tests/zfs-tests/cmd/zed_fd_spill-zedlet.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_001_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_002_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_003_pos.c
    tests/zfs-tests/tests/functional/tmpfile/tmpfile_test.c

    autogen.sh
    contrib/bpftrace/zfs-trace.sh
    contrib/pyzfs/docs/source/conf.py
    contrib/pyzfs/libzfs_core/test/__init__.py
    contrib/pyzfs/setup.py.in
    contrib/zcp/autosnap.lua
    scripts/commitcheck.sh
    scripts/man-dates.sh
    scripts/mancheck.sh
    scripts/paxcheck.sh
    scripts/zfs-helpers.sh
    scripts/zfs-tests-color.sh
    scripts/zfs.sh
    scripts/zimport.sh
    tests/zfs-tests/callbacks/zfs_failsafe.ksh
    tests/zfs-tests/include/commands.cfg
    tests/zfs-tests/include/tunables.cfg
    tests/zfs-tests/include/zpool_script.shlib
    tests/zfs-tests/tests/functional/mv_files/random_creation.ksh
);

# For files expected to have a license tag, these are the acceptable tags by
# path. A file in one of these paths with a tag not listed here must be in the
# override list below. If the file is not in any of these paths, then
# $default_license_tags is used.
my $default_license_tags = [
    'CDDL-1.0', '0BSD', 'BSD-2-Clause', 'BSD-3-Clause', 'MIT'
];

# Flat list of (path prefix => allowed tags) pairs. A prefix is compared
# against the start of the file's path as a plain string, so it may end
# part-way through a file name (see the fletcher entry).
my @path_license_tags = (
    # Conventional wisdom is that the Linux SPL must be GPL2+ for
    # kernel compatibility.
    'module/os/linux/spl' => ['GPL-2.0-or-later'],
    'include/os/linux/spl' => ['GPL-2.0-or-later'],

    # Third-party code should keep its original license
    'module/zstd/lib' => ['BSD-3-Clause OR GPL-2.0-only'],
    'module/lua' => ['MIT'],

    # lua/setjmp is platform-specific code sourced from various places
    'module/lua/setjmp' => $default_license_tags,

    # Some of the fletcher modules are dual-licensed
    'module/zcommon/zfs_fletcher' =>
        ['BSD-2-Clause OR GPL-2.0-only', 'CDDL-1.0'],

    'module/icp' => ['Apache-2.0', 'CDDL-1.0'],
    'contrib/icp' => ['Apache-2.0', 'CDDL-1.0'],

    # Python bindings are always Apache-2.0
    'contrib/pyzfs' => ['Apache-2.0'],
);

# This is a list of "special case" license tags that are in use in the tree,
# and the files where they occur. These exist for a variety of reasons, and
# generally should not be used for new code. If you need to bring in code that
# has a different license from the acceptable ones listed above, then you will
# also need to add it here, with rationale provided and approval given in your
# PR.
my %override_file_license_tags = (

    # SPDX have repeatedly rejected the creation of a tag for a public
    # domain dedication, as not all dedications are clear and unambiguous
    # in their meaning and not all jurisdictions permit relinquishing a
    # copyright anyway.
    #
    # A reasonably common workaround appears to be to create a local
    # (project-specific) identifier to convey whatever meaning the project
    # wishes it to. To cover OpenZFS' use of third-party code with a
    # public domain dedication, we use this custom tag.
    #
    # Further reading:
    #   https://github.com/spdx/old-wiki/blob/main/Pages/Legal%20Team/Decisions/Dealing%20with%20Public%20Domain%20within%20SPDX%20Files.md
    #   https://spdx.github.io/spdx-spec/v2.3/other-licensing-information-detected/
    #   https://cr.yp.to/spdx.html
    #
    'LicenseRef-OpenZFS-ThirdParty-PublicDomain' => [qw(
        include/sys/skein.h
        module/icp/algs/skein/skein_block.c
        module/icp/algs/skein/skein.c
        module/icp/algs/skein/skein_impl.h
        module/icp/algs/skein/skein_iv.c
        module/icp/algs/skein/skein_port.h
        module/zfs/vdev_draid_rand.c
    )],

    # Legacy inclusions
    'Brian-Gladman-3-Clause' => [qw(
        module/icp/asm-x86_64/aes/aestab.h
        module/icp/asm-x86_64/aes/aesopt.h
        module/icp/asm-x86_64/aes/aeskey.c
        module/icp/asm-x86_64/aes/aes_amd64.S
    )],
    'OpenSSL-standalone' => [qw(
        module/icp/asm-x86_64/aes/aes_aesni.S
    )],
    'LGPL-2.1-or-later' => [qw(
        config/ax_code_coverage.m4
    )],

    # Legacy inclusions of BSD-2-Clause files in Linux SPL.
    'BSD-2-Clause' => [qw(
        include/os/linux/spl/sys/debug.h
        module/os/linux/spl/spl-zone.c
    )],

    # Temporary overrides for things that have the wrong license for
    # their path. Work is underway to understand and resolve these.
    'GPL-2.0-or-later' => [qw(
        include/os/freebsd/spl/sys/kstat.h
        include/os/freebsd/spl/sys/sunddi.h
    )],
    'CDDL-1.0' => [qw(
        include/os/linux/spl/sys/errno.h
        include/os/linux/spl/sys/ia32/asm_linkage.h
        include/os/linux/spl/sys/misc.h
        include/os/linux/spl/sys/procfs_list.h
        include/os/linux/spl/sys/trace.h
        include/os/linux/spl/sys/trace_spl.h
        include/os/linux/spl/sys/trace_taskq.h
        include/os/linux/spl/sys/wmsum.h
        module/os/linux/spl/spl-procfs-list.c
        module/os/linux/spl/spl-trace.c
        module/lua/README.zfs
    )],
);

##########

# Parse one of the pattern lists above.
#
# Takes the multi-line pattern text. Returns a two-element list: a compiled
# regex that matches any of the glob patterns against a whole path, and a
# hashref whose keys are the literal (metachar-free) paths from the list.
sub setup_patterns {
    my ($patterns) = @_;

    my @re;
    my %files;

    for my $pat (split /\n/, $patterns) {
        # remove leading whitespace, then trailing whitespace and comments
        $pat =~ s/^\s+//;
        $pat =~ s/\s*(?:#.*)?$//;

        # skip (now-)empty lines
        next if $pat eq '';

        # if the "pattern" has no metachars, then it's a literal file
        # path and gets matched a bit more strongly
        unless ($pat =~ m/[?*]/) {
            $files{$pat} = 1;
            next;
        }

        # Glob to regex conversion: split on the two glob metachars
        # (keeping them), translate those, and quote everything else so
        # that no character of a path can act as a regex operator.
        push @re, join '', map {
            $_ eq '*' ? '.*' :      # glob * -> regex .*
            $_ eq '?' ? '.'  :      # glob ? -> regex .
            quotemeta($_)
        } split /([*?])/, $pat;
    }

    my $re = join '|', @re;
    return (qr/^(?:$re)$/, \%files);
}

my ($tagged_re, $tagged_files) = setup_patterns($tagged_patterns);
my ($untagged_re, $untagged_files) = setup_patterns($untagged_patterns);

# Decide whether the given repo-relative path is expected to carry a license
# tag. Returns true if so, false if not.
#
# Explicitly-listed paths are removed from their list when seen, so that
# whatever is left over at the end of the run can be reported as stale.
sub file_is_tagged {
    my ($file) = @_;

    # explicitly tagged
    if ($tagged_files->{$file}) {
        delete $tagged_files->{$file};
        return 1;
    }

    # explicitly untagged
    if ($untagged_files->{$file}) {
        delete $untagged_files->{$file};
        return 0;
    }

    # must match tagged patterns and not match untagged patterns
    return ($file =~ $tagged_re) && !($file =~ $untagged_re);
}

# Invert the override table into file => tag. Tags are walked in sorted order
# so the result does not depend on hash ordering.
my %override_tags;
for my $tag (sort keys %override_file_license_tags) {
    $override_tags{$_} = $tag for @{$override_file_license_tags{$tag}};
}

# Extract the license tag from the start of a file.
#
# Only the first 4K is examined because many files are large, binary, or
# both. For a typical source file that means the tag should be found within
# the first ~50 lines. Returns the tag text of the first identifier line
# found, or undef if there is none. Dies if the file can't be opened or read.
sub read_license_tag {
    my ($file) = @_;

    open my $fh, '<', $file or die "$0: couldn't open $file: $!\n";
    my $nbytes = read $fh, my $buf, 4096;
    die "$0: couldn't read $file: $!\n" if !defined $nbytes;
    close $fh;

    # The tag must run to the end of its line; /m lets $ match there.
    my ($tag) =
        $buf =~ m/\bSPDX-License-Identifier: ([A-Za-z0-9_\-\. ]+)$/m;

    return $tag;
}

# Work out the set of license tags that are acceptable for the given path.
# Returns an arrayref of tag strings.
#
# A file with an explicit override gets exactly that tag, and is removed from
# the override table to record that it was seen. Otherwise the longest
# matching prefix in @path_license_tags wins, falling back to the defaults.
sub valid_tags_for {
    my ($file) = @_;

    if ($override_tags{$file}) {
        return [delete $override_tags{$file}];
    }

    my $tags = $default_license_tags;
    my $matchlen = 0;
    for (my $n = 0; $n < @path_license_tags; $n += 2) {
        my ($path, $t) = @path_license_tags[$n, $n+1];
        next unless length($path) > $matchlen;
        next unless substr($file, 0, length($path)) eq $path;
        $tags = $t;
        $matchlen = length($path);
    }

    return $tags;
}

##########

my $rc = 0;

# Get a list of all files known to git. This is a crude way of avoiding any
# build artifacts that have tags embedded in them. -z gives NUL-terminated,
# unquoted paths, so unusual file names come through unmodified.
my $git_output = qx(git ls-tree --name-only -r -z HEAD);
die "$0: couldn't get file list from git\n"
    if !defined $git_output || $? != 0;
my @git_files = sort split /\0/, $git_output;

# Scan all files and work out if their tags are correct.
for my $file (@git_files) {
    # Ignore non-files. git can store other types of objects (submodule
    # dirs, symlinks, etc) that aren't interesting for licensing.
    next unless -f $file && ! -l $file;

    my $tag = read_license_tag($file);

    # If no license tag is wanted, there's not much left to do
    if (!file_is_tagged($file)) {
        if (defined $tag) {
            # untagged file has a tag, pattern change required
            say "unexpected license tag: $file";
            $rc = 1;
        }
        next;
    }

    # If a tag is required, but doesn't have one, warn and loop.
    if (!defined $tag) {
        say "missing license tag: $file";
        $rc = 1;
        next;
    }

    # Confirm the file's tag is in the valid set, and warn if not.
    my $tags = valid_tags_for($file);
    unless (grep { $_ eq $tag } @$tags) {
        say "invalid license tag: $file";
        say " (got $tag; expected: @$tags)";
        $rc = 1;
    }
}

##########

# List any files explicitly listed as tagged or untagged that we didn't see.
# Likely the file was removed from the repo but not from our lists.

for my $file (sort keys %$tagged_files) {
    say "explicitly tagged file not on disk: $file";
    $rc = 1;
}
for my $file (sort keys %$untagged_files) {
    say "explicitly untagged file not on disk: $file";
    $rc = 1;
}
for my $file (sort keys %override_tags) {
    say "explicitly overridden file not on disk: $file";
    $rc = 1;
}

exit $rc;