Bug#757545: proposing a patch
tags 757545 patch
thanks
The attached patch attempts to fix this bug.
It implements the license-text-too-short tag, which is emitted for every license
known to SPDX that is not in /usr/share/common-licenses and whose text is less
than half the size it is supposed to be.
I also used this opportunity to add differing-license-texts-for-short-name
which is emitted if the same short name carries two different license texts.
Lintian should warn about it so that the maintainer either adapts the license
text, renames the license short name or deduplicates the text into a standalone
license paragraph.
Lastly, I added license-text-for-multiple-licenses, which is triggered if a
Files paragraph lists more than one short license name and also contains a
license text. In that case, there should be exactly one stand-alone license
paragraph for each of these short names, each with its own text.
What do you think?
cheers, josch
From 4232370ae955ecd8f53d33a63922080bcc3112df Mon Sep 17 00:00:00 2001
From: josch <j.schauer@email.de>
Date: Sun, 10 Aug 2014 20:46:37 +0200
Subject: [PATCH] check length of license texts
---
checks/source-copyright.desc | 29 ++
checks/source-copyright.pm | 44 ++-
data/source-copyright/spdx-license-sizes | 316 +++++++++++++++++++++
.../debian/debian/copyright | 48 ++++
.../source-copyright-missing-license-texts/desc | 8 +
.../source-copyright-missing-license-texts/tags | 3 +
6 files changed, 445 insertions(+), 3 deletions(-)
create mode 100644 data/source-copyright/spdx-license-sizes
create mode 100644 t/tests/source-copyright-missing-license-texts/debian/debian/copyright
create mode 100644 t/tests/source-copyright-missing-license-texts/desc
create mode 100644 t/tests/source-copyright-missing-license-texts/tags
diff --git a/checks/source-copyright.desc b/checks/source-copyright.desc
index 921e7b9..4718da5 100644
--- a/checks/source-copyright.desc
+++ b/checks/source-copyright.desc
@@ -205,3 +205,32 @@ Info: The paragraph has a "License" and a "Copyright" field, but no
Lintian will attempt to guess what you intended and continue based on
its guess. If the guess is wrong, you may see spurious tags related
to this paragraph.
+
+Tag: license-text-too-short
+Severity: normal
+Certainty: possible
+Ref: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Info: The short name indicates that one of the standard licenses was used
+ but the length of the license text is less than half what was expected.
+ You probably forgot to paste the full text of the license as required
+ by policy § 12.5.
+
+Tag: differing-license-texts-for-short-name
+Severity: normal
+Certainty: possible
+Ref: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Info: The same license short name should refer to the exact same license.
+ Therefore, the license texts for the same short name have to be identical.
+ If you do not want to have multiple copies of the same license text,
+ consider creating a single stand-alone license paragraph with the license
+ text and referring to it from the files paragraphs.
+
+Tag: license-text-for-multiple-licenses
+Severity: normal
+Certainty: possible
+Ref: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Info: The <tt>License</tt> header in a <tt>Files</tt> paragraph can carry
+ multiple short license names, separated by "and"s and "or"s as applicable.
+ But if that is the case, then it must not also contain a license text
+ because a license text only describes a single license. Add as many
+ stand-alone license paragraphs as required with individual license texts.
diff --git a/checks/source-copyright.pm b/checks/source-copyright.pm
index 6b9a0c5..c40fc69 100644
--- a/checks/source-copyright.pm
+++ b/checks/source-copyright.pm
@@ -42,6 +42,8 @@ my $BAD_SHORT_LICENSES = Lintian::Data->new(
};
});
+my $LICENSE_SIZES = Lintian::Data->new('source-copyright/spdx-license-sizes', qr/\s++/);
+
my $dep5_last_normative_change = '0+svn~166';
my $dep5_last_overhaul = '0+svn~148';
my %dep5_renamed_fields = (
@@ -230,6 +232,7 @@ sub _parse_dep5 {
my %standalone_licenses;
my %required_standalone_licenses;
my %short_licenses_seen;
+ my %full_license_texts;
for my $field (keys %{$first_para}) {
my $renamed_to = $dep5_renamed_fields{$field};
@@ -281,9 +284,20 @@ sub _parse_dep5 {
tag 'missing-license-text-in-dep5-copyright', $license,
"(paragraph at line $current_line)";
} else {
- for (@short_licenses) {
- $standalone_licenses{$_} = $i;
- $short_licenses_seen{$short_license} = $i;
+ if ($#short_licenses > 0) {
+ # standalone license paragraphs must not have more than
+ # one short license
+ # tag implemented in patch of bug#757615
+ } elsif ($short_license ne '') {
+ $standalone_licenses{$short_license} = $i;
+ if (exists $full_license_texts{$short_license}) {
+ my (undef, $fl) = @{$full_license_texts{$short_license}};
+ if ($fl ne $full_license) {
+ tag 'differing-license-texts-for-short-name', $short_license, "(paragraph at line $current_line)";
+ }
+ } else {
+ $full_license_texts{$short_license} = [$current_line,$full_license];
+ }
}
}
} elsif (defined $files) {
@@ -306,6 +320,19 @@ sub _parse_dep5 {
$required_standalone_licenses{$_} = $i;
}
}
+ if ($#short_licenses > 0 and defined($full_license)) {
+ tag 'license-text-for-multiple-licenses', $short_license, "(paragraph at line $current_line)";
+ }
+ if (defined($full_license) and $short_license ne '') {
+ if (exists $full_license_texts{$short_license}) {
+ my (undef, $fl) = @{$full_license_texts{$short_license}};
+ if ($fl ne $full_license) {
+ tag 'differing-license-texts-for-short-name', $short_license, "(paragraph at line $current_line)";
+ }
+ } else {
+ $full_license_texts{$short_license} = [$current_line,$full_license];
+ }
+ }
} else {
tag 'missing-field-in-dep5-copyright', 'license',
"(paragraph at line $current_line)";
@@ -355,6 +382,17 @@ sub _parse_dep5 {
}
}
}
+ while ((my $license, my $v) = each %full_license_texts) {
+ my ($i, $full_license) = @{$v};
+ # normalize license name by removing trailing ".0" and "-1"
+ # it is already lower case
+ $license =~ s/(?:-1)?(?:\.0)*$//;
+ if (my $size = $LICENSE_SIZES->value($license)) {
+ if (length $full_license < 0.5*$size) {
+ tag 'license-text-too-short', $license, "(paragraph at line $i)";
+ }
+ }
+ }
return;
}
diff --git a/data/source-copyright/spdx-license-sizes b/data/source-copyright/spdx-license-sizes
new file mode 100644
index 0000000..1d3d779
--- /dev/null
+++ b/data/source-copyright/spdx-license-sizes
@@ -0,0 +1,316 @@
+# the following data was generated by executing the following commands
+#
+# $ git clone http://git.spdx.org/license-list.git
+# $ cd license-list
+# $ git checkout v1.20
+# $ ls *.txt \
+# > | egrep --line-regexp --invert-match '(Updating the SPDX Licenses|Apache-2.0|Artistic-1.0-Perl|BSD-3-Clause-LBNL|GFDL-1\.[23]|GPL-[123]\.0\+?|LGPL-[23].0\+?|LGPL-2.1\+?).txt' \
+# > | while read file; do wc -c $file; done \
+# > | perl -pe 's/^(\d+)\s+(.*?)(-1)?(\.0)*\.txt$/\L\2\t\1/' \
+# > | sort
+#
+# - this extracts all license texts from the SPDX sources, removes those
+# licenses which can be found in /usr/share/common-licenses and one
+# file that ends in *.txt but is not a license
+# - retrieves the number of bytes in each file
+# - sorts by the filename
+# - the perl regex does:
+# * remove trailing ".0"
+# * remove trailing "-1"
+# * make name lowercase
+# * separate by tab
+# * turn the size and name around
+#
+# - it must be noted that /usr/share/common-licenses/Artistic is
+# Artistic-1.0-Perl and that /usr/share/common-licenses/BSD is
+# BSD-3-Clause-LBNL
+aal 2917
+abstyles 740
+adobe-2006 1776
+adobe-glyph 1614
+adsl 489
+afl-1.1 4660
+afl-1.2 4950
+afl-2.1 8964
+afl-2 9034
+afl-3 10348
+afmparse 909
+agpl 15866
+agpl-3 34284
+aladdin 11600
+amdplpa 5587
+aml 2357
+ampas 2191
+antlr-pd 987
+apache-1.1 2483
+apache 2506
+apafml 616
+apl 46087
+apsl-1.1 19959
+apsl-1.2 19628
+apsl 19445
+apsl-2 20142
+artistic-1.0-cl8 5157
+artistic-2 8712
+artistic 4822
+bahyph 1388
+barr 602
+beerware 265
+bittorrent-1.1 27787
+bittorrent 24262
+borceux 647
+bsd-2-clause 1287
+bsd-2-clause-freebsd 1518
+bsd-2-clause-netbsd 1408
+bsd-3-clause 1475
+bsd-3-clause-attribution 1645
+bsd-3-clause-clear 1694
+bsd-4-clause 1633
+bsd-4-clause-uc 1776
+bsd-protection 6259
+bsl 1337
+bzip2-1.0.5 1947
+bzip2-1.0.6 1739
+caldera 2590
+catosl-1.1 19049
+cc0 6951
+cc-by 11483
+cc-by-2 12573
+cc-by-2.5 12782
+cc-by-3 18667
+cc-by-4 17083
+cc-by-nc 11902
+cc-by-nc-2 13586
+cc-by-nc-2.5 13790
+cc-by-nc-3 19607
+cc-by-nc-4 17703
+cc-by-nc-nd 11031
+cc-by-nc-nd-2 12710
+cc-by-nc-nd-2.5 12935
+cc-by-nc-nd-3 17859
+cc-by-nc-nd-4 17505
+cc-by-nc-sa 13065
+cc-by-nc-sa-2 15177
+cc-by-nc-sa-2.5 15386
+cc-by-nc-sa-3 21351
+cc-by-nc-sa-4 19081
+cc-by-nd 10489
+cc-by-nd-2 11696
+cc-by-nd-2.5 11921
+cc-by-nd-3 16930
+cc-by-nd-4 16890
+cc-by-sa 12523
+cc-by-sa-2 14123
+cc-by-sa-2.5 14331
+cc-by-sa-3 21305
+cc-by-sa-4 18433
+cddl-1.1 17406
+cddl 16513
+cecill-1.1 21276
+cecill 21846
+cecill-2 21660
+cecill-b 21924
+cecill-c 22272
+clartistic 6389
+cnri-python 3380
+cnri-python-gpl-compatible 4004
+condor-1.1 6082
+cpal 28318
+cpl 11652
+cpol-1.02 11764
+crossword 486
+cua-opl 23370
+cube 1079
+d-fsl 15237
+diffmark 88
+doc 4473
+dotseqn 261
+dsdp 2281
+dvipdfm 210
+ecl-2 11112
+ecl 2418
+ecos-2 1565
+efl-2 927
+efl 918
+egenix 4260
+entessa 2278
+epl 11346
+erlpl-1.1 13971
+eudatagrid 3205
+eupl-1.1 13221
+eupl 13082
+eurosym 1367
+fair 308
+frameworx 9770
+fsful 198
+fsfullr 234
+ftl 5990
+gfdl-1.1 17996
+giftware 1352
+gl2ps 895
+glide 12289
+glulxe 462
+gnuplot 1411
+gpl-2.0-with-autoconf-exception 1628
+gpl-2.0-with-bison-exception 700
+gpl-2.0-with-classpath-exception 976
+gpl-2.0-with-font-exception 625
+gpl-2.0-with-gcc-exception 528
+gpl-3.0-with-autoconf-exception 1840
+gpl-3.0-with-gcc-exception 3410
+gsoap-1.3b 20374
+haskellreport 602
+hpnd 1195
+ibm-pibs 856
+ijg 4256
+imagemagick 12532
+imatix 3476
+imlib2 2002
+intel 2086
+intel-acpi 6084
+ipa 9098
+ipl 11408
+isc 822
+jasper-2 2741
+json 1114
+latex2e 719
+leptonica 738
+libpng 4218
+libtiff 1139
+lpl-1.02 11750
+lpl 11874
+lppl-1.1 14068
+lppl-1.2 14165
+lppl-1.3a 18052
+lppl-1.3c 18628
+lppl 8955
+makeindex 1974
+miros 2639
+mit 1077
+mit-advertising 1269
+mit-cmu 1198
+mit-enna 1667
+mit-feh 1104
+mitnfa 1531
+motosoto 20186
+mpich2 1510
+mpl-1.1 23669
+mpl 18284
+mpl-2.0-no-copyleft-exception 16725
+mpl-2 15190
+ms-pl 2668
+ms-rl 3065
+mtll 3255
+multics 2044
+mup 1458
+nasa-1.3 13776
+naumen 1953
+nbpl 5426
+ncsa 1682
+netcdf 1880
+newsletr 518
+ngpl 4691
+nlpl 268
+nokia 21006
+nosl 24573
+noweb 1127
+npl-1.1 28077
+npl 21169
+nposl-3 11799
+nrl 3300
+ntp 713
+nunit 1075
+oclc-2 11123
+odbl 25190
+ofl-1.1 4020
+ofl 3867
+ogtsl 5285
+oldap-1.1 5396
+oldap-1.2 5398
+oldap-1.3 5797
+oldap-1.4 5875
+oldap-2.0.1 1979
+oldap-2.1 2222
+oldap-2 1983
+oldap-2.2.1 2243
+oldap-2.2.2 2269
+oldap-2.2 2230
+oldap-2.3 2267
+oldap-2.4 2126
+oldap-2.5 2173
+oldap-2.6 2111
+oldap-2.7 2198
+oldap-2.8 2195
+oml 1688
+openssl 5334
+opl 20311
+osl-1.1 9665
+osl-2.1 9870
+osl-2 9878
+osl-3 10322
+osl 8921
+pddl 15488
+php-3.01 2860
+php-3 2851
+plexus 1900
+postgresql 1194
+psfrag 485
+psutils 1961
+python-2 9414
+qhull 1303
+qpl 4366
+rdisc 1125
+readme 1147
+rhecos-1.1 21640
+rpl-1.1 33932
+rpl-1.5 32010
+rpsl 30266
+rscpl 21049
+ruby 2136
+saxpath 2159
+sax-pd 2314
+scea 6643
+sgi-b-1.1 14570
+sgi-b 13580
+sgi-b-2 1507
+simpl-2 2533
+sissl-1.2 12501
+sissl 14514
+sleepycat 5003
+smlnj 1125
+snia 21779
+spl 23403
+standardml-nj 1069
+sugarcrm-1.1.3 22108
+swl 2128
+tcl 2251
+tmate 2554
+torque-1.1 4013
+tosl 2075
+unicode-tou 6288
+unlicense 1211
+vim 4533
+vostrom 3059
+vsl 2074
+w3c 2682
+watcom 20978
+wsuipa 528
+wtfpl 432
+wxwindows 2321
+x11 1338
+xerox 962
+xfree86-1.1 2377
+xinetd 2032
+xnet 1249
+xpp 2482
+xskat 535
+ypl-1.1 9005
+ypl 9021
+zed 259
+zend-2 2495
+zimbra-1.3 8993
+zlib 842
+zlib-acknowledgement 1133
+zpl-1.1 2923
+zpl-2.1 2074
+zpl-2 2275
diff --git a/t/tests/source-copyright-missing-license-texts/debian/debian/copyright b/t/tests/source-copyright-missing-license-texts/debian/debian/copyright
new file mode 100644
index 0000000..5c4060b
--- /dev/null
+++ b/t/tests/source-copyright-missing-license-texts/debian/debian/copyright
@@ -0,0 +1,48 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: Doohickey
+Upstream-Contact: J. Random Hacker <j.r.hacker@example.com>
+Source: http://examples.com/doohickey/source/
+
+Files: *
+Copyright: 2014, somebody1
+License: CC-BY-SA-3.0
+ this text is too short for CC-BY-SA
+
+Files: debian/*
+Copyright: 2014, somebody1
+License: CC-BY-SA-3.0
+ this text is different than above
+
+Files: debian/c*
+Copyright: 2014, somebody1
+License: foo or bar
+ this license text describes foo and bar but should be split
+ into individual descriptions
+
+Files: debian/d*
+Copyright: 2014, somebody1
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ .
+ 1) Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+ .
+ 2) Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ .
+ 3) Neither the name of the ORGANIZATION nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/t/tests/source-copyright-missing-license-texts/desc b/t/tests/source-copyright-missing-license-texts/desc
new file mode 100644
index 0000000..c2810f1
--- /dev/null
+++ b/t/tests/source-copyright-missing-license-texts/desc
@@ -0,0 +1,8 @@
+Testname: source-copyright-missing-license-texts
+Sequence: 6000
+Version: 1.0
+Description: Test for the full license text for known licenses
+Test-For:
+ differing-license-texts-for-short-name
+ license-text-too-short
+ license-text-for-multiple-licenses
diff --git a/t/tests/source-copyright-missing-license-texts/tags b/t/tests/source-copyright-missing-license-texts/tags
new file mode 100644
index 0000000..44d7594
--- /dev/null
+++ b/t/tests/source-copyright-missing-license-texts/tags
@@ -0,0 +1,3 @@
+W: source-copyright-missing-license-texts source: differing-license-texts-for-short-name cc-by-sa-3.0 (paragraph at line 11)
+W: source-copyright-missing-license-texts source: license-text-for-multiple-licenses foo or bar (paragraph at line 16)
+W: source-copyright-missing-license-texts source: license-text-too-short cc-by-sa-3 (paragraph at line 6)
--
2.0.1
Reply to: