Bug#762255: "collect DLAs on www.d.o"
Package: www.debian.org
Followup-For: Bug #762255
Attached is a partial patch to implement DLAs.
As mentioned, I found the recent_list.wml code incomprehensible. So
I decided to refactor it completely. The result of this refactoring are
the attached files recent_list_security.wml and recent_list_common.wml.
As the filenames indicate, this is not a complete replacement, yet. So
far this only covers security, not News or events. But I think it already
demonstrates the value of the excercise.
Also attached is a dla parser script.
Feedback welcome.
Regards,
Frank
-- System Information:
Debian Release: 8.4
APT prefers stable-updates
APT policy: (500, 'stable-updates'), (500, 'stable')
Architecture: amd64 (x86_64)
Kernel: Linux 3.16.0-4-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/bash
Init: systemd (via /run/systemd/system)
? 762255.patch
? crossreferences.en.html
? cve-compatibility.en.html
? dsa-long.en.rdf
? dsa.en.rdf
? faq.en.html
? index.en.html
? pam-auth.en.html
? parse-dla.pl
? ref-table.inc
Index: Makefile
===================================================================
RCS file: /cvs/webwml/webwml/english/security/Makefile,v
retrieving revision 1.70
diff -u -r1.70 Makefile
--- Makefile 10 Nov 2012 15:44:04 -0000 1.70
+++ Makefile 4 Apr 2016 20:45:25 -0000
@@ -12,10 +12,13 @@
index.$(LANGUAGE).html: index.wml $(wildcard $(CUR_YEAR)/dsa-*.wml) \
+ $(wildcard $(CUR_YEAR)/dla-*.wml) \
$(wildcard $(ENGLISHSRCDIR)/security/$(CUR_YEAR)/dsa-*.wml) \
$(wildcard $(ENGLISHSRCDIR)/security/$(CUR_YEAR)/dsa-*.data) \
+ $(wildcard $(ENGLISHSRCDIR)/security/$(CUR_YEAR)/dla-*.wml) \
+ $(wildcard $(ENGLISHSRCDIR)/security/$(CUR_YEAR)/dla-*.data) \
$(TEMPLDIR)/release_info.wml \
- $(TEMPLDIR)/template.wml $(TEMPLDIR)/recent_list.wml $(GETTEXTDEP)
+ $(TEMPLDIR)/template.wml $(TEMPLDIR)/recent_list_security.wml $(GETTEXTDEP)
pam-auth.$(LANGUAGE).html: pam-auth.wml \
$(ENGLISHSRCDIR)/security/pam-auth.wml
@@ -52,7 +55,10 @@
$(wildcard $(CUR_YEAR)/dsa-*.wml) \
$(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dsa-*.wml) \
$(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dsa-*.data) \
- $(TEMPLDIR)/recent_list.wml $(GETTEXTDEP)
+ $(wildcard $(CUR_YEAR)/dla-*.wml) \
+ $(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dla-*.wml) \
+ $(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dla-*.data) \
+ $(TEMPLDIR)/recent_list_security.wml $(GETTEXTDEP)
ifeq "$(LANGUAGE)" "zh"
@echo -n "Processing $(<F): "
$(shell echo $(WML) | perl -pe 's,:.zh-(..)\.html,:dsa.zh-$$1.rdf,g') \
@@ -68,7 +74,10 @@
$(wildcard $(CUR_YEAR)/dsa-*.wml) \
$(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dsa-*.wml) \
$(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dsa-*.data) \
- $(TEMPLDIR)/recent_list.wml $(GETTEXTDEP)
+ $(wildcard $(CUR_YEAR)/dla-*.wml) \
+ $(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dla-*.wml) \
+ $(wildcard $(ENGLISHDIR)/security/$(CUR_YEAR)/dla-*.data) \
+ $(TEMPLDIR)/recent_list_security.wml $(GETTEXTDEP)
ifeq "$(LANGUAGE)" "zh"
@echo -n "Processing $(<F): "
$(shell echo $(WML) | perl -pe 's,:.zh-(..)\.html,:dsa-long.zh-$$1.rdf,g') \
Index: dsa-long.rdf.in
===================================================================
RCS file: /cvs/webwml/webwml/english/security/dsa-long.rdf.in,v
retrieving revision 1.6
diff -u -r1.6 dsa-long.rdf.in
--- dsa-long.rdf.in 30 Apr 2014 09:22:52 -0000 1.6
+++ dsa-long.rdf.in 4 Apr 2016 20:45:25 -0000
@@ -1,4 +1,4 @@
-#use wml::debian::recent_list
+#use wml::debian::recent_list_security
<bind-gettext-domain domain="security" />
@@ -21,11 +21,11 @@
<dc:date><:= rdf_ctime(); :></dc:date>
<items>
<rdf:Seq>
-<:= get_recent_list ( '1m', '6', '$(ENGLISHDIR)/security', 'rdfseq bydate', 'dsa-\d+' ); :>
+<:= get_recent_security_list_rdf('rdfseq', '1m', '6', '.', '$(ENGLISHDIR)/security' ); :>
</rdf:Seq>
</items>
</channel>
-<:= get_recent_list ( '1m', '6', '$(ENGLISHDIR)/security', 'rdflong bydate', 'dsa-\d+' ); :>
+<:= get_recent_security_list_rdf('rdf-long', '1m', '6', '.', '$(ENGLISHDIR)/security' ); :>
</rdf:RDF>
Index: dsa.rdf.in
===================================================================
RCS file: /cvs/webwml/webwml/english/security/dsa.rdf.in,v
retrieving revision 1.10
diff -u -r1.10 dsa.rdf.in
--- dsa.rdf.in 30 Apr 2014 09:22:52 -0000 1.10
+++ dsa.rdf.in 4 Apr 2016 20:45:25 -0000
@@ -1,4 +1,4 @@
-#use wml::debian::recent_list
+#use wml::debian::recent_list_security
<bind-gettext-domain domain="security" />
@@ -21,11 +21,11 @@
<dc:date><:= rdf_ctime(); :></dc:date>
<items>
<rdf:Seq>
-<:= get_recent_list ( '1m', '6', '$(ENGLISHDIR)/security', 'rdfseq bydate', 'dsa-\d+' ); :>
+<:= get_recent_security_list_rdf( 'rdfseq', '1m', '6', '.', '$(ENGLISHDIR)/security' ); :>
</rdf:Seq>
</items>
</channel>
-<:= get_recent_list ( '1m', '6', '$(ENGLISHDIR)/security', 'rdf bydate', 'dsa-\d+' ); :>
+<:= get_recent_security_list_rdf( 'rdf', '1m', '6', '.', '$(ENGLISHDIR)/security' ); :>
</rdf:RDF>
Index: index.wml
===================================================================
RCS file: /cvs/webwml/webwml/english/security/index.wml,v
retrieving revision 1.98
diff -u -r1.98 index.wml
--- index.wml 7 Sep 2014 08:56:14 -0000 1.98
+++ index.wml 4 Apr 2016 20:45:25 -0000
@@ -1,6 +1,6 @@
#use wml::debian::template title="Security Information" GEN_TIME="yes"
#use wml::debian::toc
-#use wml::debian::recent_list
+#use wml::debian::recent_list_security
#include "$(ENGLISHDIR)/releases/info"
<define-tag toc-title-formatting endtag="required" whitespace="delete">
@@ -77,7 +77,7 @@
debian-security-announce</a> list.
<p>
-<:= get_recent_list( '1m', '6', '$(ENGLISHDIR)/security', 'bydate', 'dsa-\d+' ) :>
+<:= get_recent_security_list( '1m', '6', '.', '$(ENGLISHDIR)/security' ) :>
</p>
{#rss#:
Index: make-ref-table.pl
===================================================================
RCS file: /cvs/webwml/webwml/english/security/make-ref-table.pl,v
retrieving revision 1.21
diff -u -r1.21 make-ref-table.pl
--- make-ref-table.pl 30 Apr 2014 09:22:52 -0000 1.21
+++ make-ref-table.pl 4 Apr 2016 20:45:25 -0000
@@ -177,14 +177,13 @@
if ( ! $opt_p ) {
#Don't print DSA- for those that have year format (old
#type of advisories)
- print "DSA-" if $dsa !~ /\d{6,}/ ;
- print "$dsa\t$dsaref{$dsa}{'printtext'}\t";
+ print uc($dsa);
+ print "\t$dsaref{$dsa}{'printtext'}\t";
print gmctime($dsaref{$dsa}{'date'})."\n" ;
} else {
print "<tr VALIGN=\"TOP\"><td>";
print "<a href=\"https://www.debian.org/security/".$dsaref{$dsa}{'location'}."\">";
- print "DSA-" if $dsa !~ /\d{6,}/ ;
- print "$dsa</a>";
+ print uc($dsa)."</a>";
print "</td>$dsaref{$dsa}{'printtext'} </tr>\n";
}
}
@@ -195,7 +194,7 @@
sub parsefile {
my ($file,$filename) = @_ ;
# The filename gives us the DSA we are parsing
- if ( $filename =~ /dsa\-(\d+)/ || $filename =~ /(\d+\w+)/ ) {
+ if ( $filename =~ /(d[ls]a\-\d+)/ || $filename =~ /(\d+\w+)/ ) {
$dsa=$1;
} else {
print STDERR "File $file does not look like a proper DSA, not checking\n" if $opt_v;
Index: 2015/index.wml
===================================================================
RCS file: /cvs/webwml/webwml/english/security/2015/index.wml,v
retrieving revision 1.1
diff -u -r1.1 index.wml
--- 2015/index.wml 4 Jan 2015 13:02:17 -0000 1.1
+++ 2015/index.wml 4 Apr 2016 20:45:37 -0000
@@ -1,8 +1,8 @@
<define-tag pagetitle>Security Advisories from 2015</define-tag>
#use wml::debian::template title="<pagetitle>" GEN_TIME="yes"
-#use wml::debian::recent_list
+#use wml::debian::recent_list_security
-<:= get_recent_list ('.', '0', '$(ENGLISHDIR)/security/2015', '', 'dsa-\d+' ) :>
+<:= get_directory_security_list ('.', '$(ENGLISHDIR)/security/2015' ) :>
<p>You can get the latest Debian security advisories by subscribing to our
<a href="https://lists.debian.org/debian-security-announce/">\
Index: 2015/Makefile
===================================================================
RCS file: /cvs/webwml/webwml/english/security/2015/Makefile,v
retrieving revision 1.1
diff -u -r1.1 Makefile
--- 2015/Makefile 5 Jan 2015 14:56:53 -0000 1.1
+++ 2015/Makefile 4 Apr 2016 20:45:47 -0000
@@ -17,5 +17,5 @@
index.$(LANGUAGE).html: index.wml $(wildcard dsa-[0-9]*.wml) \
$(ENGLISHSRCDIR)/$(CUR_DIR)/dsa-[0-9]*.data \
- $(TEMPLDIR)/template.wml $(TEMPLDIR)/recent_list.wml $(GETTEXTDEP)
+ $(TEMPLDIR)/template.wml $(TEMPLDIR)/recent_list_security.wml $(GETTEXTDEP)
$(WML) $(<F)
<<< text/html; charset="utf-8": Unrecognized >>>
Title: $file_data->{title}
#use wml::debian::ctime
#use wml::debian::common_tags
#use wml::debian::openrecode
#use wml::debian::recent_list_common
No items for this year.
(new revision)
# get_recent_security_list( $time, $number, $security_dir, $english_dir )
sub get_recent_security_list {
return get_recent_list( @_, qr/^d[ls]a-.+.wml$/, \&get_dsa_data, \&format_security_items, \&format_security_item );
}
# get_recent_security_list_rdf( [rdf, rdf-long, rdfseq], $time, $number, $security_dir, $english_dir )
sub get_recent_security_list_rdf {
my ($format) = shift;
#warn "get_recent_security_list_rdf( format = $format )";
if( $format eq 'rdf' ){
return get_recent_list( @_, qr/^d[ls]a-.+.wml$/, \&get_dsa_data, \&format_security_items, \&format_security_item_rdf );
}elsif( $format eq 'rdf-long' ){
return get_recent_list( @_, qr/^d[ls]a-.+.wml$/, \&get_dsa_data, \&format_security_items, \&format_security_item_rdf_long );
}elsif( $format eq 'rdfseq' ){
return get_recent_list( @_, qr/^d[ls]a-.+.wml$/, \&get_dsa_data, \&format_security_items, \&format_security_item_rdf_seq );
}else{
die "unknown format $format\n";
}
}
# get_directory_security_list( $security_rel_path, $english_dir )
sub get_directory_security_list {
return get_directory_list( @_, qr/^d[ls]a-.+.wml$/, \&get_dsa_data, \&format_security_items, \&format_security_item );
}
sub get_dsa_data {
my ($file, $eng_dir, $data) = @_;
(my $basename = $file) =~ s/\.wml$//;
#warn "get_dsa_data( $file, $eng_dir, $data )\n";
my $content = slurp_file_openrecode($file, $eng_dir);
my $title = match_tag($content, 'pagetitle');
my $desc = match_tag($content, 'description');
my $moreinfo = match_tag_first_p($content, 'moreinfo');
# read in datafile
my $data_content = slurp_file("$eng_dir/$basename.data");
my $t = match_tag($data_content, 'pagetitle');
$title = $t if $t;
$rdate = match_tag($data_content, 'report_date');
my @hdate = split ',', $rdate;
my @isodate = map( sprintf("%04d-%02d-%02d", split '-', $_), @hdate );
$title =~ s/(D[LS]A-\d{3,})-\d{1}/$1/; # strip off the revision in the DSA number
#warn "rdate=$rdate title=$title desc=$desc\n";
foreach my $isodate (@isodate){
my $timestamp = iso2stamp($isodate);
#warn "$isodate ($timestamp) => $file\n";
push @{$data->{$timestamp}}, {
file => $file,
basename => $basename,
isodate => $isodate,
date => newsdate($isodate),
title => $title,
description => $desc,
moreinfo => $moreinfo,
};
}
}
sub format_security_items {
my ($data, $format_item) = @_;
my @dates = sort { $b <=> $a } keys %$data;
unless (@dates) {
return "\n";
}
my @str;
foreach my $date (@dates){
#warn "date=".scalar gmtime($date)."\n";
foreach my $file_data (sort { $b->{title} cmp $a->{title} } @{$data->{$date}}){
#warn "title=$file_data->{title}\n";
push @str, $format_item->($file_data);
}
}
return join('',@str);
}
sub format_security_item {
my ($file_data) = @_;
return "[$file_data->{date}] "
."{basename}\">$file_data->{title} "
."$file_data->{description}
\n";
}
my $security_homepage = 'https://www.debian.org/security/';
sub format_security_item_rdf_seq {
my ($file_data) = @_;
# we need absolute paths for rdf
(my $rdfbase = $file_data->{basename}) =~ s@^security/@@;
return qq{\n};
}
sub format_security_item_rdf {
my ($file_data) = @_;
# we need absolute paths for rdf
(my $rdfbase = $file_data->{basename}) =~ s@^security/@@;
return "\n"
."- \n"
." \n"
." $security_homepage$rdfbase\n"
." \n"
." $file_data->{description}\n"
." \n"
." $file_data->{isodate}\n"
."
\n";
}
sub format_security_item_rdf_long {
my ($file_data) = @_;
# we need absolute paths for rdf
(my $rdfbase = $file_data->{basename}) =~ s@^security/@@;
# $moreinfo is WML/HTML; we need to strip tags here
my $moreinfo = $file_data->{moreinfo};
# HTML entities
$moreinfo =~ s/(&[^#;]+;)/&decodehtmlentity($1)/ge;
#
$moreinfo =~ s#"]+)"?>#$1#g;
$moreinfo =~ s#]+)">#$1#g;
# HTML tags
$moreinfo =~ s/</g;
$moreinfo =~ s/>/>/g;
$moreinfo =~ s/"/"/g; #"
# WML continuation
$moreinfo =~ s/\\\n//g;
return "\n"
."- \n"
." $file_data->{title} - $file_data->{description}\n"
." $security_homepage$rdfbase\n"
." \n"
." $moreinfo\n"
." \n"
." $file_data->{isodate}\n"
."
\n";
}
# decode_html_entity is used in the RDF outputs to convert the predefined
# HTML/SGML entities to NCRs, as they are not predefined for XML formats.
sub decodehtmlentity {
my $ent = shift;
# ISO 8859-1 entities
@entities = (
' ', '¡', '¢', '£', '¤', '¥',
'¦', '§', '¨', '©', 'ª', '«', '¬',
'', '®', '¯', '°', '±', '²', '³',
'´', 'µ', '¶', '·', '¸', '¹',
'º', '»', '¼', '½', '¾', '¿',
'À', 'Á', 'Â', 'Ã', 'Ä', 'Å',
'Æ', 'Ç', 'È', 'É', 'Ê', 'Ë',
'Ì', 'Í', 'Î', 'Ï', 'Ð', 'Ñ',
'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý',
'Þ', 'ß', 'à', 'á', 'â', 'ã',
'ä', 'å', 'æ', 'ç', 'è', 'é',
'ê', 'ë', 'ì', 'í', 'î', 'ï',
'ð', 'ñ', 'ò', 'ó', 'ô', 'õ',
'ö', '÷', 'ø', 'ù', 'ú', 'û',
'ü', 'ý', 'þ', 'ÿ'
);
for (my $i = 0; $i < $#entities; ++ $i)
{
return "".($i+160).";" if $entities[$i] eq $ent;
}
# Non-ISO 8859-1 entities
%entities = (
# Specials
'Œ' => 'Œ',
'œ' => 'œ',
'Š' => 'Š',
'š' => 'š',
'Ÿ' => 'Ÿ',
'ˆ' => 'ˆ',
'˜' => '˜',
' ' => ' ',
' ' => ' ',
' ' => ' ',
'' => '',
'' => '',
'' => '',
'' => '',
'–' => '–',
'—' => '—',
'‘' => '‘',
'’' => '’',
'‚' => '‚',
'“' => '“',
'”' => '”',
'„' => '„',
'†' => '†',
'‡' => '‡',
'‰' => '‰',
'‹' => '‹',
'›' => '›',
'€' => '€',
# Symbols
'ƒ' => 'ƒ',
'Α' => 'Α',
'Β' => 'Β',
'Γ' => 'Γ',
'Δ' => 'Δ',
'Ε' => 'Ε',
'Ζ' => 'Ζ',
'Η' => 'Η',
'Θ' => 'Θ',
'Ι' => 'Ι',
'Κ' => 'Κ',
'Λ' => 'Λ',
'Μ' => 'Μ',
'Ν' => 'Ν',
'Ξ' => 'Ξ',
'Ο' => 'Ο',
'Π' => 'Π',
'Ρ' => 'Ρ',
'Σ' => 'Σ',
'Τ' => 'Τ',
'Υ' => 'Υ',
'Φ' => 'Φ',
'Χ' => 'Χ',
'Ψ' => 'Ψ',
'Ω' => 'Ω',
'α' => 'α',
'β' => 'β',
'γ' => 'γ',
'δ' => 'δ',
'ε' => 'ε',
'ζ' => 'ζ',
'η' => 'η',
'θ' => 'θ',
'ι' => 'ι',
'κ' => 'κ',
'λ' => 'λ',
'μ' => 'μ',
'ν' => 'ν',
'ξ' => 'ξ',
'ο' => 'ο',
'π' => 'π',
'ρ' => 'ρ',
'ς' => 'ς',
'σ' => 'σ',
'τ' => 'τ',
'υ' => 'υ',
'φ' => 'φ',
'χ' => 'χ',
'ψ' => 'ψ',
'ω' => 'ω',
'ϑ' => 'ϑ',
'ϒ' => 'ϒ',
'ϖ' => 'ϖ',
'•' => '•',
'…' => '…',
'′' => '′',
'″' => '″',
'‾' => '‾',
'⁄' => '⁄',
'℘' => '℘',
'ℑ' => 'ℑ',
'ℜ' => 'ℜ',
'™' => '™',
'ℵ' => 'ℵ',
'←' => '←',
'↑' => '↑',
'→' => '→',
'↓' => '↓',
'↔' => '↔',
'↵' => '↵',
'⇐' => '⇐',
'⇑' => '⇑',
'⇒' => '⇒',
'⇓' => '⇓',
'⇔' => '⇔',
'∀' => '∀',
'∂' => '∂',
'∃' => '∃',
'∅' => '∅',
'∇' => '∇',
'∈' => '∈',
'∉' => '∉',
'∋' => '∋',
'∏' => '∏',
'∑' => '∑',
'−' => '−',
'∗' => '∗',
'√' => '√',
'∝' => '∝',
'∞' => '∞',
'∠' => '∠',
'∧' => '∧',
'∨' => '∨',
'∩' => '∩',
'∪' => '∪',
'∫' => '∫',
'∴' => '∴',
'∼' => '∼',
'≅' => '≅',
'≈' => '≈',
'≠' => '≠',
'≡' => '≡',
'≤' => '≤',
'≥' => '≥',
'⊂' => '⊂',
'⊃' => '⊃',
'⊄' => '⊄',
'⊆' => '⊆',
'⊇' => '⊇',
'⊕' => '⊕',
'⊗' => '⊗',
'⊥' => '⊥',
'⋅' => '⋅',
'⌈' => '⌈',
'⌉' => '⌉',
'⌊' => '⌊',
'⌋' => '⌋',
'〈' => '〈',
'〉' => '〉',
'◊' => '◊',
'♠' => '♠',
'♣' => '♣',
'♥' => '♥',
'♦' => '♦',
);
return $entities{$ent} if defined $entities{$ent};
return '?'; # Say what?
}
#
# vim:ts=8:sw=4:
#
<perl>
# common functions for all recent_list_*.wml
sub get_recent_list {
my ($time, $number, $rel_path, $english_dir, $files_match, $data_callback, $format_list_callback, $format_item_callback) =@_;
#warn "get_recent_list($time, $number, $rel_path, $english_dir, $files_match, $data_callback, $format_list_callback, $format_item_callback)\n";
my $since_date = determine_since_date($time);
#warn "since_date=$since_date\n";
my $files = get_matching_filenames_by_time($rel_path, $english_dir, $files_match, $since_date);
my $data = get_files_data($files, $english_dir, $data_callback);
my $filtered_data = filter_items($data, $since_date, $number);
my $str = $format_list_callback->($filtered_data, $format_item_callback);
return $str;
}
sub get_directory_list {
my ($rel_path, $eng_dir, $files_match, $data_callback, $format_list_callback, $format_item_callback) = @_;
my $files = get_matching_filenames($security_rel_path, $eng_dir, qr/^d[ls]a-.+.wml$/);
my $data = get_files_data($files, $eng_dir, \&get_dsa_data);
my $str = $format_list_callback->($data, $format_item_callback);
return $str;
}
sub slurp_file_openrecode {
my ($file, $eng_dir) = @_;
(my $trans_title = $file) =~ s/\.wml$/\.title/;
# read file in
my $fh = openrecode($file, $trans_title, "$eng_dir/$file")
or die "couldn't open $eng_dir/$file: $!\n";
my $content;
<protect pass=2>
{
local $/;
$content = <$fh>;
}
</protect>
close $fh;
return $content;
}
sub slurp_file {
my ($file) = @_;
open my $fh, '<', $file
or die "couldn't open $file: $!\n";
my $content;
<protect pass=2>
{
local $/;
$content = <$fh>;
}
</protect>
close $fh;
return $content;
}
sub match_tag {
my ($content, $tag) = @_;
my $value;
<protect pass=2>
if ($content =~ m|^<define-tag $tag>\s*(.*?)\s*</define-tag>|ms) {
$value = qq/$1/; } # all
</protect>
return $value;
}
sub match_tag_first_p {
my ($content, $tag) = @_;
my $value;
<protect pass=2>
if ($content =~ m"^<define-tag $tag>\s*(?:(.*?</p>)|(.*?)</define-tag>)"ms) {
$value = qq/$1/; } # all
</protect>
return $value;
}
sub determine_since_date {
my ($time) = @_;
return parse_time($time) if $time;
return '';
}
sub determine_relevant_years {
my ($since_date) = @_;
my $year = $(CUR_YEAR);
$since_year = $year;
if ($since_date){
$since_year = (gmtime($since_date))[5] + 1900;
if ($since_year > $year) {
warn "since_year > year ($since_year > $year)\n";
}
}
# djpig: take $since_year-1, perhaps better define an $oldest_year?
# djpig: but there should be no more updates to an item after a year
# djpig: we're saving time so.
return [($since_year-1) .. $year];
}
sub get_matching_filenames_by_time {
my ($rel_path, $english_dir, $match, $since_date) =@_;
my @files;
my $years = determine_relevant_years($since_date);
for my $act_year (@$years) {
my $act_path = $rel_path eq '.' ? $act_year : "$rel_path/$act_year";
my $new_files = get_matching_filenames($act_path, "$english_dir/$act_year", $match);
push @files, @$new_files;
}
return \@files;
}
sub get_matching_filenames {
my ($rel_path, $eng_dir, $match) = @_;
#warn "get_match_filesnames( $rel_path, $eng_dir, $match )\n";
opendir my $dir_h, $eng_dir
or die "couldn't open dir $eng_dir: $!\n";
my @files = grep { ($_ =~ $match)
&& -f "$eng_dir/$_"
&& ($_="$rel_path/$_")
} readdir($dir_h);
closedir $dir_h;
return \@files;
}
sub get_files_data {
my ($files, $eng_dir, $callback) = @_;
my %data;
foreach my $file (@$files) {
$callback->($file, $eng_dir, \%data);
}
return \%data;
}
sub filter_items {
my ($data, $since_date, $minnumber) = @_;
my @dates = sort { $b <=> $a } keys %$data;
#warn "since_date: ".scalar gmtime($since_date)." minnum: $minnumber\n";
my $count = 0;
my %filtered_data;
foreach my $date (@dates) {
#warn "date: ".scalar gmtime($date)." ($count >= $minnumber) && ($date < $since_date)\n";
if ($count >= $minnumber) {
if((!$since_date && $minnumber)
|| (($since_date || !$minnum)
&& ($date lt $since_date)) ) {
last;
}
}
$filtered_data{$date} = $data->{$date};
$count += scalar @{$data->{$date}};
}
return \%filtered_data;
}
# parse_time gets as argument a string and returns a unix timestamp
# Input: $time_str - String with the following format
# $time_str ::= <integer>(d|w|m|y)
# Output: integer timestamp
#
# parse_time subtracts <integer> days/weeks/months from the actual time and
# returns the corresponding timestamp. Years are handled special: 1y means
# "since January, 1st of actual year", 2y means "since January, 1st of
# last year", etc.
sub parse_time {
my $time_str = shift;
my $year = (gmtime())[5] + 1900;
my $time = time();
my $res;
for ($time_str) {
/\d{4}/ && do {
$res = timegm(0,0,0,1,0,$year);
last;
};
/(\d+)d/ && do {
$res = $time - 86400 * $1;
last;
};
/(\d+)w/ && do {
$res = $time - 86400 * 7 * $1;
last;
};
/(\d+)m/ && do {
# All months have 30 days,
# all other would be far more complicated
$res = $time - 86400 * 30 * $1;
last;
};
/(\d+)y/ && do {
# years are handled special
my $ryear = $year - $1 + 1; # the actual year count as a whole one
$res = timegm(0,0,0,1,0,$ryear); # 01.01.$ryear 00:00:00
last;
};
}
return $res;
}
# iso2stamp converts a date in ISO format (YYYY-MM-DD) to an
# unix timestamp for 23:59:59 on the specified day
# Input: $time - String with the ISO date
# Output: integer timestamp
sub iso2stamp {
my $time = shift;
if ($time =~ /undated/) {
return 0;
}
my ($year, $month, $day) = ($time =~ /(\d{4})-(\d{1,2})-(\d{1,2})/);
unless ($year && $month && $day) { warn "not an ISO date: $time\n"; }
return timegm( 59, 59, 23, $day, $month-1, $year);
}
</perl>
Reply to: