--- Begin Message ---
- To: Debian Bug Tracking System <submit@bugs.debian.org>
- Subject: debbugs: different charsets not supported
- From: Michael Piefel <piefel@debian.org>
- Date: Fri, 1 Feb 2002 20:07:31 +0100
- Message-id: <20020201190731.GA27569@kosh.informatik.hu-berlin.de>
Package: debbugs
Version: N/A; reported 2002-02-01
Severity: normal
Tags: patch
The bug tracking system fails badly at a number of tasks:
1) decode MIME messages (like attachments and QP)
2) decode MIME quoted headers
3) respect encoding of message
Note that this bug is closely related to #56470, #78616 and #127232. In
fact, point 1 above is exactly what was reported in said bugs.
I have solved the problem by doing the following:
- in bugreport.cgi: collect message into a separate variable, which is
then deMIMEd in common.pl
- mimetoutf8 parses the message, converting it all to UTF-8
I have tested this on a local filesystem only. For bugreports just
containing plain ASCII text with no QP conversion at all, there is no
difference (actually there are a number of additional empty lines that
don't hurt).
The patch is made against the version that is used on bugs.debian.org.
It does not fix pkgreport.cgi, but I can do that as well if you want me
to.
Bye,
Mike
--- common.pl.orig Wed Jan 30 18:13:35 2002
+++ common.pl Fri Feb 1 19:16:19 2002
@@ -3,6 +3,12 @@
use DB_File;
use Fcntl qw/O_RDONLY/;
+require MIME::Parser;
+require MIME::WordDecoder;
+require MIME::Words;
+require Unicode::MapUTF8;
+Unicode::MapUTF8::utf8_charset_alias({ 'UTF-8' => 'UTF8' , 'UTF-16' => 'UTF16' });
+
my $common_archive = 0;
my $common_repeatmerged = 1;
my %common_include = ();
@@ -233,6 +239,59 @@
}
$out .= $in;
return $out;
+}
+
+# Helper function for WordDecoder
+sub toutf8 {
+ my ($text, $charset) = @_;
+ if (Unicode::MapUTF8::utf8_supported_charset($charset)) {
+ return Unicode::MapUTF8::to_utf8({-string => $text, -charset => $charset});
+ } else {
+ return MIME::Words::encode_mimeword($text, "Q", $charset);
+ }
+}
+
+sub bodytoutf8 {
+ my $entity = shift;
+ my $body = $entity->bodyhandle->as_string;
+ my $charset = '';
+ unless ($charset = $entity->head->mime_attr('content-type.charset')) {
+ $charset = "US-ASCII";
+ }
+ $charset =~ tr/a-z/A-Z/;
+ if (Unicode::MapUTF8::utf8_supported_charset($charset)) {
+ $body = Unicode::MapUTF8::to_utf8({-string => $body, -charset => $charset});
+ } else {
+ $body = $entity->stringify_body;
+ }
+ return $body;
+}
+
+sub entitytoutf8 {
+ my $entity = shift;
+ my $body = '';
+ if ($entity->bodyhandle) {
+ $body = bodytoutf8($entity);
+ } else {
+ foreach my $part ($entity->parts) {
+ $body .= "\n[-- Attachment --]\n";
+ $body .= entitytoutf8($part);
+ }
+ }
+
+ my $wd = new MIME::WordDecoder;
+ $wd->handler('raw' => "KEEP", 'UTF-8' => "KEEP", '*' => \&toutf8);
+ my $headers = $wd->decode($entity->head->as_string);
+
+ return $headers . "\n" . $body;
+}
+
+sub mimetoutf8 {
+ my $message = shift;
+ my $parser = new MIME::Parser;
+ $parser->output_to_core(1);
+ my $entity = $parser->parse_data($message);
+ return entitytoutf8($entity);
}
sub bugurl {
--- bugreport.cgi.orig Wed Jan 30 18:09:50 2002
+++ bugreport.cgi Fri Feb 1 18:29:16 2002
@@ -102,6 +102,7 @@
my $suppressnext = 0;
my $this = '';
+my $thisbody = '';
my $cmsg = 1;
@@ -138,10 +139,12 @@
if ($newstate eq 'html') {
$this = '';
+ $thisbody = '';
}
if ($newstate eq 'kill-end') {
+ $this .= htmlsanit(mimetoutf8($thisbody));
$this .= "</pre>\n"
if $normstate eq 'go' || $normstate eq 'go-nox';
@@ -180,11 +183,12 @@
|| &quit("bad line \`$pl' in state incoming-recv");
$this = "<h2>Message received at ".htmlsanit("$1\@$2")
. ":</h2><br>\n<pre>\n$_";
+ $thisbody = '';
$normstate= 'go';
} elsif ($normstate eq 'html') {
$this .= $_;
} elsif ($normstate eq 'go') {
- $this .= htmlsanit($_);
+ $thisbody .= $_;
} elsif ($normstate eq 'go-nox') {
next if !s/^X//;
$this .= htmlsanit($_);
@@ -195,11 +199,13 @@
s/\04/, /g; s/\n$//;
$this = "<h2>Message sent to ".htmlsanit($_).":</h2><br>\n";
}
+ $thisbody = '';
$normstate= 'kill-body';
} elsif ($normstate eq 'autocheck') {
next if !m/^X-Debian-Bugs(-\w+)?: This is an autoforward from (\S+)/;
$normstate= 'autowait';
$this = "<h2>Message received at $2:</h2><br>\n";
+ $thisbody = '';
} elsif ($normstate eq 'autowait') {
next if !m/^$/;
$normstate= 'go-nox';
@@ -213,10 +219,11 @@
&quit("$ref state $normstate at end") unless $normstate eq 'kill-end';
close(L);
-print "Content-Type: text/html\n\n";
+print "Content-Type: text/html; charset=utf-8\n\n";
print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
print "<HTML><HEAD>\n" .
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n" .
"<TITLE>$debbugs::gProject $debbugs::gBug report logs - $short</TITLE>\n" .
"</HEAD>\n" .
'<BODY TEXT="#000000" BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#800080">' .
--
|=| Michael Piefel piefel@informatik.hu-berlin.de
|=| Humboldt-Universität zu Berlin http://www.piefel.de
|=| Tel. (+49 30) 2093 3831
--- End Message ---