Your message dated Thu, 20 May 2021 12:01:27 +0200 with message-id <d797cdcf-b6e7-c575-7319-beea33758c6e@debian.org> and subject line Re: Bug#988771: unblock: mat2/0.12.1-1 has caused the Debian Bug report #988771, regarding unblock: mat2/0.12.1-1 to be marked as done. This means that you claim that the problem has been dealt with. If this is not the case it is now your responsibility to reopen the Bug report if necessary, and/or fix the problem forthwith. (NB: If you are a system administrator and have no idea what this message is talking about, this may indicate a serious mail system misconfiguration somewhere. Please contact owner@bugs.debian.org immediately.) -- 988771: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=988771 Debian Bug Tracking System Contact owner@bugs.debian.org with problems
--- Begin Message ---
- To: submit@bugs.debian.org
- Subject: unblock: mat2/0.12.1-1
- From: Georg Faerber <georg@debian.org>
- Date: Wed, 19 May 2021 11:48:36 +0000
- Message-id: <YKT7FO7f+PoyKwlp@debian>
Package: release.debian.org Severity: normal User: release.debian.org@packages.debian.org Usertags: unblock Dear release team, Please unblock mat2 0.12.1-1. It ships improved support of EPUB and Microsoft Office files. It's a key package, as doxygen build-depends on it, but so far regressions haven't been reported, and autopkgtest looks good as well. The diff is quite small: ~ debdiff mat2_0.12.0-1.dsc mat2_0.12.1-1.dsc | diffstat CHANGELOG.md | 5 +++++ debian/changelog | 7 +++++++ doc/mat2.1 | 2 +- libmat2/epub.py | 49 +++++++++++++++++++++++++++++++++++++++++++------ libmat2/office.py | 2 ++ mat2 | 2 +- setup.py | 2 +- 7 files changed, 60 insertions(+), 9 deletions(-) Please find the full debdiff attached. unblock mat2/0.12.1-1 Thanks for your work, cheers, Georg
diff -Nru mat2-0.12.0/CHANGELOG.md mat2-0.12.1/CHANGELOG.md --- mat2-0.12.0/CHANGELOG.md 2020-12-18 16:55:41.000000000 +0000 +++ mat2-0.12.1/CHANGELOG.md 2021-03-19 16:54:21.000000000 +0000 @@ -1,3 +1,8 @@ +# 0.12.1 - 2021-03-19 + +- Improve epub support +- Improve MS Office support + # 0.12.0 - 2020-12-18 - Improve significantly MS Office formats support diff -Nru mat2-0.12.0/debian/changelog mat2-0.12.1/debian/changelog --- mat2-0.12.0/debian/changelog 2020-12-26 19:52:55.000000000 +0000 +++ mat2-0.12.1/debian/changelog 2021-03-20 19:11:38.000000000 +0000 @@ -1,3 +1,10 @@ +mat2 (0.12.1-1) unstable; urgency=medium + + * New upstream version 0.12.1: + - Ships improved support of EPUB and Microsoft Office files. + + -- Georg Faerber <georg@debian.org> Sat, 20 Mar 2021 19:11:38 +0000 + mat2 (0.12.0-1) unstable; urgency=medium * Team upload. 
diff -Nru mat2-0.12.0/doc/mat2.1 mat2-0.12.1/doc/mat2.1 --- mat2-0.12.0/doc/mat2.1 2020-12-18 16:55:41.000000000 +0000 +++ mat2-0.12.1/doc/mat2.1 2021-03-19 16:54:21.000000000 +0000 @@ -1,4 +1,4 @@ -.TH mat2 "1" "December 2020" "mat2 0.12.0" "User Commands" +.TH mat2 "1" "March 2021" "mat2 0.12.1" "User Commands" .SH NAME mat2 \- the metadata anonymisation toolkit 2 diff -Nru mat2-0.12.0/libmat2/epub.py mat2-0.12.1/libmat2/epub.py --- mat2-0.12.0/libmat2/epub.py 2020-12-18 16:55:41.000000000 +0000 +++ mat2-0.12.1/libmat2/epub.py 2021-03-19 16:54:21.000000000 +0000 @@ -1,7 +1,9 @@ import logging import re import uuid +import zipfile import xml.etree.ElementTree as ET # type: ignore +from typing import Dict, Any from . import archive, office @@ -15,11 +17,28 @@ 'META-INF/container.xml', 'mimetype', 'OEBPS/content.opf', + 'content.opf', + 'hmh.opf', + 'OPS/.+.xml' })) + self.files_to_omit = set(map(re.compile, { # type: ignore + 'iTunesMetadata.plist', + 'META-INF/calibre_bookmarks.txt', + 'OEBPS/package.opf', + })) self.uniqid = uuid.uuid4() - def _specific_get_meta(self, full_path, file_path): - if file_path != 'OEBPS/content.opf': + + def is_archive_valid(self): + super().is_archive_valid() + with zipfile.ZipFile(self.filename) as zin: + for item in self._get_all_members(zin): + member_name = self._get_member_name(item) + if member_name.endswith('META-INF/encryption.xml'): + raise ValueError('the file contains encrypted fonts') + + def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]: + if not file_path.endswith('.opf'): return {} with open(full_path, encoding='utf-8') as f: @@ -30,14 +49,32 @@ except (TypeError, UnicodeDecodeError): return {file_path: 'harmful content', } - def _specific_cleanup(self, full_path: str): - if full_path.endswith('OEBPS/content.opf'): + def _specific_cleanup(self, full_path: str) -> bool: + if full_path.endswith('hmh.opf') or full_path.endswith('content.opf'): return self.__handle_contentopf(full_path) elif 
full_path.endswith('OEBPS/toc.ncx'): return self.__handle_tocncx(full_path) + elif re.search('/OPS/[^/]+.xml$', full_path): + return self.__handle_ops_xml(full_path) return True - def __handle_tocncx(self, full_path: str): + def __handle_ops_xml(self, full_path: str) -> bool: + try: + tree, namespace = office._parse_xml(full_path) + except ET.ParseError: # pragma: nocover + logging.error("Unable to parse %s in %s.", full_path, self.filename) + return False + + for item in tree.iterfind('.//', namespace): # pragma: nocover + if item.tag.strip().lower().endswith('head'): + item.clear() + break + tree.write(full_path, xml_declaration=True, encoding='utf-8', + short_empty_elements=False) + return True + + + def __handle_tocncx(self, full_path: str) -> bool: try: tree, namespace = office._parse_xml(full_path) except ET.ParseError: # pragma: nocover @@ -53,7 +90,7 @@ short_empty_elements=False) return True - def __handle_contentopf(self, full_path: str): + def __handle_contentopf(self, full_path: str) -> bool: try: tree, namespace = office._parse_xml(full_path) except ET.ParseError: diff -Nru mat2-0.12.0/libmat2/office.py mat2-0.12.1/libmat2/office.py --- mat2-0.12.0/libmat2/office.py 2020-12-18 16:55:41.000000000 +0000 +++ mat2-0.12.1/libmat2/office.py 2021-03-19 16:54:21.000000000 +0000 @@ -87,6 +87,7 @@ self.files_to_keep = set(map(re.compile, { # type: ignore r'^\[Content_Types\]\.xml$', r'^_rels/\.rels$', + r'^xl/sharedStrings\.xml$', # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table r'^(?:word|ppt|xl)/_rels/document\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$', r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$', @@ -108,6 +109,7 @@ r'^ppt/slideMasters/_rels/slideMaster[0-9]+\.xml\.rels', })) self.files_to_omit = set(map(re.compile, { # type: ignore + r'^\[trash\]/', r'^customXml/', r'webSettings\.xml$', r'^docProps/custom\.xml$', diff -Nru mat2-0.12.0/mat2 mat2-0.12.1/mat2 --- mat2-0.12.0/mat2 2020-12-18 
16:55:41.000000000 +0000 +++ mat2-0.12.1/mat2 2021-03-19 16:54:21.000000000 +0000 @@ -17,7 +17,7 @@ print(e) sys.exit(1) -__version__ = '0.12.0' +__version__ = '0.12.1' # Make pyflakes happy assert Set diff -Nru mat2-0.12.0/setup.py mat2-0.12.1/setup.py --- mat2-0.12.0/setup.py 2020-12-18 16:55:41.000000000 +0000 +++ mat2-0.12.1/setup.py 2021-03-19 16:54:21.000000000 +0000 @@ -5,7 +5,7 @@ setuptools.setup( name="mat2", - version='0.12.0', + version='0.12.1', author="Julien (jvoisin) Voisin", author_email="julien.voisin+mat2@dustri.org", description="A handy tool to trash your metadata",
--- End Message ---
--- Begin Message ---
- To: Georg Faerber <georg@debian.org>, 988771-done@bugs.debian.org
- Subject: Re: Bug#988771: unblock: mat2/0.12.1-1
- From: Paul Gevers <elbrus@debian.org>
- Date: Thu, 20 May 2021 12:01:27 +0200
- Message-id: <d797cdcf-b6e7-c575-7319-beea33758c6e@debian.org>
- In-reply-to: <YKT7FO7f+PoyKwlp@debian>
- References: <YKT7FO7f+PoyKwlp@debian>
Hi, On 19-05-2021 13:48, Georg Faerber wrote: > unblock mat2/0.12.1-1 Unblocked. Paul
Attachment: OpenPGP_signature
Description: OpenPGP digital signature
--- End Message ---