Bug#695373: unblock: catdoc/0.94.4-1.1
Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock
Please unblock package catdoc
This version fixes an RC bug (#692076).
A debdiff against the version in testing is attached. The majority of the
patch is the fix for #692073, which removes the .pc subdirectory that was
inadvertently introduced in the last upstream release and which prevented
a sane fix for the RC bug.
Thanks.
unblock catdoc/0.94.4-1.1
-- System Information:
Debian Release: wheezy/sid
APT prefers unstable
APT policy: (500, 'unstable'), (500, 'stable'), (1, 'experimental')
Architecture: amd64 (x86_64)
Foreign Architectures: armel
i386
armhf
Kernel: Linux 3.2.0-3-amd64 (SMP w/4 CPU cores)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8) (ignored: LC_ALL set to en_GB.UTF-8)
Shell: /bin/sh linked to /bin/dash
diffstat for catdoc-0.94.3 catdoc-0.94.4
.pc/.quilt_patches | 1
.pc/.quilt_series | 1
.pc/.version | 1
.pc/applied-patches | 1
.pc/debian-changes-0.94.3-1/doc/catdoc.txt | 258 ----------------------------
.pc/debian-changes-0.94.3-1/doc/catppt.txt | 51 -----
.pc/debian-changes-0.94.3-1/doc/xls2csv.txt | 85 ---------
configure | 2
configure.in | 2
debian/changelog | 12 +
doc/catdoc.1 | 2
doc/catppt.1 | 2
doc/wordview.1 | 2
doc/xls2csv.1 | 2
src/xlsparse.c | 4
tarball.sh | 18 +
16 files changed, 34 insertions(+), 410 deletions(-)
diff -Nru catdoc-0.94.3/configure catdoc-0.94.4/configure
--- catdoc-0.94.3/configure 2012-06-10 14:02:08.000000000 +0100
+++ catdoc-0.94.4/configure 2012-12-03 18:01:26.000000000 +0000
@@ -541,7 +541,7 @@
fi
-catdoc_version=0.94.2
+catdoc_version=0.94.4
# Extract the first word of "gcc", so it can be a program name with args.
set dummy gcc; ac_word=$2
echo $ac_n "checking for $ac_word""... $ac_c" 1>&6
diff -Nru catdoc-0.94.3/configure.in catdoc-0.94.4/configure.in
--- catdoc-0.94.3/configure.in 2012-06-10 12:35:25.000000000 +0100
+++ catdoc-0.94.4/configure.in 2012-12-03 18:01:31.000000000 +0000
@@ -1,6 +1,6 @@
dnl Process this file with autoconf to produce a configure script.
AC_INIT(acconfig.h)
-catdoc_version=0.94.2
+catdoc_version=0.94.4
dnl Checks for programs.
AC_PROG_CC
case ${CC} in
diff -Nru catdoc-0.94.3/debian/changelog catdoc-0.94.4/debian/changelog
--- catdoc-0.94.3/debian/changelog 2012-06-10 13:51:32.000000000 +0100
+++ catdoc-0.94.4/debian/changelog 2012-12-03 18:50:42.000000000 +0000
@@ -1,3 +1,15 @@
+catdoc (0.94.4-1.1) unstable; urgency=low
+
+ * Non-maintainer upload.
+ * New upstream release to remove .pc subdirectory from
+ the orig tarball (Closes: #692073). Includes updating
+ version strings in generated manpages.
+ * Remove extra ';' in src/xlsparse.c which turned for loop in
+ xlsparse into a buffer overflow (Closes: #692076), applies
+ patch by Olly Betts <olly@survex.com>.
+
+ -- Neil Williams <codehelp@debian.org> Mon, 03 Dec 2012 18:22:47 +0000
+
catdoc (0.94.3-1) unstable; urgency=low
* Declare new upstream release
diff -Nru catdoc-0.94.3/doc/catdoc.1 catdoc-0.94.4/doc/catdoc.1
--- catdoc-0.94.3/doc/catdoc.1 2012-06-10 14:04:16.000000000 +0100
+++ catdoc-0.94.4/doc/catdoc.1 2012-12-03 18:54:22.000000000 +0000
@@ -1,4 +1,4 @@
-.TH catdoc 1 "Version 0.94.2" "MS-Word reader"
+.TH catdoc 1 "Version 0.94.4" "MS-Word reader"
.SH NAME
catdoc \- reads MS-Word file and puts its content as plain text on standard output
.SH SYNOPSIS
diff -Nru catdoc-0.94.3/doc/catppt.1 catdoc-0.94.4/doc/catppt.1
--- catdoc-0.94.3/doc/catppt.1 2012-06-10 14:04:16.000000000 +0100
+++ catdoc-0.94.4/doc/catppt.1 2012-12-03 18:54:22.000000000 +0000
@@ -1,4 +1,4 @@
-.TH ppt2text 1 "Version 0.94.2" "MS-PowerPoint reader"
+.TH ppt2text 1 "Version 0.94.4" "MS-PowerPoint reader"
.SH NAME
catppt \- reads MS-PowerPoint file and puts its content on standard output
.SH SYNOPSIS
diff -Nru catdoc-0.94.3/doc/wordview.1 catdoc-0.94.4/doc/wordview.1
--- catdoc-0.94.3/doc/wordview.1 2012-06-10 14:04:16.000000000 +0100
+++ catdoc-0.94.4/doc/wordview.1 2012-12-03 18:54:22.000000000 +0000
@@ -1,4 +1,4 @@
-.TH wordview 1 "Version 0.94.2" "MS-Word reader"
+.TH wordview 1 "Version 0.94.4" "MS-Word reader"
.SH NAME
wordview \- displays text contained in MS-Word file in X window
diff -Nru catdoc-0.94.3/doc/xls2csv.1 catdoc-0.94.4/doc/xls2csv.1
--- catdoc-0.94.3/doc/xls2csv.1 2012-06-10 14:04:16.000000000 +0100
+++ catdoc-0.94.4/doc/xls2csv.1 2012-12-03 18:54:22.000000000 +0000
@@ -1,4 +1,4 @@
-.TH xls2csv 1 "Version 0.94.2" "MS-Word reader"
+.TH xls2csv 1 "Version 0.94.4" "MS-Word reader"
.SH NAME
xls2csv \- reads MS-Excel file and puts its content as comma-separated data on standard output
.SH SYNOPSIS
diff -Nru catdoc-0.94.3/.pc/applied-patches catdoc-0.94.4/.pc/applied-patches
--- catdoc-0.94.3/.pc/applied-patches 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/applied-patches 1970-01-01 01:00:00.000000000 +0100
@@ -1 +0,0 @@
-debian-changes-0.94.3-1
diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt
--- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 1970-01-01 01:00:00.000000000 +0100
@@ -1,258 +0,0 @@
-catdoc(1) catdoc(1)
-
-
-
-NAME
- catdoc - reads MS-Word file and puts its content as plain text on stan‐
- dard output
-
-SYNOPSIS
- catdoc [-vlu8btawxV] [-m number] [ -s charset] [ -d charset] [ -f out‐
- put-format] file
-
-
-DESCRIPTION
- catdoc behaves much like cat(1) but it reads MS-Word file and produces
- human-readable text on standard output. Optionally it can use latex(1)
- escape sequences for characters which have special meaning for LaTeX.
- It also makes some effort to recognize MS-Word tables, although it
- never tries to write correct headers for LaTeX tabular environment.
- Additional output formats, such is HTML can be easily defined.
-
- catdoc doesn't attempt to extract formatting information other than
- tables from MS-Word document, so different output modes means mainly
- that different characters should be escaped and different ways used to
- represent characters, missing from output charset. See CHARACTER SUB‐
- STITUTION below
-
-
- catdoc uses internal unicode(7) representation of text, so it is able
- to convert texts when charset in source document doesn't match charset
- on target system. See CHARACTER SETS below.
-
- If no file names supplied, catdoc processes its standard input unless
- it is terminal. It is unlikely that somebody could type Word document
- from keyboard, so if catdoc invoked without arguments and stdin is not
- redirected, it prints brief usage message and exits. Processing of
- standard input (even among other files) can be forced using dash '-' as
- file name.
-
- By default, catdoc wraps lines which are more than 72 chars long and
- separates paragraphs by blank lines. This behavior can be turned of by
- -w switch. In wide mode catdoc prints each paragraph as one long line,
- suitable for import into word processors which perform word wrapping.
-
-
-
-OPTIONS
- -a - shortcut for -f ascii. Produces ASCII text as output. Sepa‐
- rates table columns with TAB
-
- -b - process broken MS-Word file. Normally, catdoc checks if first
- 8 bytes of file is Microsoft OLE signature. If so, it processes
- file, otherwise it just copies it to stdin. It is intended to
- use catdoc as filter for viewing all files with .doc extension.
-
- -dcharset
- - specifies destination charset name. Charset file has format
- described in CHARACTER SETS below and should have .txt exten‐
- sion and reside in catdoc library directory ( ${exec_pre‐
- fix}/lib/catdoc). By default, current locale charset is used if
- langinfo support compiled in.
-
- -fformat
- - specifies output format as described in CHARACTER SUBSTITU‐
- TION below. catdoc comes with two output formats - ascii and
- tex. You can add your own if you wish.
-
- -l Causes catdoc to list names of available charsets to the stdout
- and exit successfully.
-
- -mnumber
- Specifies right margin for text (default 72). -m 0 is equiva‐
- lent to -w
-
- -scharset
- Specifies source charset. (one used in Word document), if Word
- document doesn't contain UTF-16 text. When reading rtf docu‐
- ments, it is typically not necessary, because rtf documents
- contain ansicpg specification. But it can be set wrong by Word
- (I've seen RTF documents on Russian, where cp1252 was speci‐
- fied). In this case this option would take precedence over
- charset, specified in the document. But source_charset state‐
- ment in the configuration file have less priority than charset
- in the document.
-
- -t - shortcut for -f tex
- converts all printable chars, which have special meaning for
- LaTeX(1) into appropriate control sequences. Separates table
- columns by &.
-
- -u - declares that Word document contain UNICODE (UTF-16)
- representation of text (as some Word-97 documents). If catdoc
- fails to correct Word document with default charset, try
- this option.
-
- -8 - declares is Word document is 8 bit. Just in case that catdoc
- recognizes file format incorrectly.
-
- -w disables word wrapping. By default catdoc output is split into
- lines not longer than 72 (or number, specified by -m option)
- characters and paragraphs are separated by blank line. With
- this option each paragraph is one long line.
-
- -x causes catdoc to output unknown UNICODE character as \xNNNN,
- instead of question marks.
-
- -v causes catdoc to print some useless information about word doc‐
- ument structure to stdout before actual start of text.
-
- -V outputs catdoc version
-
-
-CHARACTER SETS
- When processing MS-Word file catdoc uses information about two charac‐
- ter sets, typically different
- - input and output. They are stored in plain text files in catdoc
- data directory. Character set files should contain two whitespace-sepa‐
- rated hexadecimal numbers - 8-bit code in character set and 16-bit Uni‐
- code code. Anything from hash mark to end of line is ignored, as well
- as blank lines.
-
- catdoc distribution includes some of these character sets. Additional
- character set definitions, directly usable by catdoc can be obtained
- from ftp.unicode.org. Charset files have .txt suffix, which shouldn't
- be specified in command-line or configuration files.
-
- Note that catdoc is distributed with Cyrillic charsets as default. If
- you are not Russian, you probably don't want it, an should reconfigure
- catdoc at compile time or in runtime configuration file.
-
- When dealing with documents with charsets other than default, remember
- that Microsoft never uses ISO charsets. While letters in, say cp1252
- are at the same position as in ISO-8859-1, some punctuation signs would
- be lost, if you specify ISO-8859-1 as input charset. If you use cp1252,
- catdoc would deal with those signs as described in CHARACTER SUBSTITU‐
- TION below.
-
-
-CHARACTER SUBSTITUTION
- catdoc converts MS-Word file into following internal Unicode represen‐
- tation:
-
- 1. Paragraphs are separated by ASCII Line Feed symbol (0x000A)
-
- 2. Table cells within row are separated by ASCII Field Separator symbol
- (0x001C)
-
- 3. Table rows are separated by ASCII Record Separator (0x001E)
-
- 4. All printable characters, including whitespace are represented with
- their
- respective UNICODE codes.
-
- This UNICODE representation is subsequently converted into 8-bit text
- in target character set using following four-step algorithm:
-
- 1. List of special characters is searched for given Unicode character.
- If found, then appropriate multi-character sequence is output
- instead of character.
-
- 2. If there is an equivalent in target character set, it is output.
-
- 3. Otherwise, replacement list is searched and, if there is multi-char‐
- acter
- substitution for this UNICODE char, it is output.
-
- 4. If all above fails, "Unknown char" symbol (question mark) is output.
-
- Lists of special characters and list of substitution are character set-
- independent, because special chars should be escaped regardless of
- their existence in target character set (usually, they are parts of
- US-ASCII, and therefore exist in any character set) and replacement
- list is searched only for those characters, which are not found in tar‐
- get character set.
-
- These lists are stored in catdoc data directory in files with prefix of
- format name. These files have following format:
-
- Each line can be either comment (starting with hash mark) or contain
- hexadecimal UNICODE value, separated by whitespace from string, which
- would be substituted instead of it. If string contain no whitespace it
- can be used as is, otherwise it should be enclosed in single or double
- quotes. Usual backslash sequences like '\n','\t' can be used in these
- string.
-
-
-
-RUNTIME CONFIGURATION
- Upon startup catdoc reads its system-wide configuration file /etc/cat‐
- docrc and then user-specific configuration file ${HOME}/.catdocrc.
-
- These files can contain following directives:
-
- source_charset = charset-name
- Sets default source charset, which would be used if no -s
- option specified. Consult configuration of nearby windows work‐
- station to find one you need.
-
- target_charset = charset-name
- Sets default output charset. You probably know, which one you
- use.
-
- charset_path = directory-list
- colon-separated list of directories, which are searched for
- charset files. This allows you to install additional charsets
- in your home directory. If first directory component of path
- is ~ it is replaced by contents of HOME environment variable.
- On MS-DOS platform, if directory name starts with %s, it is
- replaced with directory of executable file. Empty element in
- list (i.e. two consequitve colons) is considered current direc‐
- tory.
-
- map_path = directory-list
- colon-separated list of directories, which are searched for
- special character map and replacement map. Same substitution
- rules as in charset_path are applied.
-
- format = format name
- Output format which would be used by default. catdoc comes
- with two formats - ascii and tex but nothing prevents you from
- writing your own format (set two map files - special character
- map and replacement map).
-
- unknown_char = character specification
- sets character to output instead of unknown Unicode character
- (default '?') Character specification can have one of two form
- - character enclosed in single quotes or hexadecimal code.
-
- use_locale =(yes|no)
- Enables or disables automatic selection of output charset
- (default yes),
- based on system locale settings (if enabled at compile time).
- If automatic detection is enabled, than output charset settings
- in the configuration files (but not in the command line) are
- ignored, and current system locale charset is used instead.
- There are no automatic choice of input charset, based of locale
- language, because most modern Word files (since Word 97) are
- Unicode anyway
-
-
-BUGS
- Doesn't handle fast-saves properly. Prints footnotes as separate para‐
- graphs at the end of file, instead of producing correct LaTeX commands.
- Cannot distinguish between empty table cell and end of table row.
-
-
-
-
-SEE ALSO
- xls2csv(1), cat(1), strings(1), utf(4), unicode(7)
-
-
-AUTHOR
- V.B.Wagner <vitus@45.free.net>
-
-
-
-MS-Word reader Version 0.94.2 catdoc(1)
diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt
--- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt 1970-01-01 01:00:00.000000000 +0100
@@ -1,51 +0,0 @@
-ppt2text(1) ppt2text(1)
-
-
-
-NAME
- catppt - reads MS-PowerPoint file and puts its content on standard out‐
- put
-
-SYNOPSIS
- catppt [-lV] [-b string ] [-s charset ] [-d charset ] files
-
-
-DESCRIPTION
- catppt reads MS-PowerPoint presentations and dumps its content to std‐
- out.
-
-OPTIONS
- -l list known charsets and exit successfully
-
- -bstring
- slides break string. This string (by default - formfeed) would
- be output at the end of each slide page.
-
-
- -dcharset`
- - specifies destination charset name. Charset file has format
- described in CHARACTER SETS section of catdoc(1) manual page.
- By default, current locale charset would be used if langinfo
- support was enabled at the compile time.
-
-
- -scharset
- - specifies source charset. Typically, PowerPoint files use
- UNICODE strings with known charsets, but for some reason you
- may wish to override it.
-
-
- -V outputs version number
-
-
-SEE ALSO
- cat(1), catdoc(1), xls2csv(1), strings(1), utf(4), unicode(4)
-
-
-AUTHOR
- Alex Ott <alexott@gmail.com>
-
-
-
-
-MS-PowerPoint reader Version 0.94.2 ppt2text(1)
diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt
--- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 1970-01-01 01:00:00.000000000 +0100
@@ -1,85 +0,0 @@
-xls2csv(1) xls2csv(1)
-
-
-
-NAME
- xls2csv - reads MS-Excel file and puts its content as comma-separated
- data on standard output
-
-SYNOPSIS
- xls2csv [-xlV] [-f format ] [-b string ] [-s charset ] [-d charset
- ] [-q number ] [-c char] files
-
-
-DESCRIPTION
- xls2csv reads MS-Excel spreadsheet and dumps its content as comma-sepa‐
- rated values to stdout. Numbers are printed without delimiters, strings
- are enclosed in the double quotes. Double-quotes inside string are dou‐
- bled.
-
-OPTIONS
- -x print unknown Unicode chars as \xNNNN, rather than as question
- marks
-
- -l list known charsets and exit successfully
-
- -cchar cell separator char. By default - comma.
-
- -bstring
- sheet break string. This string (by default - formfeed) would
- be output at the end of each workbook page. This string is
- printed after page starting at start of line, but no linefeed
- would be automatically added at the end of string. Include new‐
- line at the ent of sheet separator if you want it to appear on
- separate line by itself
-
- -gnumber number of decimal digits in the numbers. By default maximal
- double precision (system-dependent macro DBL_DIG) is used.
-
- -qnumber
- set quote mode. In quote mode 0 cell contents is never quoted.
- In quote mode 1 only strings which contain spaces, double
- quotes or commas are quoted. In quote mode 2 (default) all
- cells with type string are quoted. In quote mode 3 all cells
- are quoted.
-
-
- -dcharset`
- - specifies destination charset name. Charset file has format
- described in CHARACTER SETS section of catdoc(1) manual page.
- By default, current locale charset would be used if langinfo
- support was enabled at the compile time.
-
-
- -scharset
- - specifies source charset. Typically, Excel files have CODE
- PAGE record, which denotes input charset, but for some reason
- you may wish to override it.
-
- -fformat
- - specifies date/time format to use for output of all Excel
- date and time values. If this option is not specified, for‐
- mat, specified in the spreadsheet is used. On POSIX system any
- format, allowed by strftime(3) can be used as value of this
- option. Under MS-DOS xls2csv implements limited set of strftime
- formats, namely m, d, y, Y, b, l, p, H, M, S.
-
-
- -V outputs version number
-
-
-FILES
- ${HOME}/.catdocrc, catdoc charset files and substitution map files (see
- catdoc(1) manual page for details,
-
-
-SEE ALSO
- cat(1), catdoc(1), strings(1), utf8(7), unicode(7)
-
-
-AUTHOR
- V.B.Wagner <vitus@45.free.net>, based on biffview by David Rysdam
-
-
-
-MS-Word reader Version 0.94.2 xls2csv(1)
diff -Nru catdoc-0.94.3/.pc/.quilt_patches catdoc-0.94.4/.pc/.quilt_patches
--- catdoc-0.94.3/.pc/.quilt_patches 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/.quilt_patches 1970-01-01 01:00:00.000000000 +0100
@@ -1 +0,0 @@
-debian/patches
diff -Nru catdoc-0.94.3/.pc/.quilt_series catdoc-0.94.4/.pc/.quilt_series
--- catdoc-0.94.3/.pc/.quilt_series 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/.quilt_series 1970-01-01 01:00:00.000000000 +0100
@@ -1 +0,0 @@
-series
diff -Nru catdoc-0.94.3/.pc/.version catdoc-0.94.4/.pc/.version
--- catdoc-0.94.3/.pc/.version 2012-06-10 14:26:51.000000000 +0100
+++ catdoc-0.94.4/.pc/.version 1970-01-01 01:00:00.000000000 +0100
@@ -1 +0,0 @@
-2
diff -Nru catdoc-0.94.3/src/xlsparse.c catdoc-0.94.4/src/xlsparse.c
--- catdoc-0.94.3/src/xlsparse.c 2012-06-10 13:47:37.000000000 +0100
+++ catdoc-0.94.4/src/xlsparse.c 2012-12-03 18:03:52.000000000 +0000
@@ -589,8 +589,8 @@
void CleanUpFormatIdxUsed() {
int i;
- for (i=0;i<NUMOFDATEFORMATS; i++);
- FormatIdxUsed[i]=0;
+ for (i=0;i<NUMOFDATEFORMATS; i++)
+ FormatIdxUsed[i]=0;
}
/*
diff -Nru catdoc-0.94.3/tarball.sh catdoc-0.94.4/tarball.sh
--- catdoc-0.94.3/tarball.sh 2012-06-10 14:02:08.000000000 +0100
+++ catdoc-0.94.4/tarball.sh 2012-12-03 18:49:06.000000000 +0000
@@ -5,8 +5,18 @@
set -e
-debclean
-cd ../
-tar -czf catdoc-0.94.3.tar.gz ./catdoc-0.94.3 --exclude=debian --exclude=.svn
- ln -sf catdoc-0.94.3.tar.gz catdoc_0.94.3.orig.tar.gz
+test ! -d .pc
+
+VERSION=0.94.4
+#debclean
+find . -name '*.o' -delete
+rm -f config.cache config.log config.status Makefile src/catppt src/xls2csv
+rm -f doc/Makefile charsets/Makefile build-stamp install-stamp src/catdoc src/wordview src/Makefile
+cd ../
+if [ -h catdoc-${VERSION} ]; then
+ rm catdoc-${VERSION}
+fi
+ln -s ./wheezy/ catdoc-${VERSION}
+tar -czf catdoc-${VERSION}.tar.gz ./catdoc-${VERSION}/* --exclude=debian --exclude=.svn
+ln -sf catdoc-${VERSION}.tar.gz catdoc_${VERSION}.orig.tar.gz
Reply to: