--- Begin Message ---
- To: Debian Bug Tracking System <submit@bugs.debian.org>
- Subject: unblock: dovecot-fts-xapian/1.4.9a-1
- From: Joseph Nahmias <joe@nahmias.net>
- Date: Tue, 22 Jun 2021 22:50:00 -0400
- Message-id: <162441660069.17917.15246764066276302828.reportbug@brain.nahmias.net>
Package: release.debian.org
Severity: normal
User: release.debian.org@packages.debian.org
Usertags: unblock
Please unblock package dovecot-fts-xapian
This version (1.4.9a) fixes a number of important bugs in the indexer including:
+ fix indexing of attachments, closes: #985654
+ fix indexing of accented characters
+ fix memory errors / segfaults when indexing large mailboxes
Source debdiff from 1.4.7-1 (currently in testing) to 1.4.9a-1 is attached
here. Please let me know when approved so I can upload to unstable.
unblock dovecot-fts-xapian/1.4.9a-1
Thanks,
--Joe
diffstat for dovecot-fts-xapian-1.4.7 dovecot-fts-xapian-1.4.9a
.gitignore | 65 ++++++++++
Makefile.am | 4
PACKAGES/RPM/README.md | 20 +++
PACKAGES/RPM/fts-xapian.spec | 41 ++++++
README.md | 46 +++++--
configure.ac | 2
debian/changelog | 11 +
debian/watch | 4
fts-xapian-config.h.in | 2
src/fts-backend-xapian-functions.cpp | 175 +++++++++++++++++++++--------
src/fts-backend-xapian.cpp | 211 ++++++++++++++++++-----------------
src/fts-xapian-plugin.c | 2
src/fts-xapian-plugin.h | 9 -
13 files changed, 425 insertions(+), 167 deletions(-)
diff -Nru -w dovecot-fts-xapian-1.4.7/.gitignore dovecot-fts-xapian-1.4.9a/.gitignore
--- dovecot-fts-xapian-1.4.7/.gitignore 1969-12-31 19:00:00.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/.gitignore 2021-04-24 16:27:55.000000000 -0400
@@ -0,0 +1,65 @@
+# http://www.gnu.org/software/automake
+
+Makefile.in
+/ar-lib
+/mdate-sh
+/py-compile
+/test-driver
+/ylwrap
+.deps/
+.dirstamp
+
+# http://www.gnu.org/software/autoconf
+
+autom4te.cache
+/autoscan.log
+/autoscan-*.log
+/aclocal.m4
+/compile
+/config.guess
+/config.h.in
+/config.log
+/config.status
+/config.sub
+/configure
+/configure.scan
+/depcomp
+/install-sh
+/missing
+/stamp-h1
+/stamp-h2
+/stamp.h
+
+# https://www.gnu.org/software/libtool/
+
+/ltmain.sh
+/libtool
+
+# http://www.gnu.org/software/texinfo
+
+/texinfo.tex
+
+# http://www.gnu.org/software/m4/
+
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+
+# Generated Makefile
+# (meta build system like autotools,
+# can automatically generate from config.status script
+# (which is called by configure script))
+Makefile
+
+/dummy-config.h
+/dummy-config.h.in
+/fts-xapian-config.h
+/run-test.sh
+
+src/*.o
+src/*.lo
+src/*.la
+
+src/.libs/**
diff -Nru -w dovecot-fts-xapian-1.4.7/Makefile.am dovecot-fts-xapian-1.4.9a/Makefile.am
--- dovecot-fts-xapian-1.4.7/Makefile.am 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/Makefile.am 2021-04-24 16:27:55.000000000 -0400
@@ -2,5 +2,5 @@
ACLOCAL_AMFLAGS = -I m4
-PACKAGE_VERSION = "1.4.7"
-VERSION = "1.4.7"
+PACKAGE_VERSION = "1.4.9a"
+VERSION = "1.4.9a"
diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md
--- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md 1969-12-31 19:00:00.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md 2021-04-24 16:27:55.000000000 -0400
@@ -0,0 +1,20 @@
+As root:
+
+ Install the development environment and required devel packages:
+ -- dnf groupinstall "Development Tools"
+ -- dnf install rpm-build rpm-devel rpmlint make coreutils diffutils patch rpmdevtools
+ -- dnf install dovecot-devel dovecot libicu-devel icu xapian-core xapian-core-devel
+
+As a normal user:
+
+ Create the ~/rpmbuild tree as a normal user (never build rpms as root):
+ -- rpmdev-setuptree
+ Place the spec file under:
+ ~/rpmbuild/SPECS/fts-xapian.spec
+ Place the tar.gz sources under:
+ ~/rpmbuild/SOURCES/fts-xapian-1.4.9a.tar.gz
+ Generate the binary rpm with:
+ -- QA_RPATHS=$(( 0x0001|0x0010 )) rpmbuild -bb ~/rpmbuild/SPECS/fts-xapian.spec
+
+Your RPM packages will be under ~/rpmbuild/RPMS/x86_64/
+
diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec
--- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec 1969-12-31 19:00:00.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec 2021-04-24 16:27:55.000000000 -0400
@@ -0,0 +1,41 @@
+Name: fts-xapian
+Version: 1.4.9a
+Release: 1%{?dist}
+Summary: Dovecot FTS plugin based on Xapian
+
+License: LGPL-2.1
+URL: https://github.com/grosjo/fts-xapian
+Source0: fts-xapian-1.4.9a.tar.gz
+
+BuildRequires: xapian-core-devel, libicu-devel, dovecot-devel
+Requires: xapian-core, xapian-core-libs, dovecot
+
+%description
+This project intends to provide a straightforward, simple and maintenance free, way to configure FTS plugin for Dovecot, leveraging the efforts by the Xapian.org team.
+
+This effort came after Dovecot team decided to deprecate "fts_squat" included in the dovecot core, and due to the complexity of the Solr plugin capabilities, un-needed for most users.
+
+
+%prep
+%autosetup
+autoreconf -vi
+./configure --with-dovecot=/usr/lib64/dovecot
+
+
+%build
+make %{?_smp_mflags}
+
+
+%install
+%make_install
+
+
+%files
+/usr/lib64/dovecot/lib21_fts_xapian_plugin.la
+/usr/lib64/dovecot/lib21_fts_xapian_plugin.so
+/usr/lib64/dovecot/lib21_fts_xapian_plugin.a
+
+
+%changelog
+* Tue Apr 6 2021 xapian
+-
diff -Nru -w dovecot-fts-xapian-1.4.7/README.md dovecot-fts-xapian-1.4.9a/README.md
--- dovecot-fts-xapian-1.4.7/README.md 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/README.md 2021-04-24 16:27:55.000000000 -0400
@@ -79,12 +79,14 @@
plugin = fts fts_xapian (...)
fts = xapian
- fts_xapian = partial=3 full=20 attachments=0 verbose=0
+ fts_xapian = partial=3 full=20 verbose=0
fts_autoindex = yes
fts_enforced = yes
fts_autoindex_exclude = \Trash
+
+ fts_decoder = decode2text // To index attachments
(...)
}
@@ -92,27 +94,48 @@
service indexer-worker {
vsz_limit = 2G // or above (or 0 if you have rather large memory usable on your server, which is preferred for performance)
}
+
+service decode2text {
+ executable = script /usr/libexec/dovecot/decode2text.sh
+ user = dovecot
+ unix_listener decode2text {
+ mode = 0666
+ }
+}
(...)
```
-Partial & full parameters : 3 and 20 are the NGram values for header fields, which means the keywords created for fields (To, Cc, ...) are between 3 and 20 chars long.
-Full words are also added by default (if not longer than 245 chars, which is the limit of Xapian capability).
+
+Indexing options
+----------------
+
+| Option | Description | Possible values | Default value |
+|----------------|--------------------------------|--------------------------------------|---------------|
+| partial & full | NGram values for header fields | between 3 and 20 characters | 3 & 20 |
+| verbose | Logs verbosity | 0 (silent), 1 (verbose) or 2 (debug) | 0 |
+
+NGrams details
+--------------
+
+The partial & full parameters are the NGram values for header fields, which means the keywords created for fields (To,
+Cc, ...) are between 3 and 20 chars long. Full words are also added by default (if not longer than 245 chars, which is
+the limit of Xapian capability).
Example: "<john@doe>" will create joh, ohn, hn@, ..., john@d, ohn@do, ..., and finally john@doe as searchable keywords.
-Set "verbose=1" to see verbose messages in the log, "verbose=2" for debug
-Set "attachments=1" if you want to index attachments (this works only for text attachments)
+Index updating
+--------------
-Restart Dovecot:
+Just restart Dovecot:
-```
+```sh
sudo servicectl restart dovecot
```
-If this is not a fresh install of dovecot, you need to re-index your mailboxes
+If this is not a fresh install of dovecot, you need to re-index your mailboxes:
-```
+```sh
doveadm index -A -q \*
```
@@ -121,7 +144,8 @@
You shall put in a cron the following command (for daily run for instance) :
-```
+
+```sh
doveadm fts optimize -A
```
@@ -131,4 +155,6 @@
Please submit requests/bugs via the [GitHub issue tracker](https://github.com/grosjo/fts-xapian/issues).
+A Matrix Room exists also at : #xapian-dovecot:grosjo.net
+
Thanks to Aki Tuomi <aki.tuomi@open-xchange.com>, Stephan Bosch <stephan@rename-it.nl>, Paul Hecker <paul@iwascoding.com>
diff -Nru -w dovecot-fts-xapian-1.4.7/configure.ac dovecot-fts-xapian-1.4.9a/configure.ac
--- dovecot-fts-xapian-1.4.7/configure.ac 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/configure.ac 2021-04-24 16:27:55.000000000 -0400
@@ -1,4 +1,4 @@
-AC_INIT([Dovecot FTS Xapian], [1.4.7], [jom@grosjo.net], [dovecot-fts-xapian])
+AC_INIT([Dovecot FTS Xapian], [1.4.9a], [jom@grosjo.net], [dovecot-fts-xapian])
AC_CONFIG_AUX_DIR([.])
AC_CONFIG_SRCDIR([src])
AC_CONFIG_MACRO_DIR([m4])
diff -Nru -w dovecot-fts-xapian-1.4.7/debian/changelog dovecot-fts-xapian-1.4.9a/debian/changelog
--- dovecot-fts-xapian-1.4.7/debian/changelog 2021-01-31 21:35:02.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/debian/changelog 2021-06-22 22:40:48.000000000 -0400
@@ -1,3 +1,14 @@
+dovecot-fts-xapian (1.4.9a-1) unstable; urgency=medium
+
+ * [2da6c89] d/watch: allow non-numbers in version
+ * [18c496d] New upstream version 1.4.9a
+ + fix indexing of attachments, closes: #985654
+ + fix indexing of accented characters
+ + fix memory errors / segfaults when indexing large mailboxes
+ + handle indexing of virtual folders
+
+ -- Joseph Nahmias <jello@debian.org> Tue, 22 Jun 2021 22:40:48 -0400
+
dovecot-fts-xapian (1.4.7-1) unstable; urgency=medium
* [e3bdb1b] fix VCS paths
diff -Nru -w dovecot-fts-xapian-1.4.7/debian/watch dovecot-fts-xapian-1.4.9a/debian/watch
--- dovecot-fts-xapian-1.4.7/debian/watch 2020-10-18 12:23:30.000000000 -0400
+++ dovecot-fts-xapian-1.4.9a/debian/watch 2021-06-22 22:10:41.000000000 -0400
@@ -3,6 +3,6 @@
version=4
-opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" \
+opts="filenamemangle=s%(?:.*?)?v?(\d.*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" \
https://github.com/grosjo/fts-xapian/tags \
- (?:.*?/)?v?(\d[\d.]*)\.tar\.gz
+ (?:.*?/)?v?(\d.*)\.tar\.gz
diff -Nru -w dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in
--- dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in 2021-04-24 16:27:55.000000000 -0400
@@ -1,2 +1,2 @@
#define FTS_XAPIAN_NAME "Dovecot FTS Xapian"
-#define FTS_XAPIAN_VERSION "1.4.7"
+#define FTS_XAPIAN_VERSION "1.4.9a"
diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp
--- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp 2021-04-24 16:27:55.000000000 -0400
@@ -26,6 +26,8 @@
class XQuerySet
{
+ private:
+ icu::Transliterator *accentsConverter;
public:
char * header;
char * text;
@@ -44,6 +46,7 @@
header=NULL;
text=NULL;
global_neg=false;
+ accentsConverter=NULL;
}
XQuerySet(bool is_and, bool is_neg, long l)
@@ -55,6 +58,7 @@
text=NULL;
global_and=is_and;
global_neg=is_neg;
+ accentsConverter=NULL;
}
~XQuerySet()
@@ -68,6 +72,7 @@
}
if(qsize>0) i_free(qs);
qsize=0; qs=NULL;
+ if(accentsConverter != NULL) delete(accentsConverter);
}
void add(const char * h,const char * t)
@@ -80,11 +85,8 @@
if(h==NULL) return;
if(t==NULL) return;
- icu::StringPiece sp_h(h);
- icu::UnicodeString h2 = icu::UnicodeString::fromUTF8(sp_h);
-
- icu::StringPiece sp_t(t);
- icu::UnicodeString t2 = icu::UnicodeString::fromUTF8(sp_t);
+ icu::UnicodeString h2 = icu::UnicodeString::fromUTF8(icu::StringPiece(h));
+ icu::UnicodeString t2 = icu::UnicodeString::fromUTF8(icu::StringPiece(t));
add(&h2,&t2,is_neg);
}
@@ -154,6 +156,19 @@
std::string tmp1;
h->toUTF8String(tmp1);
char * h2 = i_strdup(tmp1.c_str());
+
+ if(accentsConverter == NULL)
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ accentsConverter = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);
+ if(U_FAILURE(status))
+ {
+ i_error("FTS Xapian: Can not allocate ICU translator (2)");
+ accentsConverter = NULL;
+ }
+ }
+ if(accentsConverter != NULL) accentsConverter->transliterate(*t);
+
std::string tmp2;
t->toUTF8String(tmp2);
char * t2 = i_strdup(tmp2.c_str());
@@ -178,7 +193,7 @@
}
if(i>=HDRS_NB)
{
- i_error("FTS Xapian: Unknown header (lookup) '%s'",h2);
+ if(verbose>1) i_error("FTS Xapian: Unknown header (lookup) '%s'",h2);
i_free(h2); i_free(t2);
return;
}
@@ -299,8 +314,6 @@
char *s = i_strdup(get_string().c_str());
- if(verbose>0) { i_info("FTS Xapian: Query= %s",s); }
-
qp->set_database(*db);
Xapian::Query * q = new Xapian::Query(qp->parse_query(s,Xapian::QueryParser::FLAG_DEFAULT));// | Xapian::QueryParser::FLAG_PARTIAL));
@@ -317,6 +330,7 @@
long partial,full,hardlimit;
const char * prefix;
bool onlyone;
+ icu::Transliterator *accentsConverter;
public:
char ** data;
@@ -334,6 +348,7 @@
hardlimit=XAPIAN_TERM_SIZELIMIT-strlen(prefix);
onlyone=false;
if(strcmp(prefix,"XMID")==0) onlyone=true;
+ accentsConverter = NULL;
}
~XNGram()
@@ -348,14 +363,14 @@
i_free(data);
}
data=NULL;
+ if(accentsConverter != NULL) delete(accentsConverter);
}
void add(const char * s)
{
if(s==NULL) return;
- icu::StringPiece sp(s);
- icu::UnicodeString d = icu::UnicodeString::fromUTF8(sp);
+ icu::UnicodeString d = icu::UnicodeString::fromUTF8(icu::StringPiece(s));
add(&d);
}
@@ -401,6 +416,18 @@
long l = d->length();
if(l<partial) return;
+ if(accentsConverter == NULL)
+ {
+ UErrorCode status = U_ZERO_ERROR;
+ accentsConverter = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);
+ if(U_FAILURE(status))
+ {
+ i_error("FTS Xapian: Can not allocate ICU translator (1)");
+ accentsConverter = NULL;
+ }
+ }
+ if(accentsConverter != NULL) accentsConverter->transliterate(*d);
+
if(onlyone)
{
add_stem(d);
@@ -470,6 +497,13 @@
}
};
+static long fts_backend_xapian_current_time()
+{
+ struct timeval tp;
+ gettimeofday(&tp, NULL);
+ return tp.tv_sec * 1000 + tp.tv_usec / 1000;
+}
+
static long fts_backend_xapian_memory_used() // KB
{
FILE* file = fopen("/proc/self/status", "r");
@@ -522,7 +556,7 @@
return 0;
}
-static bool fts_backend_xapian_test_memory()
+static bool fts_backend_xapian_test_memory(struct xapian_fts_backend *backend, long add)
{
rlim_t limit;
@@ -531,15 +565,23 @@
long used = fts_backend_xapian_memory_used();
long fri = fts_backend_xapian_memory_free(); // Free RAM
+ backend->nb_pushes++;
+ long m2 = 2*used/backend->nb_pushes;
+ if(backend->max_push < m2) backend->max_push=m2;
+ m2=backend->max_push;
+
+ add = long(add/1024.0);
+
if(m<1)
{
- if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB, Free = %ld MB",long(used/1024),long(fri/1024));
- return (fri>used/2);
+ if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld pushes), Free = %ld MB, Additional data %ld KB, Estimated required = %ld MB",long(used/1024), backend->nb_pushes, long(fri/1024), add, long(m2/1024));
+ return ((fri>XAPIAN_MIN_RAM*1024)&&(fri>m2));
+ }
+ else
+ {
+ if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld%%) (%ld pushes), Limit = %ld MB, Free = %ld MB, Additional data %ld KB, Estimated required = %ld MB",long(used/1024),long(used*100.0/m),backend->nb_pushes,long(m/1024),long(fri/1024), add, long(m2/1024));
+ return ((fri>XAPIAN_MIN_RAM*1024)&&(m>(used+m2))&&(fri>m2));
}
-
- if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld%%), Limit = %ld MB, Free = %ld MB",long(used/1024),long(used*100.0/m),long(m/1024),long(fri/1024));
-
- return ((m>used*3.0/2)&&(fri>used/2));
}
static bool fts_backend_xapian_open_readonly(struct xapian_fts_backend *backend, Xapian::Database ** dbr)
@@ -598,9 +640,7 @@
if(backend->old_guid != NULL)
{
/* Performance calculator*/
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - backend->perf_dt;
+ long dt = fts_backend_xapian_current_time() - backend->perf_dt;
double r=0;
if(dt>0)
{
@@ -618,8 +658,12 @@
static void fts_backend_xapian_release(struct xapian_fts_backend *backend, const char * reason, long commit_time)
{
+ bool err=false;
+
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_release (%s)",reason);
+ if(commit_time<1) commit_time = fts_backend_xapian_current_time();
+
if(backend->dbw !=NULL)
{
try
@@ -630,6 +674,7 @@
catch(Xapian::Error e)
{
i_error("FTS Xapian: %s : %s - %s",reason,e.get_type(),e.get_error_string());
+ err=true;
}
delete(backend->dbw);
backend->dbw = NULL;
@@ -637,12 +682,27 @@
backend->commit_time = commit_time;
}
+ if(err)
+ {
+ if(verbose>0) i_info("FTS Xapian: Re-creating index database due to error");
+ try
+ {
+ Xapian::WritableDatabase * db = new Xapian::WritableDatabase(backend->db,Xapian::DB_CREATE_OR_OVERWRITE | Xapian::DB_RETRY_LOCK | Xapian::DB_BACKEND_GLASS);
+ db->close();
+ delete(db);
+ }
+ catch(Xapian::Error e)
+ {
+ i_error("FTS Xapian: Can't re-create Xapian DB (%s) %s : %s - %s",backend->boxname,backend->db,e.get_type(),e.get_error_string());
+ }
+ }
+
+ backend->nb_pushes=0;
+ backend->max_push=0;
+
if(verbose>0)
{
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
- i_info("FTS Xapian: Committed '%s' in %ld ms",reason,current_time - commit_time);
+ i_info("FTS Xapian: Committed '%s' in %ld ms",reason,fts_backend_xapian_current_time() - commit_time);
}
}
@@ -689,9 +749,7 @@
{
Xapian::WritableDatabase * dbw;
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000;
+ long dt = fts_backend_xapian_current_time();
try
{
@@ -747,21 +805,18 @@
dbw->commit();
dbw->close();
delete(dbw);
- gettimeofday(&tp, NULL);
- dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - dt;
+
+ dt = fts_backend_xapian_current_time() - dt;
i_info("FTS Xapian: Expunging '%s' done in %.2f secs",fpath,dt/1000.0);
}
static int fts_backend_xapian_unset_box(struct xapian_fts_backend *backend)
{
- if(verbose>1) i_info("FTS Xapian: Unset box '%s' (%s)",backend->boxname,backend->guid);
+ if(verbose>0) i_info("FTS Xapian: Unset box '%s' (%s)",backend->boxname,backend->guid);
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long commit_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
+ long commit_time = fts_backend_xapian_current_time();
fts_backend_xapian_oldbox(backend);
-
fts_backend_xapian_release(backend,"unset_box",commit_time);
if(backend->db != NULL)
@@ -779,19 +834,47 @@
return 0;
}
+static int fts_backend_xapian_set_path(struct xapian_fts_backend *backend)
+{
+ struct mail_namespace * ns = backend->backend.ns;
+ if(ns->alias_for != NULL)
+ {
+ if(verbose>0) i_info("FTS Xapian: Switching namespace");
+ ns = ns->alias_for;
+ }
+
+ const char * path = mailbox_list_get_root_forced(ns->list, MAILBOX_LIST_PATH_TYPE_INDEX);
+
+ if(backend->path != NULL) i_free(backend->path);
+ backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL);
+
+ if(verbose>0) i_info("FTS Xapian: Index path = %s",backend->path);
+
+ struct stat sb;
+ if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode)))
+ {
+ if (mailbox_list_mkdir_root(backend->backend.ns->list, backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0)
+ {
+ i_error("FTS Xapian: can not create '%s'",backend->path);
+ return -1;
+ }
+ }
+ return 0;
+}
+
static int fts_backend_xapian_set_box(struct xapian_fts_backend *backend, struct mailbox *box)
{
if (box == NULL)
{
if(backend->guid != NULL) fts_backend_xapian_unset_box(backend);
- if(verbose>0) i_info("FTS Xapian: Box is empty");
+ if(verbose>1) i_info("FTS Xapian: Box is empty");
return 0;
}
const char * mb;
fts_mailbox_get_guid(box, &mb );
- if(verbose>1) i_info("FTX Xapian: Set box '%s' (%s)",box->name,mb);
+ if(verbose>0) i_info("FTS Xapian: Set box '%s' (%s)",box->name,mb);
if((mb == NULL) || (strlen(mb)<3))
{
@@ -807,11 +890,12 @@
if(backend->guid != NULL) fts_backend_xapian_unset_box(backend);
+ if(fts_backend_xapian_set_path(backend)<0) return -1;
+
struct timeval tp;
long current_time;
- gettimeofday(&tp, NULL);
- current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
+ current_time = fts_backend_xapian_current_time();
backend->commit_updates = 0;
backend->commit_time = current_time;
@@ -837,7 +921,6 @@
}
i_free(t);
-
/* Performance calculator*/
backend->perf_dt = current_time;
backend->perf_uid=0;
@@ -917,15 +1000,15 @@
{
bool ok=true;
- if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_hdr");
+ if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_hdr");
Xapian::WritableDatabase * dbx = backend->dbw;
long p = backend->partial;
long f = backend->full;
- if(data->length()<p) { return true; }
+ if(data->length()<p) return true;
- if(strlen(field)<1) { return true; }
+ if(strlen(field)<1) return true;
long i=0;
while((i<HDRS_NB) && (strcmp(field,hdrs_emails[i])!=0))
@@ -1006,7 +1089,7 @@
}
catch (std::bad_alloc& ba)
{
- i_error("FTS Xapian: Memory error '%s'",ba.what());
+ i_info("FTS Xapian: Memory too low (hdr) '%s'",ba.what());
ok = false;
}
}
@@ -1020,13 +1103,13 @@
{
bool ok = true;
- if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_text");
+ if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_text");
Xapian::WritableDatabase * dbx = backend->dbw;
long p = backend->partial;
long f = backend->full;
- if(data->length()<p) { return true; }
+ if(data->length()<p) return true;
XQuerySet * xq = new XQuerySet();
@@ -1134,7 +1217,7 @@
}
catch (std::bad_alloc& ba)
{
- i_error("FTS Xapian: Memory error '%s'",ba.what());
+ i_info("FTS Xapian: Memory too low (text) '%s'",ba.what());
ok = false;
}
}
diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp
--- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp 2021-04-24 16:27:55.000000000 -0400
@@ -6,7 +6,9 @@
#include "fts-xapian-plugin.h"
}
#include <dirent.h>
+#include <unicode/utypes.h>
#include <unicode/unistr.h>
+#include <unicode/translit.h>
#include <sys/time.h>
#define XAPIAN_FILE_PREFIX "xapian-indexes"
@@ -15,6 +17,7 @@
#define XAPIAN_COMMIT_TIMEOUT 300L
#define XAPIAN_WILDCARD "wldcrd"
#define XAPIAN_EXPUNGE_HEADER 9
+#define XAPIAN_MIN_RAM 200L
#define HDRS_NB 11
static const char * hdrs_emails[HDRS_NB] = { "uid", "subject", "from", "to", "cc", "bcc", "messageid", "listid", "body", "expungeheader", "" };
@@ -25,9 +28,8 @@
struct xapian_fts_backend
{
struct fts_backend backend;
- char * path;
+ char * path = NULL;
long partial,full;
- bool attachments;
char * guid;
char * boxname;
@@ -45,12 +47,16 @@
long perf_nb;
long perf_uid;
long perf_dt;
+
+ long nb_pushes;
+ long max_push;
};
struct xapian_fts_backend_update_context
{
struct fts_backend_update_context ctx;
char * tbi_field=NULL;
+ bool isattachment=false;
bool tbi_isfield;
uint32_t tbi_uid=0;
};
@@ -69,10 +75,10 @@
static int fts_backend_xapian_init(struct fts_backend *_backend, const char **error_r)
{
- if(verbose>0) i_info("fts_backend_xapian_init");
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend;
+
+ if(verbose>0) i_info("fts_backend_xapian_init : %s",_backend->name);
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)_backend;
const char *const *tmp, *env;
long len;
@@ -83,11 +89,13 @@
backend->path = NULL;
backend->old_guid = NULL;
backend->old_boxname = NULL;
- backend->attachments = false;
verbose = 0;
backend->partial = 0;
backend->full = 0;
+ backend->nb_pushes=0;
+ backend->max_push=0;
+
env = mail_user_plugin_getenv(_backend->ns->user, "fts_xapian");
if (env == NULL)
{
@@ -114,7 +122,7 @@
}
else if (strncmp(*tmp,"attachments=",12)==0)
{
- if(atol(*tmp + 12)>0) backend->attachments=true;
+ // Legacy
}
else
{
@@ -144,28 +152,16 @@
return -1;
}
- const char * path = mailbox_list_get_root_forced(_backend->ns->list, MAILBOX_LIST_PATH_TYPE_INDEX);
- backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL);
-
- struct stat sb;
- if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode)))
- {
- if (mailbox_list_mkdir_root(backend->backend.ns->list, backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0)
- {
- i_error("FTS Xapian: can not create '%s'",backend->path);
- return -1;
- }
- }
+ if(fts_backend_xapian_set_path(backend)<0) return -1;
- if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld attachments=%d verbose=%d",backend->partial,backend->full,backend->attachments,verbose);
+ if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld verbose=%d",backend->partial,backend->full,verbose);
return 0;
}
static void fts_backend_xapian_deinit(struct fts_backend *_backend)
{
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)_backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend;
if(verbose>0) i_info("FTS Xapian: Deinit %s)",backend->path);
@@ -188,8 +184,7 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_get_last_uid");
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)_backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend;
*last_uid_r = 0;
@@ -242,18 +237,12 @@
static int fts_backend_xapian_update_deinit(struct fts_backend_update_context *_ctx)
{
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)ctx->ctx.backend;
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend;
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_deinit (%s)",backend->path);
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
-
- fts_backend_xapian_release(backend,"update_deinit", current_time);
+ fts_backend_xapian_release(backend,"update_deinit",0);
i_free(ctx);
@@ -264,10 +253,8 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_set_mailbox");
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)ctx->ctx.backend;
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend;
fts_backend_xapian_set_box(backend, box);
}
@@ -276,10 +263,8 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_expunge");
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)ctx->ctx.backend;
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend;
if(!fts_backend_xapian_check_access(backend))
{
@@ -332,11 +317,9 @@
{
if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_set_build_key");
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *)ctx->ctx.backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend;
ctx->tbi_isfield=false;
ctx->tbi_uid=0;
@@ -364,9 +347,7 @@
if((backend->perf_nb - backend->perf_pt)>=200)
{
backend->perf_pt = backend->perf_nb;
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - backend->perf_dt;
+ long dt = fts_backend_xapian_current_time() - backend->perf_dt;
double r=0;
if(dt>0)
{
@@ -384,19 +365,27 @@
if(verbose>1) i_info("FTS Xapian: New part (Header=%s,Type=%s,Disposition=%s)",field,type,disposition);
// Verify content-type
- if((type != NULL) && (strncmp(type,"text",4)!=0))
+
+ if(key->type == FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY)
{
- if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s'",type);
+ if(verbose>0) i_info("FTS Xapian: Skipping binary part of type '%s'",type);
return FALSE;
}
- // Verify content-disposition
- if((disposition != NULL) && (!backend->attachments) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL)))
+ if((type != NULL) && (strncmp(type,"text",4)!=0) && ((disposition==NULL) || ((strstr(disposition,"filename=")==NULL) && (strstr(disposition,"attachment")==NULL))))
{
- if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s' and disposition '%s'",type,disposition);
+ if(verbose>0) i_info("FTS Xapian: Non-binary & non-text part of type '%s'",type);
return FALSE;
}
+ // Verify content-disposition
+ ctx->isattachment=false;
+ if((disposition != NULL) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL)))
+ {
+ if(verbose>0) i_info("FTS Xapian: Found part as attachment of type '%s' and disposition '%s'",type,disposition);
+ ctx->isattachment=true;
+ }
+
// Fill-in field
if(field==NULL)
{
@@ -422,7 +411,7 @@
}
if(i>=HDRS_NB)
{
- if(verbose>1) i_info("FTS Xapian: Unknown header (indexing) '%s'",ctx->tbi_field);
+ if(verbose>1) i_info("FTS Xapian: Unknown header '%s' of part",ctx->tbi_field);
i_free(ctx->tbi_field);
ctx->tbi_field=NULL;
return FALSE;
@@ -447,10 +436,9 @@
static void fts_backend_xapian_update_unset_build_key(struct fts_backend_update_context *_ctx)
{
- if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_unset_build_key");
+ if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_unset_build_key");
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
if(ctx->tbi_field!=NULL)
{
@@ -464,26 +452,32 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_refresh");
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) _backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend;
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
-
- fts_backend_xapian_release(backend,"refresh", current_time);
+ fts_backend_xapian_release(backend,"refresh", 0);
return 0;
}
static int fts_backend_xapian_update_build_more(struct fts_backend_update_context *_ctx, const unsigned char *data, size_t size)
{
- if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_build_more");
+ struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *) ctx->ctx.backend;
- struct xapian_fts_backend_update_context *ctx =
- (struct xapian_fts_backend_update_context *)_ctx;
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) ctx->ctx.backend;
+ if(verbose>1)
+ {
+ if(ctx->isattachment)
+ {
+ char * t = i_strdup("NODATA");
+ if(data != NULL) { i_free(t); t = i_strndup(data,40); }
+ i_info("FTS Xapian: Indexing part as attachment (data like '%s')",t);
+ i_free(t);
+ }
+ else
+ {
+ i_info("FTS Xapian: Indexing part as text");
+ }
+ }
if(ctx->tbi_uid<1) return 0;
@@ -499,13 +493,10 @@
return -1;
}
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
-
- if(!fts_backend_xapian_test_memory())
+ if(!fts_backend_xapian_test_memory(backend,d2.length()))
{
- fts_backend_xapian_release(backend,"Low memory indexing", current_time);
+ if(verbose>0) i_info("FTS Xapian: Warning Low memory");
+ fts_backend_xapian_release(backend,"Low memory indexing", 0);
if(!fts_backend_xapian_check_access(backend))
{
i_error("FTS Xapian: Buildmore: Can not open db (2)");
@@ -518,16 +509,41 @@
if(ctx->tbi_isfield)
{
ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2);
+ if(!ok)
+ {
+ if(verbose>0) i_info("FTS Xapian: Flushing memory and retrying");
+ fts_backend_xapian_release(backend,"Flushing memory indexing hdr", 0);
+ if(fts_backend_xapian_check_access(backend))
+ {
+ ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2);
+ }
+ else
+ {
+ i_error("FTS Xapian: Buildmore: Can not open db (3)");
+ }
+ }
}
else
{
ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2);
+ if(!ok)
+ {
+ if(verbose>0) i_info("FTS Xapian: Flushing memory and retrying");
+ fts_backend_xapian_release(backend,"Flushing memory indexing text", 0);
+ if(fts_backend_xapian_check_access(backend))
+ {
+ ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2);
+ }
+ else
+ {
+ i_error("FTS Xapian: Buildmore: Can not open db (4)");
+ }
+ }
}
backend->commit_updates++;
- gettimeofday(&tp, NULL);
- current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
+ long current_time = fts_backend_xapian_current_time();
if( (!ok) || (backend->commit_updates>XAPIAN_COMMIT_ENTRIES) || ((current_time - backend->commit_time) > XAPIAN_COMMIT_TIMEOUT*1000) )
{
@@ -541,8 +557,7 @@
static int fts_backend_xapian_optimize(struct fts_backend *_backend)
{
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) _backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend;
i_info("FTS Xapian: fts_backend_xapian_optimize '%s'",backend->path);
@@ -580,8 +595,7 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_rescan");
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) _backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend;
struct stat sb;
if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode)))
@@ -631,16 +645,11 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup");
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) _backend;
+ struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend;
- if(fts_backend_xapian_set_box(backend, box)<0)
- return -1;
+ if(fts_backend_xapian_set_box(backend, box)<0) return -1;
- /* Performance calc */
- struct timeval tp;
- gettimeofday(&tp, NULL);
- long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000;
+ long current_time = fts_backend_xapian_current_time();
Xapian::Database * dbr;
@@ -671,6 +680,7 @@
XResultSet * r=fts_backend_xapian_query(dbr,qs);
long n=r->size;
+ if(verbose>0) { i_info("FTS Xapian: QUery '%s' -> %ld results",qs->get_string().c_str(),n); }
i_array_init(&(result->definite_uids),r->size);
@@ -696,9 +706,7 @@
/* Performance calc */
if(verbose>0)
{
- gettimeofday(&tp, NULL);
- long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - current_time;
- i_info("FTS Xapian: %ld results in %ld ms",n,dt);
+ i_info("FTS Xapian: %ld results in %ld ms",n,fts_backend_xapian_current_time() - current_time);
}
return 0;
}
@@ -707,10 +715,8 @@
{
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup_multi");
- struct xapian_fts_backend *backend =
- (struct xapian_fts_backend *) _backend;
-
ARRAY(struct fts_result) box_results;
+
struct fts_result *box_result;
int i;
@@ -719,12 +725,22 @@
{
box_result = array_append_space(&box_results);
box_result->box = boxes[i];
- if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, box_result)<1) return -1;
+ if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, box_result)<0)
+ {
+ void* p=&box_results;
+ p_free(result->pool, p);
+ return -1;
+ }
}
+
+ array_append_zero(&box_results);
+ result->box_results = array_idx_modifiable(&box_results, 0);
+
return 0;
}
-struct fts_backend fts_backend_xapian = {
+struct fts_backend fts_backend_xapian =
+{
.name = "xapian",
.flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS,
.v = {
@@ -748,4 +764,3 @@
NULL
}
};
-
diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c
--- dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c 2021-04-24 16:27:55.000000000 -0400
@@ -7,13 +7,11 @@
void fts_xapian_plugin_init(struct module *module ATTR_UNUSED)
{
- //i_warning("fts_xapian_plugin_init");
fts_backend_register(&fts_backend_xapian);
}
void fts_xapian_plugin_deinit(void)
{
- //i_warning("fts_xapian_plugin_deinit");
fts_backend_unregister(fts_backend_xapian.name);
}
diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h
--- dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h 2021-01-31 14:06:29.000000000 -0500
+++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h 2021-04-24 16:27:55.000000000 -0400
@@ -5,13 +5,12 @@
#define FTS_XAPIAN_PLUGIN_H
#include "lib.h"
-#include "mail-storage-private.h"
-#include "mailbox-list-private.h"
-#include "mail-search.h"
+#include "fts-api-private.h"
#include "fts-api.h"
-#include "module-context.h"
+#include "mail-search.h"
+#include "mail-storage-private.h"
#include "mail-user.h"
-#include "fts-api-private.h"
+#include "module-context.h"
#include "restrict-process-size.h"
extern const char *fts_xapian_plugin_dependencies[];
--- End Message ---