Bug#401561: Try this instead...
This new test version has been working well for me. See attached diff and
modified file.
This version does the following differently from the original:
1. It checks the size of the cache first before opening any files (via
directorySize()). This change "fixes" the issue I opened the bug on. Now
with BT client running, kio_http_cache_cleaner loads and exits within a few
seconds with very little swap thrasing.
2. If the cache is big enough to require cleaning, it now cleans it down to
20% under the size ceiling. This should reduce the work it needs to do over
the long run.
3. Skips the sort step when the user has requested to clear all files. This
doesn't seem to help much, but in the spirit of avoiding useless work it is
here.
I suspect that the swap thrashing occurred because the original code opened
every file in the cache to read the HTTP expiration header inside in
scanDirectory(). (On my 200MB cache, that step opened about 12000 files
needlessly.) This clashed with the BT client which had several gigs of files
open also.
I formally declare this patch to be in the public domain. It is hoped that
changes along these lines will be adopted by upstream.
/*
This file is part of KDE
Copyright (C) 1999-2000 Waldo Bastian (bastian@kde.org)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
//----------------------------------------------------------------------------
//
// KDE Http Cache cleanup tool
// $Id: http_cache_cleaner.cpp 362866 2004-11-14 10:54:13Z mueller $
#include <time.h>
#include <stdlib.h>
#include <qdir.h>
#include <qstring.h>
#include <qptrlist.h>
#include <kinstance.h>
#include <klocale.h>
#include <kcmdlineargs.h>
#include <kglobal.h>
#include <kstandarddirs.h>
#include <dcopclient.h>
#include <kprotocolmanager.h>
#include <unistd.h>
#include <kdebug.h>
time_t currentDate;
int m_maxCacheAge;
int m_maxCacheSize;
static const char appName[] = "kio_http_cache_cleaner";
static const char description[] = I18N_NOOP("KDE HTTP cache maintenance tool");
static const char version[] = "1.0.0";
static const KCmdLineOptions options[] =
{
{"clear-all", I18N_NOOP("Empty the cache"), 0},
KCmdLineLastOption
};
struct FileInfo {
QString name;
int size; // Size in Kb.
int age;
};
template class QPtrList<FileInfo>;
class FileInfoList : public QPtrList<FileInfo>
{
public:
FileInfoList() : QPtrList<FileInfo>() { }
int compareItems(QPtrCollection::Item item1, QPtrCollection::Item item2)
{ return ((FileInfo *)item1)->age - ((FileInfo *)item2)->age; }
};
// !START OF SYNC!
// Keep the following in sync with the cache code in http.cc
#define CACHE_REVISION "7\n"
FileInfo *readEntry( const QString &filename)
{
QCString CEF = QFile::encodeName(filename);
FILE *fs = fopen( CEF.data(), "r");
if (!fs)
return 0;
char buffer[401];
bool ok = true;
// CacheRevision
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
if (ok && (strcmp(buffer, CACHE_REVISION) != 0))
ok = false;
// Full URL
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
time_t creationDate;
int age =0;
// Creation Date
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
if (ok)
{
creationDate = (time_t) strtoul(buffer, 0, 10);
age = (int) difftime(currentDate, creationDate);
if ( m_maxCacheAge && ( age > m_maxCacheAge))
{
ok = false; // Expired
}
}
// Expiration Date
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
if (ok)
{
//WABA: It seems I slightly misunderstood the meaning of "Expire:" header.
#if 0
time_t expireDate;
expireDate = (time_t) strtoul(buffer, 0, 10);
if (expireDate && (expireDate < currentDate))
ok = false; // Expired
#endif
}
// ETag
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
if (ok)
{
// Ignore ETag
}
// Last-Modified
if (ok && (!fgets(buffer, 400, fs)))
ok = false;
if (ok)
{
// Ignore Last-Modified
}
fclose(fs);
if (ok)
{
FileInfo *info = new FileInfo;
info->age = age;
return info;
}
unlink( CEF.data());
return 0;
}
// Keep the above in sync with the cache code in http.cc
// !END OF SYNC!
void scanDirectory(FileInfoList &fileEntries, const QString &name, const QString &strDir)
{
QDir dir(strDir);
if (!dir.exists()) return;
QFileInfoList *newEntries = (QFileInfoList *) dir.entryInfoList();
if (!newEntries) return; // Directory not accessible ??
for(QFileInfo *qFileInfo = newEntries->first();
qFileInfo;
qFileInfo = newEntries->next())
{
if (qFileInfo->isFile())
{
FileInfo *fileInfo = readEntry( strDir + "/" + qFileInfo->fileName());
if (fileInfo)
{
fileInfo->name = name + "/" + qFileInfo->fileName();
fileInfo->size = (qFileInfo->size() + 1023) / 1024;
fileEntries.append(fileInfo);
}
}
}
}
// KAL
#ifdef __linux
#include <dirent.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
int directorySize(const char * directory) {
DIR * dir;
struct dirent * dirEntry;
int size = 0;
struct stat fileStats;
char subdir[FILENAME_MAX + 1];
if (NULL == (dir = opendir(directory))) {
fprintf(stderr, "Unable to open directory %s: %s\n", directory, strerror(errno));
return 0;
}
while (NULL != (dirEntry = readdir(dir))) {
// if (errno) {
// fprintf(stderr, "Unable to read directory %s: %s\n", directory, strerror(errno));
// break;
// }
if ((dirEntry->d_name[0] == '.') && (dirEntry->d_name[1] == 0)) {
// Current dir, skip
continue;
}
if ((dirEntry->d_name[0] == '.') && (dirEntry->d_name[1] == '.') && (dirEntry->d_name[2] == 0)) {
// Parent dir, skip
continue;
}
snprintf(subdir, FILENAME_MAX, "%s/%s", directory, dirEntry->d_name);
if (lstat(subdir, &fileStats) < 0) {
fprintf(stderr, "Unable to lstat() %s: %s\n", subdir, strerror(errno));
break;
}
size += fileStats.st_size;
if (S_ISDIR(fileStats.st_mode)) {
// Recurse subdirectory
size += directorySize(subdir);
}
}
closedir(dir);
return size;
}
#endif // __linux
extern "C" KDE_EXPORT int kdemain(int argc, char **argv)
{
KLocale::setMainCatalogue("kdelibs");
KCmdLineArgs::init( argc, argv, appName,
I18N_NOOP("KDE HTTP cache maintenance tool"),
description, version, true);
KCmdLineArgs::addCmdLineOptions( options );
KCmdLineArgs *args = KCmdLineArgs::parsedArgs();
bool deleteAll = args->isSet("clear-all");
KInstance ins( appName );
if (!deleteAll)
{
DCOPClient *dcop = new DCOPClient();
QCString name = dcop->registerAs(appName, false);
if (!name.isEmpty() && (name != appName))
{
fprintf(stderr, "%s: Already running! (%s)\n", appName, name.data());
return 0;
}
}
currentDate = time(0);
m_maxCacheAge = KProtocolManager::maxCacheAge();
m_maxCacheSize = KProtocolManager::maxCacheSize();
if (deleteAll)
m_maxCacheSize = -1;
QString strCacheDir = KGlobal::dirs()->saveLocation("cache", "http");
QDir cacheDir( strCacheDir );
if (!cacheDir.exists())
{
fprintf(stderr, "%s: '%s' does not exist.\n", appName, strCacheDir.ascii());
return 0;
}
int totalSize = 0;
#ifdef __linux
// KAL: On my Linux system with 1GB RAM and 390MB swap,
// kio_http_cache_cleaner causes the system to thrash heavily in
// swap, bringing the system to a crawl. It "ought" not to, but
// it does, so I added this low-level check to ensure thrashing
// only occurs if it really needs to. Without this check,
// scanDirectory() will open over 12,000 files needlessly.
char cacheDirName[FILENAME_MAX + 1];
memset(cacheDirName, 0, sizeof(cacheDirName));
strncpy(cacheDirName, strCacheDir.ascii(), FILENAME_MAX);
while(cacheDirName[strlen(cacheDirName)-1] == '/')
{
cacheDirName[strlen(cacheDirName)-1] = 0;
}
kdDebug () << appName << ": m_maxCacheSize = " << m_maxCacheSize << endl;
totalSize = directorySize(cacheDirName);
totalSize = (totalSize + 1023) / 1024;
kdDebug () << appName << ": totalSize = " << totalSize << endl;
if (totalSize > m_maxCacheSize)
{
// Cache is too big, so something will need to be deleted.
// Delete everything down to 20%.
m_maxCacheSize = m_maxCacheSize * 4 / 5;
kdDebug () << appName << ": Cache must shrink, new m_maxCacheSize = " << m_maxCacheSize << endl;
}
else
{
// Cache has more room to grow, so we are done.
kdDebug () << appName << ": Current size of cache = " << totalSize << " kB, no recent objects needed to be deleted." << endl;
return 0;
}
#endif // __linux
QStringList dirs = cacheDir.entryList( );
FileInfoList cachedEntries;
for(QStringList::Iterator it = dirs.begin();
it != dirs.end();
it++)
{
if ((*it)[0] != '.')
{
scanDirectory( cachedEntries, *it, strCacheDir + "/" + *it);
}
}
if (m_maxCacheSize > 0)
{
// Don't bother sorting if the user requested all files be deleted anyway.
cachedEntries.sort();
}
int maxCachedSize = m_maxCacheSize / 2;
for(FileInfo *fileInfo = cachedEntries.first();
fileInfo;
fileInfo = cachedEntries.next())
{
if (fileInfo->size > maxCachedSize)
{
QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
unlink(filename.data());
// kdDebug () << appName << ": Object too big, deleting '" << filename.data() << "' (" << result<< ")" << endl;
}
}
totalSize = 0;
for(FileInfo *fileInfo = cachedEntries.first();
fileInfo;
fileInfo = cachedEntries.next())
{
if ((totalSize + fileInfo->size) > m_maxCacheSize)
{
QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
unlink(filename.data());
// kdDebug () << appName << ": Cache too big, deleting '" << filename.data() << "' (" << fileInfo->size << ")" << endl;
}
else
{
totalSize += fileInfo->size;
// fprintf(stderr, "Keep in cache: %s %d %d total = %d\n", fileInfo->name.ascii(), fileInfo->size, fileInfo->age, totalSize);
}
}
kdDebug () << appName << ": Current size of cache = " << totalSize << " kB." << endl;
return 0;
}
--- http_cache_cleaner.cpp.original 2006-12-04 09:49:36.000000000 -0600
+++ http_cache_cleaner.cpp 2006-12-05 12:28:31.000000000 -0600
@@ -112,7 +112,7 @@
age = (int) difftime(currentDate, creationDate);
if ( m_maxCacheAge && ( age > m_maxCacheAge))
{
- ok = false; // Expired
+ ok = false; // Expired
}
}
@@ -126,7 +126,7 @@
time_t expireDate;
expireDate = (time_t) strtoul(buffer, 0, 10);
if (expireDate && (expireDate < currentDate))
- ok = false; // Expired
+ ok = false; // Expired
#endif
}
@@ -176,17 +176,68 @@
{
if (qFileInfo->isFile())
{
- FileInfo *fileInfo = readEntry( strDir + "/" + qFileInfo->fileName());
- if (fileInfo)
- {
- fileInfo->name = name + "/" + qFileInfo->fileName();
- fileInfo->size = (qFileInfo->size() + 1023) / 1024;
- fileEntries.append(fileInfo);
- }
+ FileInfo *fileInfo = readEntry( strDir + "/" + qFileInfo->fileName());
+ if (fileInfo)
+ {
+ fileInfo->name = name + "/" + qFileInfo->fileName();
+ fileInfo->size = (qFileInfo->size() + 1023) / 1024;
+ fileEntries.append(fileInfo);
+ }
}
}
}
+// KAL
+#ifdef __linux
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+int directorySize(const char * directory) {
+ DIR * dir;
+ struct dirent * dirEntry;
+ int size = 0;
+ struct stat fileStats;
+ char subdir[FILENAME_MAX + 1];
+
+ if (NULL == (dir = opendir(directory))) {
+ fprintf(stderr, "Unable to open directory %s: %s\n", directory, strerror(errno));
+ return 0;
+ }
+ while (NULL != (dirEntry = readdir(dir))) {
+// if (errno) {
+// fprintf(stderr, "Unable to read directory %s: %s\n", directory, strerror(errno));
+// break;
+// }
+ if ((dirEntry->d_name[0] == '.') && (dirEntry->d_name[1] == 0)) {
+ // Current dir, skip
+ continue;
+ }
+ if ((dirEntry->d_name[0] == '.') && (dirEntry->d_name[1] == '.') && (dirEntry->d_name[2] == 0)) {
+ // Parent dir, skip
+ continue;
+ }
+ snprintf(subdir, FILENAME_MAX, "%s/%s", directory, dirEntry->d_name);
+ if (lstat(subdir, &fileStats) < 0) {
+ fprintf(stderr, "Unable to lstat() %s: %s\n", subdir, strerror(errno));
+ break;
+ }
+ size += fileStats.st_size;
+ if (S_ISDIR(fileStats.st_mode)) {
+ // Recurse subdirectory
+ size += directorySize(subdir);
+ }
+ }
+
+ closedir(dir);
+ return size;
+}
+#endif // __linux
+
extern "C" KDE_EXPORT int kdemain(int argc, char **argv)
{
KLocale::setMainCatalogue("kdelibs");
@@ -208,8 +259,8 @@
QCString name = dcop->registerAs(appName, false);
if (!name.isEmpty() && (name != appName))
{
- fprintf(stderr, "%s: Already running! (%s)\n", appName, name.data());
- return 0;
+ fprintf(stderr, "%s: Already running! (%s)\n", appName, name.data());
+ return 0;
}
}
@@ -229,6 +280,44 @@
return 0;
}
+ int totalSize = 0;
+
+#ifdef __linux
+ // KAL: On my Linux system with 1GB RAM and 390MB swap,
+ // kio_http_cache_cleaner causes the system to thrash heavily in
+ // swap, bringing the system to a crawl. It "ought" not to, but
+ // it does, so I added this low-level check to ensure thrashing
+ // only occurs if it really needs to. Without this check,
+ // scanDirectory() will open over 12,000 files needlessly.
+ char cacheDirName[FILENAME_MAX + 1];
+ memset(cacheDirName, 0, sizeof(cacheDirName));
+ strncpy(cacheDirName, strCacheDir.ascii(), FILENAME_MAX);
+ while(cacheDirName[strlen(cacheDirName)-1] == '/')
+ {
+ cacheDirName[strlen(cacheDirName)-1] = 0;
+ }
+
+ kdDebug () << appName << ": m_maxCacheSize = " << m_maxCacheSize << endl;
+ totalSize = directorySize(cacheDirName);
+ totalSize = (totalSize + 1023) / 1024;
+ kdDebug () << appName << ": totalSize = " << totalSize << endl;
+ if (totalSize > m_maxCacheSize)
+ {
+ // Cache is too big, so something will need to be deleted.
+ // Delete everything down to 20%.
+ m_maxCacheSize = m_maxCacheSize * 4 / 5;
+ kdDebug () << appName << ": Cache must shrink, new m_maxCacheSize = " << m_maxCacheSize << endl;
+ }
+ else
+ {
+ // Cache has more room to grow, so we are done.
+ kdDebug () << appName << ": Current size of cache = " << totalSize << " kB, no recent objects needed to be deleted." << endl;
+ return 0;
+ }
+
+#endif // __linux
+
+
QStringList dirs = cacheDir.entryList( );
FileInfoList cachedEntries;
@@ -239,27 +328,30 @@
{
if ((*it)[0] != '.')
{
- scanDirectory( cachedEntries, *it, strCacheDir + "/" + *it);
+ scanDirectory( cachedEntries, *it, strCacheDir + "/" + *it);
}
}
- cachedEntries.sort();
+ if (m_maxCacheSize > 0)
+ {
+ // Don't bother sorting if the user requested all files be deleted anyway.
+ cachedEntries.sort();
+ }
int maxCachedSize = m_maxCacheSize / 2;
-
for(FileInfo *fileInfo = cachedEntries.first();
fileInfo;
fileInfo = cachedEntries.next())
{
if (fileInfo->size > maxCachedSize)
{
- QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
- unlink(filename.data());
+ QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
+ unlink(filename.data());
// kdDebug () << appName << ": Object too big, deleting '" << filename.data() << "' (" << result<< ")" << endl;
}
}
- int totalSize = 0;
+ totalSize = 0;
for(FileInfo *fileInfo = cachedEntries.first();
fileInfo;
@@ -267,18 +359,16 @@
{
if ((totalSize + fileInfo->size) > m_maxCacheSize)
{
- QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
- unlink(filename.data());
+ QCString filename = QFile::encodeName( strCacheDir + "/" + fileInfo->name);
+ unlink(filename.data());
// kdDebug () << appName << ": Cache too big, deleting '" << filename.data() << "' (" << fileInfo->size << ")" << endl;
}
else
{
- totalSize += fileInfo->size;
+ totalSize += fileInfo->size;
// fprintf(stderr, "Keep in cache: %s %d %d total = %d\n", fileInfo->name.ascii(), fileInfo->size, fileInfo->age, totalSize);
}
}
kdDebug () << appName << ": Current size of cache = " << totalSize << " kB." << endl;
return 0;
}
-
-
Reply to: