Bug#987013: Release goal proposal: Remove Berkeley DB
- To: 987013@bugs.debian.org
- Cc: debian-release@lists.debian.org, Matthias Klose <doko@debian.org>
- Subject: Bug#987013: Release goal proposal: Remove Berkeley DB
- From: "Trent W. Buck" <trentbuck@gmail.com>
- Date: Tue, 24 Aug 2021 00:12:46 +1000
- Message-id: <YSOs3r+JvkZ/fxwu@hera.lan>
- Reply-to: "Trent W. Buck" <trentbuck@gmail.com>, 987013@bugs.debian.org
- In-reply-to: <01463807-0ff8-2af5-fea9-05fcf3dd7e01@debian.org>
- References: <161850313718.205852.14724994036001883500.reportbug@rockhammer.waldi.eu.org> <YHmnY+544/OfCIU6@bongo.bofh.it> <161850313718.205852.14724994036001883500.reportbug@rockhammer.waldi.eu.org> <20210416182951.GB29176@shell.thinkmo.de> <161850313718.205852.14724994036001883500.reportbug@rockhammer.waldi.eu.org> <YJLpLzHIfNRzdGWj@estella.local.invalid> <161850313718.205852.14724994036001883500.reportbug@rockhammer.waldi.eu.org> <01463807-0ff8-2af5-fea9-05fcf3dd7e01@debian.org> <161850313718.205852.14724994036001883500.reportbug@rockhammer.waldi.eu.org>
Matthias Klose wrote:
>> Then there's user code too. I also think we'll need at least a dumper
>> utility so that users can migrate their data manually when they discover
>> their program no longer works after upgrading.
>
> For Python, the dbm/ndbm.py module, based on the _dbm extension is also
> affected. You can build the _dbm extension using libgdbm-compat-dev, however
> that changes the on-disk format, and the license used (likely the new one should
> be moved into the python3-gdbm package).
Hi, I'm a nosy bystander.
Last year I was annoyed by scrapy using bdb to cache entire HTTP responses (including large HTML bodies).
As an experiment, I wrote some proof-of-concept code for other backends.
IIRC, as long as the database doesn't exist yet, they can be used as drop-in replacements for "import dbm".
Here they are attached, do with them what you will.
I don't intend to touch them again myself.
(They are expat licensed, but I can relicense if needed.)
(FWIW, I eventually ended up patching scrapy to use sqlite3 directly, and then gave up on scrapy entirely.)
# SYNOPSIS: a shim to use kyotocabinet in apps that expect dbm's API
#
# import dbm2lmdb as dbm
# db = dbm.open('porn')
# db['Alice'] = 'Bob'
# db['Alice']
# db['Clara']
# del db['Alice']
# db.close()
#
# SEE ALSO:
#
# https://en.wikipedia.org/wiki/Tokyo_Cabinet_and_Kyoto_Cabinet
# https://dbmx.net/kyotocabinet/pythondoc/ (seriously, frames?)
#
# NOTE: this is not even remotely working currently.
import pathlib
import logging
import kyotocabinet
class DBMLikeKyotoCabinet:
    """Minimal dbm-style wrapper around a ``kyotocabinet.DB`` handle.

    Keys and values are passed through ``_bytes`` before being handed to
    kyotocabinet.  Looking up a missing key returns ``None`` (and logs a
    warning) instead of raising ``KeyError``.
    """

    def __init__(self, path, flags=None, mode=None):
        """Open the database at *path* with its suffix replaced by ``.kch``.

        Raises:
            NotImplementedError: if *flags* or *mode* is given; neither is
                supported by this shim.
            RuntimeError: if kyotocabinet reports that the open failed.
        """
        if flags is not None or mode is not None:
            raise NotImplementedError(flags, mode)
        # kyotocabinet databases MUST have a specific extension?
        path_kch = pathlib.Path(path).with_suffix('.kch')
        self.db = kyotocabinet.DB()
        # Hand the C extension a plain string rather than a Path object.
        ok = self.db.open(str(path_kch))
        if not ok:
            # Failure is signalled by the return value, not an exception.
            raise RuntimeError(self.db.error())

    def __getitem__(self, key):
        """Return the value stored under *key* (``db['foo']``), or ``None``."""
        value = self.db.get(_bytes(key))
        if value is None:
            # Test against None explicitly so that an empty stored value is
            # not mistaken for a missing record.
            logging.warning('%s', self.db.error())
            return None
        return value

    def __setitem__(self, key, value):
        """Store *value* under *key* (``db['foo'] = 'bar'``).

        Raises:
            RuntimeError: if kyotocabinet reports that the write failed.
        """
        ok = self.db.set(_bytes(key), _bytes(value))
        if not ok:
            raise RuntimeError(self.db.error())

    def __delitem__(self, key):
        """Deletion (``del db['foo']``) is not implemented."""
        raise NotImplementedError()

    def __contains__(self, key):
        """Support ``'foo' in db`` / ``'foo' not in db`` via a lookup."""
        return self.__getitem__(key) is not None

    def close(self):
        """Close the underlying database handle and return its result."""
        return self.db.close()

    def sync(self):
        """Ask kyotocabinet to synchronize the database with the device."""
        return self.db.synchronize()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()
def open(*args, **kwargs):
    """Module-level entry point mirroring ``dbm.open``.

    Every positional and keyword argument is forwarded unchanged to
    ``DBMLikeKyotoCabinet`` and the resulting instance is returned.
    """
    db = DBMLikeKyotoCabinet(*args, **kwargs)
    return db
def whichdb(*args, **kwargs):
    """Placeholder for ``dbm.whichdb``; always raises ``NotImplementedError``."""
    raise NotImplementedError
def _bytes(b):
if isinstance(b, bytes):
return b
elif isinstance(b, str):
return bytes(b, encoding='UTF-8')
else:
raise ValueError(b)
# SYNOPSIS: a shim to use lmdb in apps that expect dbm's API
#
# import dbm2lmdb as dbm
# db = dbm.open('porn')
# db['Alice'] = 'Bob'
# db['Alice']
# db['Clara']
# del db['Alice']
# db.close()
#
# SEE ALSO:
#
# https://lmdb.readthedocs.io
# http://www.lmdb.tech/doc/
import lmdb
class DBMLikeLMDBEnvironment:
    """Minimal dbm-style wrapper around an ``lmdb.Environment``.

    Keys and values are passed through ``_bytes`` before being handed to
    LMDB.  Each operation runs in its own transaction.  Looking up a missing
    key yields whatever ``txn.get`` returns for it rather than raising
    ``KeyError``; ``__contains__`` treats ``None`` as "missing".
    """

    def __init__(self,
                 path: str,
                 flags: str = 'r',
                 mode: int = 0o666):
        """Open the LMDB environment at *path*.

        Args:
            path: Location of the environment; passed through ``str()`` so
                that ``pathlib`` paths also work.
            flags: Any combination of ``'r'`` (open read-only), ``'w'`` and
                ``'c'`` (create if missing).
            mode: Accepted for signature compatibility with ``dbm.open``;
                not used.

        Raises:
            NotImplementedError: if *flags* contains any other character.
        """
        for c in flags:
            if c not in 'rwc':
                raise NotImplementedError('Unsupported flag', c)
        # By default LMDB lets you store up to 10MiB; increase that to 1GiB.
        # The size is requested at construction time because calling
        # set_mapsize() afterwards requires python3-lmdb (>= 0.87), and
        # Debian 10 has 0.86.
        self.env = lmdb.Environment(
            str(path),          # str() to add pathlib support
            readonly='r' in flags,
            create='c' in flags,
            map_size=2**30)

    def __getitem__(self, key):
        """Return the value stored under *key* (``db['foo']``)."""
        with self.env.begin() as txn:
            return txn.get(_bytes(key))

    def __setitem__(self, key, value):
        """Store *value* under *key* (``db['foo'] = 'bar'``)."""
        with self.env.begin(write=True) as txn:
            return txn.put(_bytes(key), _bytes(value))

    def __delitem__(self, key):
        """Delete *key* (``del db['foo']``)."""
        # Deleting modifies the database, so a write transaction is needed.
        with self.env.begin(write=True) as txn:
            return txn.delete(_bytes(key))

    def __contains__(self, key):
        """Support ``'foo' in db`` / ``'foo' not in db`` via a lookup."""
        return self.__getitem__(key) is not None

    def close(self):
        """Close the underlying environment and return its result."""
        return self.env.close()

    def sync(self):
        """Call ``sync()`` on the underlying environment."""
        return self.env.sync()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()
def open(*args, **kwargs):
    """Module-level entry point mirroring ``dbm.open``.

    Every positional and keyword argument is forwarded unchanged to
    ``DBMLikeLMDBEnvironment`` and the resulting instance is returned.
    """
    db = DBMLikeLMDBEnvironment(*args, **kwargs)
    return db
def whichdb(*args, **kwargs):
    """Placeholder for ``dbm.whichdb``; always raises ``NotImplementedError``."""
    raise NotImplementedError
def _bytes(b):
if isinstance(b, bytes):
return b
elif isinstance(b, str):
return bytes(b, encoding='UTF-8')
else:
raise ValueError(b)
# SYNOPSIS: a shim to use sqlite3 in apps that expect dbm's API
#
# import dbm2sqlite as dbm
# db = dbm.open('porn')
# db['Alice'] = 'Bob'
# db['Alice']
# db['Clara']
# del db['Alice']
# db.close()
#
# SEE ALSO:
#
# https://sqlite.org/affcase1.html
#
# PRIOR ART (just use that instead?):
#
# https://bugs.python.org/issue3783
import sqlite3
class DBMLikeSqliteConnection:
    """Minimal dbm-style wrapper around a ``sqlite3`` connection.

    Records live in a single table ``main(key BLOB PRIMARY KEY, value BLOB)``.
    Keys and values are passed through ``_bytes`` before being stored.
    Looking up a missing key returns ``None`` instead of raising
    ``KeyError``.  Every write is committed immediately.
    """

    def __init__(self, path, flags='c', mode=None):
        """Connect to the SQLite database at *path* and ensure the table exists.

        Raises:
            NotImplementedError: if *mode* is given, or if *flags* contains
                a character other than ``'w'`` or ``'c'``.
        """
        if mode is not None:
            raise NotImplementedError(mode)
        for c in flags:
            if c not in 'wc':
                raise NotImplementedError('Unsupported flag', c)
        self.conn = sqlite3.connect(path)
        try:
            # Enable "go faster" stripes
            self.conn.execute('PRAGMA journal_mode = WAL')
            self.conn.execute(
                'CREATE TABLE IF NOT EXISTS main('
                'key BLOB PRIMARY KEY, value BLOB) WITHOUT ROWID;')
        except Exception:
            # Do not leak the open connection if setup fails.
            self.conn.close()
            raise

    def __getitem__(self, key):
        """Return the value stored under *key* (``db['foo']``), or ``None``."""
        row = self.conn.execute(
            'SELECT value FROM main WHERE key = :key',
            {'key': _bytes(key)}).fetchone()
        if row is None:
            return None
        return row[0]

    def __setitem__(self, key, value):
        """Insert or replace *value* under *key* (``db['foo'] = 'bar'``)."""
        self.conn.execute(
            'REPLACE INTO main (key, value) VALUES (:key, :value)',
            {'key': _bytes(key),
             'value': _bytes(value)})
        self.conn.commit()      # FIXME: yuk

    def __delitem__(self, key):
        """Delete the row for *key*, if any (``del db['foo']``)."""
        self.conn.execute(
            'DELETE FROM main WHERE key = :key',
            {'key': _bytes(key)})
        self.conn.commit()      # FIXME: yuk

    def __contains__(self, key):
        """Support ``'foo' in db`` / ``'foo' not in db`` via a lookup."""
        return self.__getitem__(key) is not None

    def close(self):
        """Close the underlying connection."""
        return self.conn.close()

    def sync(self):
        """Commit any pending transaction on the connection.

        The write methods of this class already commit after each statement,
        so this only matters for work done on ``self.conn`` directly.
        """
        self.conn.commit()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()
def open(*args, **kwargs):
    """Module-level entry point mirroring ``dbm.open``.

    Every positional and keyword argument is forwarded unchanged to
    ``DBMLikeSqliteConnection`` and the resulting instance is returned.
    """
    db = DBMLikeSqliteConnection(*args, **kwargs)
    return db
def whichdb(*args, **kwargs):
    """Placeholder for ``dbm.whichdb``; always raises ``NotImplementedError``."""
    raise NotImplementedError
def _bytes(b):
if isinstance(b, bytes):
return b
elif isinstance(b, str):
return bytes(b, encoding='UTF-8')
else:
raise ValueError(b)
Reply to: