[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#987013: Release goal proposal: Remove Berkeley DB



Matthias Klose wrote:
>> Then there's user code too. I also think we'll need at least a dumper
>> utility so that users can migrate their data manually when they discover
>> their program no longer works after upgrading.
>
> For Python, the dbm/ndbm.py module, based on the _dbm extension is also
> affected.  You can build the _dbm extension using libgdbm-compat-dev, however
> that changes the on-disk format, and the license used (likely the new one should
> be moved into the python3-gdbm package).

Hi, I'm a nosy bystander.

Last year I was annoyed by scrapy using bdb to cache entire HTTP responses (including large HTML bodies).
As an experiment, I wrote some proof-of-concept code for other backends.
IIRC, if the database doesn't exist yet, they can be used as drop-in replacements for "import dbm".
Here they are attached, do with them what you will.
I don't intend to touch them again myself.

(They are expat licensed, but I can relicense if needed.)

(FWIW, I eventually ended up patching scrapy to use sqlite3 directly, and then gave up on scrapy entirely.)
# SYNOPSIS: a shim to use Kyoto Cabinet in apps that expect dbm's API
#
#   import dbm2lmdb as dbm
#   db = dbm.open('porn')
#   db['Alice'] = 'Bob'
#   db['Alice']
#   db['Clara']
#   del db['Alice']
#   db.close()
#
# SEE ALSO:
#
#   https://en.wikipedia.org/wiki/Tokyo_Cabinet_and_Kyoto_Cabinet
#   https://dbmx.net/kyotocabinet/pythondoc/  (seriously, frames?)
#
# NOTE: this is not even remotely working currently.

import pathlib
import logging

import kyotocabinet


class DBMLikeKyotoCabinet:
    """Minimal dbm-style wrapper around a ``kyotocabinet.DB`` handle.

    Supports item get/set, membership tests, ``close`` and ``sync``.
    Deletion, key iteration and ``reorganize`` are not implemented and
    raise ``NotImplementedError``.
    """

    def __init__(self, path, flags=None, mode=None):
        """Open the database at *path*, with its suffix replaced by ``.kch``.

        Raises:
            NotImplementedError: if *flags* or *mode* is supplied.
            RuntimeError: if the underlying ``open`` call reports failure.
        """
        if flags is not None or mode is not None:
            raise NotImplementedError(flags, mode)
        # kyotocabinet databases MUST have a specific extension?
        path_kch = pathlib.Path(path).with_suffix('.kch')
        self.db = kyotocabinet.DB()
        # Hand the binding a plain string rather than a Path object.
        ok = self.db.open(str(path_kch))
        if not ok:
            # The binding signals failure via its return value, not an
            # exception, so convert it here.
            raise RuntimeError(self.db.error())

    # db['foo']
    def __getitem__(self, key):
        """Return the value stored under *key*, or None if it is absent.

        A miss is logged as a warning together with the database's last
        error instead of raising ``KeyError``.
        """
        value = self.db.get(_bytes(key))
        if value is None:
            # Compare against None so that empty values are still returned.
            logging.warning('%s', self.db.error())
            return None
        return value

    # db['foo'] = 'bar'
    def __setitem__(self, key, value):
        """Store *value* under *key*; raise RuntimeError if the write fails."""
        ok = self.db.set(_bytes(key), _bytes(value))
        if not ok:
            raise RuntimeError(self.db.error())

    # del db['foo']
    def __delitem__(self, key):
        """Not implemented."""
        raise NotImplementedError()

    # 'foo' in db
    # 'foo' not in db
    def __contains__(self, key):
        """Return True if a value is stored under *key*."""
        # Query the database directly so a plain membership test does not
        # emit the "missing key" warning that __getitem__ logs.
        return self.db.get(_bytes(key)) is not None

    def close(self):
        """Close the underlying database and return its result."""
        return self.db.close()

    def sync(self):
        """Ask the underlying database to synchronize and return its result."""
        return self.db.synchronize()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()


def open(*args, **kwargs):
    """Mirror ``dbm.open``: forward every argument to DBMLikeKyotoCabinet."""
    database = DBMLikeKyotoCabinet(*args, **kwargs)
    return database


def whichdb(*args, **kwargs):
    """Placeholder for ``dbm.whichdb``; always raises NotImplementedError."""
    raise NotImplementedError


def _bytes(b):
    if isinstance(b, bytes):
        return b
    elif isinstance(b, str):
        return bytes(b, encoding='UTF-8')
    else:
        raise ValueError(b)
# SYNOPSIS: a shim to use lmdb in apps that expect dbm's API
#
#   import dbm2lmdb as dbm
#   db = dbm.open('porn')
#   db['Alice'] = 'Bob'
#   db['Alice']
#   db['Clara']
#   del db['Alice']
#   db.close()
#
# SEE ALSO:
#
#   https://lmdb.readthedocs.io
#   http://www.lmdb.tech/doc/

import lmdb


class DBMLikeLMDBEnvironment:
    """Minimal dbm-style wrapper around an ``lmdb.Environment``.

    Supports item get/set/delete, membership tests, ``close`` and ``sync``.
    Key iteration and ``reorganize`` are not implemented and raise
    ``NotImplementedError``.
    """

    def __init__(self,
                 path: str,
                 flags: str = 'r',
                 mode: int = 0o666):
        """Open the LMDB environment at *path*.

        Args:
            path: Location of the environment; passed through ``str()`` so
                pathlib paths are accepted too.
            flags: Any combination of ``'r'`` (open read-only), ``'w'`` and
                ``'c'`` (create if missing).
            mode: Accepted for signature compatibility with ``dbm.open``;
                not used.

        Raises:
            NotImplementedError: if *flags* contains any other character.
        """
        for c in flags:
            if c not in 'rwc':
                raise NotImplementedError('Unsupported flag', c)
        # By default LMDB lets you store up to 10MiB; increase that to 1GiB.
        # The size is given at construction time because
        # Environment.set_mapsize() requires python3-lmdb (>= 0.87) and
        # Debian 10 only has 0.86.
        self.env = lmdb.Environment(
            str(path),          # str() to add pathlib support
            map_size=2**30,
            readonly='r' in flags,
            create='c' in flags)

    # db['foo']
    def __getitem__(self, key):
        """Return the value stored under *key*, or None if it is absent."""
        with self.env.begin() as txn:
            return txn.get(_bytes(key))

    # db['foo'] = 'bar'
    def __setitem__(self, key, value):
        """Store *value* under *key* inside its own write transaction."""
        with self.env.begin(write=True) as txn:
            return txn.put(_bytes(key), _bytes(value))

    # del db['foo']
    def __delitem__(self, key):
        """Delete *key* inside its own write transaction."""
        # Deleting modifies the database, so the transaction must be opened
        # for writing; a default (read-only) transaction cannot delete.
        with self.env.begin(write=True) as txn:
            return txn.delete(_bytes(key))

    # 'foo' in db
    # 'foo' not in db
    def __contains__(self, key):
        """Return True if a value is stored under *key*."""
        return self.__getitem__(key) is not None

    def close(self):
        """Close the underlying environment."""
        return self.env.close()

    def sync(self):
        """Ask the underlying environment to sync and return its result."""
        return self.env.sync()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()


def open(*args, **kwargs):
    """Mirror ``dbm.open``: forward every argument to DBMLikeLMDBEnvironment."""
    environment = DBMLikeLMDBEnvironment(*args, **kwargs)
    return environment


def whichdb(*args, **kwargs):
    """Stand-in for ``dbm.whichdb``; unconditionally raises NotImplementedError."""
    raise NotImplementedError


def _bytes(b):
    if isinstance(b, bytes):
        return b
    elif isinstance(b, str):
        return bytes(b, encoding='UTF-8')
    else:
        raise ValueError(b)
# SYNOPSIS: a shim to use sqlite3 in apps that expect dbm's API
#
#   import dbm2sqlite as dbm
#   db = dbm.open('porn')
#   db['Alice'] = 'Bob'
#   db['Alice']
#   db['Clara']
#   del db['Alice']
#   db.close()
#
# SEE ALSO:
#
#   https://sqlite.org/affcase1.html
#
# PRIOR ART (just use that instead?):
#
#   https://bugs.python.org/issue3783

import sqlite3


class DBMLikeSqliteConnection:
    """Minimal dbm-style wrapper around a ``sqlite3`` connection.

    Records live in a single two-column table named ``main``. Supports item
    get/set/delete, membership tests, ``close`` and ``sync``. Key iteration
    and ``reorganize`` are not implemented and raise ``NotImplementedError``.
    """

    def __init__(self, path, flags='c', mode=None):
        """Connect to the SQLite file at *path* and ensure the table exists.

        Raises:
            NotImplementedError: if *mode* is supplied, or if *flags*
                contains a character other than ``'w'`` or ``'c'``.
        """
        if mode is not None:
            raise NotImplementedError(mode)
        for c in flags:
            if c not in 'wc':
                raise NotImplementedError('Unsupported flag', c)
        self.conn = sqlite3.connect(path)
        # Enable "go faster" stripes
        self.conn.execute('PRAGMA journal_mode = WAL')
        self.conn.execute('CREATE TABLE IF NOT EXISTS main(key BLOB PRIMARY KEY, value BLOB) WITHOUT ROWID;')

    # db['foo']
    def __getitem__(self, key):
        """Return the value stored under *key*, or None if it is absent."""
        row = self.conn.execute(
            'SELECT value FROM main WHERE key = :key',
            {'key': _bytes(key)}).fetchone()
        return row[0] if row else None

    # db['foo'] = 'bar'
    def __setitem__(self, key, value):
        """Insert or replace the record for *key* and commit it."""
        # Using the connection as a context manager commits on success and
        # rolls back if the statement raises, so a failed write cannot leave
        # a transaction open.
        with self.conn:
            self.conn.execute(
                'REPLACE INTO main (key, value) VALUES (:key, :value)',
                {'key': _bytes(key),
                 'value': _bytes(value)})

    # del db['foo']
    def __delitem__(self, key):
        """Delete the record for *key* (if any) and commit."""
        with self.conn:
            self.conn.execute(
                'DELETE FROM main WHERE key = :key',
                {'key': _bytes(key)})

    # 'foo' in db
    # 'foo' not in db
    def __contains__(self, key):
        """Return True if a value is stored under *key*."""
        return self.__getitem__(key) is not None

    def close(self):
        """Close the underlying connection."""
        return self.conn.close()

    def sync(self):
        """Commit any pending transaction.

        Every write in this class already commits, so this is normally a
        no-op; it exists so callers written against dbm's ``sync()`` work.
        """
        self.conn.commit()

    def firstkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def nextkey(self):
        """Not implemented."""
        raise NotImplementedError()

    def reorganize(self):
        """Not implemented."""
        raise NotImplementedError()


def open(*args, **kwargs):
    """Mirror ``dbm.open``: forward every argument to DBMLikeSqliteConnection."""
    connection = DBMLikeSqliteConnection(*args, **kwargs)
    return connection


def whichdb(*args, **kwargs):
    """Unimplemented counterpart of ``dbm.whichdb``; always raises."""
    raise NotImplementedError


def _bytes(b):
    if isinstance(b, bytes):
        return b
    elif isinstance(b, str):
        return bytes(b, encoding='UTF-8')
    else:
        raise ValueError(b)

Reply to: