"""A dumb and slow but simple dbm clone.

For database spam, spam.dir contains the index (a text file),
spam.bak *may* contain a backup of the index (also a text file),
while spam.dat contains the data (a binary file).

XXX TO DO:

- seems to contain a bug when updating...

- reclaim free space (currently, space once occupied by deleted or expanded
  items is never reused)

- support concurrent access (currently, if two processes take turns making
  updates, they can mess up the index)

- support efficient access to large databases (currently, the whole index
  is read when the database is opened, and some updates rewrite the whole index)

- support opening for read-only (flag = 'm')

"""


import ast as _ast

[23] Fix | Delete

import os as _os

[24] Fix | Delete

import __builtin__

[25] Fix | Delete

import UserDict

[26] Fix | Delete

[27] Fix | Delete

_open = __builtin__.open

[28] Fix | Delete

[29] Fix | Delete

_BLOCKSIZE = 512

[30] Fix | Delete

[31] Fix | Delete

error = IOError # For anydbm

[32] Fix | Delete

[33] Fix | Delete

class _Database(UserDict.DictMixin):
    """Dictionary-like database kept in a text index file and a data file.

    Keys and values must both be plain ``str`` objects.  An in-memory
    index maps every key to a ``(pos, siz)`` pair locating its value in
    the data file; ``_commit()`` writes that index to the directory file.
    """

    # The on-disk directory and data files can remain in mutually
    # inconsistent states for an arbitrarily long time (see comments
    # at the end of __setitem__).  This is only repaired when _commit()
    # gets called.  One place _commit() gets called is from __del__(),
    # and if that occurs at program shutdown time, module globals may
    # already have gotten rebound to None.  Since it's crucial that
    # _commit() finish successfully, we can't ignore shutdown races
    # here, and _commit() must not reference any globals.
    _os = _os       # for _commit()
    _open = _open   # for _commit()

    # Class-level fallback: __del__ is close(), which still runs when
    # __init__() raised before assigning self._index, and _commit()
    # reads it.  With this default such a teardown is a silent no-op.
    _index = None

    def __init__(self, filebasename, mode, flag='c'):
        """Set up file names, create the data file if needed, load the index.

        filebasename -- path prefix; os.extsep plus 'dir', 'dat' and 'bak'
                        is appended to build the three file names.
        mode         -- permission bits handed to os.chmod() by _chmod().
        flag         -- only compared against 'r', which sets _readonly.
        """
        self._mode = mode
        self._readonly = (flag == 'r')

        # The directory file is a text file.  Each line looks like
        #    "%r, (%d, %d)\n" % (key, pos, siz)
        # where key is the string key, pos is the offset into the dat
        # file of the associated value's first byte, and siz is the number
        # of bytes in the associated value.
        self._dirfile = filebasename + _os.extsep + 'dir'

        # The data file is a binary file pointed into by the directory
        # file, and holds the values associated with keys.  Each value
        # begins at a _BLOCKSIZE-aligned byte offset, and is a raw
        # binary 8-bit string value.
        self._datfile = filebasename + _os.extsep + 'dat'
        self._bakfile = filebasename + _os.extsep + 'bak'

        # The index is an in-memory dict, mirroring the directory file.
        self._index = None  # maps keys to (pos, siz) pairs

        # Mod by Jack: create data file if needed
        try:
            f = _open(self._datfile, 'r')
        except IOError:
            with _open(self._datfile, 'w') as f:
                self._chmod(self._datfile)
        else:
            f.close()
        self._update()

    def _update(self):
        """Read the directory file into the in-memory index dict.

        When the directory file cannot be opened the index stays empty
        and _modified becomes ``not self._readonly``.
        """
        self._index = {}
        try:
            f = _open(self._dirfile)
        except IOError:
            self._modified = not self._readonly
        else:
            self._modified = False
            with f:
                for line in f:
                    line = line.rstrip()
                    key, pos_and_siz_pair = _ast.literal_eval(line)
                    self._index[key] = pos_and_siz_pair

    def _commit(self):
        """Write the index dict to the directory file.

        The original directory file (if any) is renamed with a .bak
        extension first.  If a .bak file currently exists, it's deleted.
        Does nothing when the database is closed or unmodified.
        """
        # CAUTION:  It's vital that _commit() succeed, and _commit() can
        # be called from __del__().  Therefore we must never reference a
        # global in this routine.
        if self._index is None or not self._modified:
            return  # nothing to do

        # Both steps are best-effort: a missing .bak or .dir file is fine.
        try:
            self._os.unlink(self._bakfile)
        except self._os.error:
            pass

        try:
            self._os.rename(self._dirfile, self._bakfile)
        except self._os.error:
            pass

        with self._open(self._dirfile, 'w') as f:
            self._chmod(self._dirfile)
            for key, pos_and_siz_pair in self._index.iteritems():
                f.write("%r, %r\n" % (key, pos_and_siz_pair))

    sync = _commit

    def __getitem__(self, key):
        """Return the value stored for key; raise KeyError if absent."""
        pos, siz = self._index[key]     # may raise KeyError
        with _open(self._datfile, 'rb') as f:
            f.seek(pos)
            dat = f.read(siz)
        return dat

    def _addval(self, val):
        """Append val to the data file at a _BLOCKSIZE-aligned offset.

        The data file is first padded with NUL bytes (if needed) to get
        to an aligned offset.  Return pair
            (starting offset of val, len(val))
        """
        with _open(self._datfile, 'rb+') as f:
            f.seek(0, 2)
            pos = int(f.tell())
            # Round the end-of-file offset up to the next block boundary.
            npos = ((pos + _BLOCKSIZE - 1) // _BLOCKSIZE) * _BLOCKSIZE
            f.write('\0'*(npos-pos))
            pos = npos
            f.write(val)
        return (pos, len(val))

    def _setval(self, pos, val):
        """Write val to the data file, starting at offset pos.

        The caller is responsible for ensuring that there's enough room
        starting at pos to hold val, without overwriting some other
        value.  Return pair (pos, len(val)).
        """
        with _open(self._datfile, 'rb+') as f:
            f.seek(pos)
            f.write(val)
        return (pos, len(val))

    def _addkey(self, key, pos_and_siz_pair):
        """Record a new key in the index and append it to the directory file.

        key is a new key whose associated value starts in the data file
        at offset pos and with length siz, given as pos_and_siz_pair.
        """
        self._index[key] = pos_and_siz_pair
        with _open(self._dirfile, 'a') as f:
            self._chmod(self._dirfile)
            f.write("%r, %r\n" % (key, pos_and_siz_pair))

    def __setitem__(self, key, val):
        """Store val under key; raise TypeError unless both are str."""
        # Exact-type check on purpose: keys are written to the directory
        # file with %r and read back with ast.literal_eval().
        if type(key) is not str or type(val) is not str:
            raise TypeError("keys and values must be strings")
        self._modified = True
        if key not in self._index:
            self._addkey(key, self._addval(val))
        else:
            # See whether the new value is small enough to fit in the
            # (padded) space currently occupied by the old value.
            pos, siz = self._index[key]
            oldblocks = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            newblocks = (len(val) + _BLOCKSIZE - 1) // _BLOCKSIZE
            if newblocks <= oldblocks:
                self._index[key] = self._setval(pos, val)
            else:
                # The new value doesn't fit in the (padded) space used
                # by the old value.  The blocks used by the old value are
                # forever lost.
                self._index[key] = self._addval(val)

            # Note that _index may be out of synch with the directory
            # file now:  _setval() and _addval() don't update the directory
            # file.  This also means that the on-disk directory and data
            # files are in a mutually inconsistent state, and they'll
            # remain that way until _commit() is called.  Note that this
            # is a disaster (for the database) if the program crashes
            # (so that _commit() never gets called).

    def __delitem__(self, key):
        """Remove key from the index and commit; raise KeyError if absent."""
        # The blocks used by the associated value are lost.
        del self._index[key]            # may raise KeyError
        # Mark the index dirty only once the key is really gone, so a
        # failed delete does not force a needless index rewrite later.
        self._modified = True
        # XXX It's unclear why we do a _commit() here (the code always
        # XXX has, so I'm not changing it).  __setitem__ doesn't try to
        # XXX keep the directory file in synch.  Why should we?  Or
        # XXX why shouldn't __setitem__?
        self._commit()

    def keys(self):
        """Return the keys currently in the index."""
        return self._index.keys()

    def has_key(self, key):
        """Return True if key is in the index."""
        return key in self._index

    def __contains__(self, key):
        """Return True if key is in the index."""
        return key in self._index

    def iterkeys(self):
        """Return an iterator over the keys in the index."""
        return self._index.iterkeys()
    __iter__ = iterkeys

    def __len__(self):
        """Return the number of keys in the index."""
        return len(self._index)

    def close(self):
        """Commit pending index changes, then discard all state.

        Also installed as __del__.  State is cleared even if the commit
        raises; a later call is a no-op because _index is then None.
        """
        try:
            self._commit()
        finally:
            self._index = self._datfile = self._dirfile = self._bakfile = None

    __del__ = close

    def _chmod(self, file):
        """Apply self._mode to file where the platform offers os.chmod."""
        if hasattr(self._os, 'chmod'):
            self._os.chmod(file, self._mode)



def open(file, flag=None, mode=0o666):
    """Open the database with base file name *file* and return the object.

    The data file is created if it does not exist.

    The flag argument is forwarded unchanged to _Database, which only
    checks whether it equals 'r'; no flag value is rejected here.

    The optional mode argument is the UNIX permission mode.  It defaults
    to octal 0o666, has the bits set in the prevailing umask cleared, and
    is then applied with os.chmod() to the files _Database writes.
    """
    # Modify mode depending on the umask.  os.umask() can only be read by
    # setting it, so it is set to 0 and restored straight away; the umask
    # is briefly cleared for the whole process while this runs.
    try:
        um = _os.umask(0)
        _os.umask(um)
    except AttributeError:
        # No os.umask on this platform: leave mode untouched.
        pass
    else:
        # Turn off any bits that are set in the umask
        mode = mode & (~um)

    return _Database(file, mode, flag)