#! /opt/alt/python312/bin/python3.12
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation. It handles
both plural forms and message contexts.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.2"

# Catalog being built: maps each message key (bytes) to its translation
# (bytes).  A key is the msgid, prefixed with the context and a b"\x04"
# separator when the entry has a msgctxt.  Filled by add() and read by
# generate().
MESSAGES = {}


def usage(code, msg=''):
    """Print the module help text to stderr and terminate the program.

    code -- process exit status passed to sys.exit().
    msg  -- optional extra message printed after the help text; skipped
            when empty.

    Never returns: always raises SystemExit.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(ctxt, id, str, fuzzy):
    """Add a non-fuzzy translation to the MESSAGES dictionary.

    ctxt  -- message context as bytes, or None when the entry has none.
    id    -- message id as bytes.
    str   -- translated message as bytes.
    fuzzy -- true when the entry is marked fuzzy.

    Fuzzy entries and entries with an empty translation are ignored.
    With a context, the key is the context and the id joined by b"\\x04".
    """
    if fuzzy or not str:
        return
    if ctxt is None:
        key = id
    else:
        key = b"%b\x04%b" % (ctxt, id)
    MESSAGES[key] = str


def generate():
    """Return the binary catalog (bytes) for the current MESSAGES.

    Layout of the result, in order:
      * a header of 7 integers: magic number, version, number of entries,
        offset of the key index, offset of the value index, and size and
        offset of the hash table (both 0, no hash table is written);
      * the key index, then the value index; each entry is a
        (length, file offset) pair;
      * all keys, each NUL terminated;
      * all values, each NUL terminated.

    Integers are packed with the platform's native byte order and sizes
    (no byte-order prefix is given to struct / array).
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    values = [MESSAGES[key] for key in keys]

    # For each string, we need size and offset within its own blob.  Each
    # string is NUL terminated; the NUL does not count into the size.
    offsets = []
    key_pos = value_pos = 0
    for key, value in zip(keys, values):
        offsets.append((key_pos, len(key), value_pos, len(value)))
        key_pos += len(key) + 1
        value_pos += len(value) + 1
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(value + b'\0' for value in values)

    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables (16 bytes per entry: one
    # length/offset pair for the key and one for the value).
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)

    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    koffsets = []
    voffsets = []
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]

    header = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return b''.join((header, index, ids, strs))


def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    filename -- input catalog; '.po' is appended when missing.
    outfile  -- output path, or None to use the input name with '.mo'.

    Parsed entries are stored with add() and the result of generate() is
    written to the output file.  MESSAGES is not cleared here, so entries
    from earlier calls in the same process are still part of the output.

    Exits with status 1 (SystemExit) when the input cannot be read or the
    catalog is malformed.  A failure to write the output is only reported
    on stderr.
    """
    ID = 1
    STR = 2
    CTXT = 3

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = msgctxt = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, line in enumerate(lines, 1):
        line = line.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if line[0] == '#' and section == STR:
            add(msgctxt, msgid, msgstr, fuzzy)
            section = msgctxt = None
            fuzzy = 0
        # Record a fuzzy mark
        if line[:2] == '#,' and 'fuzzy' in line:
            fuzzy = 1
        # Skip comments
        if line[0] == '#':
            continue
        # Now we are in a msgid or msgctxt section, output previous section
        if line.startswith('msgctxt'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # No comment separated the entries, so the fuzzy mark seen
                # so far belonged to the entry just stored.
                fuzzy = 0
            section = CTXT
            line = line[7:]
            msgctxt = b''
        elif line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgctxt, msgid, msgstr, fuzzy)
                # The stored entry's context and fuzzy mark must not leak
                # into this entry, which has no msgctxt of its own (a
                # msgctxt line would have moved us to the CTXT section).
                msgctxt = None
                fuzzy = 0
                if not msgid:
                    # The entry with the empty msgid is the header.
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        line = line.strip()
        if not line:
            continue
        # What remains is a quoted string; evaluate it as a Python literal
        # to resolve the escapes.
        line = ast.literal_eval(line)
        if section == CTXT:
            msgctxt += line.encode(encoding)
        elif section == ID:
            msgid += line.encode(encoding)
        elif section == STR:
            msgstr += line.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(line, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgctxt, msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Command-line entry point: parse sys.argv and compile each input file.

    Options: -h/--help prints the help text and exits with status 0,
    -V/--version prints the version and exits with status 0, and
    -o/--output-file sets the output path passed to every make() call.
    Invalid options print the help text plus the error and exit with
    status 1.  Running without an input file is reported on stderr and
    exits with status 1.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # A usage error must not look like success to the calling shell.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()