2002-10-20 14:25:16 +00:00
|
|
|
|
#! /usr/bin/env python
|
2004-01-09 23:56:37 +00:00
|
|
|
|
# -*- coding: iso-8859-1 -*-
|
|
|
|
|
#
|
2002-10-20 14:25:16 +00:00
|
|
|
|
# Originally written by Barry Warsaw <barry@zope.com>
|
|
|
|
|
#
|
|
|
|
|
# Minimally patched to make it even more xgettext compatible
|
|
|
|
|
# by Peter Funk <pf@artcom-gmbh.de>
|
|
|
|
|
#
|
|
|
|
|
# Completely butchered to add glade support for the GRAMPS
|
|
|
|
|
# project by Don Allingham (dallingham@users.sourceforge.net)
|
|
|
|
|
#
|
2005-05-24 13:08:06 +00:00
|
|
|
|
# Further bastardized by Alex Roitman to support tips.xml file
|
2002-10-20 14:25:16 +00:00
|
|
|
|
|
2004-01-09 23:56:37 +00:00
|
|
|
|
# $Id$
|
|
|
|
|
|
2002-10-20 14:25:16 +00:00
|
|
|
|
"""pygettext -- Python equivalent of xgettext(1)
|
|
|
|
|
|
|
|
|
|
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
|
|
|
|
internationalization of C programs. Most of these tools are independent of
|
|
|
|
|
the programming language and can be used from within Python programs. Martin
|
|
|
|
|
von Loewis' work[1] helps considerably in this regard.
|
|
|
|
|
|
|
|
|
|
There's one problem though; xgettext is the program that scans source code
|
|
|
|
|
looking for message strings, but it groks only C (or C++). Python introduces
|
|
|
|
|
a few wrinkles, such as dual quoting characters, triple quoted strings, and
|
|
|
|
|
raw strings. xgettext understands none of this.
|
|
|
|
|
|
|
|
|
|
Enter pygettext, which uses Python's standard tokenize module to scan Python
|
|
|
|
|
source code, generating .pot files identical to what GNU xgettext[2] generates
|
|
|
|
|
for C and C++ code. From there, the standard GNU tools can be used.
|
|
|
|
|
|
|
|
|
|
A word about marking Python strings as candidates for translation. GNU
|
|
|
|
|
xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
|
|
|
|
|
gettext_noop. But those can be a lot of text to include all over your code.
|
|
|
|
|
C and C++ have a trick: they use the C preprocessor. Most internationalized C
|
|
|
|
|
source includes a #define for gettext() to _() so that what has to be written
|
|
|
|
|
in the source is much less. Thus these are both translatable strings:
|
|
|
|
|
|
|
|
|
|
gettext("Translatable String")
|
|
|
|
|
_("Translatable String")
|
|
|
|
|
|
|
|
|
|
Python of course has no preprocessor so this doesn't work so well. Thus,
|
|
|
|
|
pygettext searches only for _() by default, but see the -k/--keyword flag
|
|
|
|
|
below for how to augment this.
|
|
|
|
|
|
|
|
|
|
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
|
|
|
|
|
[2] http://www.gnu.org/software/gettext/gettext.html
|
|
|
|
|
|
|
|
|
|
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
|
|
|
|
|
where ever possible. However some options are still missing or are not fully
|
|
|
|
|
implemented. Also, xgettext's use of command line switches with option
|
|
|
|
|
arguments is broken, and in these cases, pygettext just defines additional
|
|
|
|
|
switches.
|
|
|
|
|
|
|
|
|
|
Usage: pygettext [options] inputfile ...
|
|
|
|
|
|
|
|
|
|
Options:
|
|
|
|
|
|
|
|
|
|
-a
|
|
|
|
|
--extract-all
|
|
|
|
|
Extract all strings.
|
|
|
|
|
|
|
|
|
|
-d name
|
|
|
|
|
--default-domain=name
|
|
|
|
|
Rename the default output file from messages.pot to name.pot.
|
|
|
|
|
|
|
|
|
|
-E
|
|
|
|
|
--escape
|
|
|
|
|
Replace non-ASCII characters with octal escape sequences.
|
|
|
|
|
|
|
|
|
|
-h
|
|
|
|
|
--help
|
|
|
|
|
Print this help message and exit.
|
|
|
|
|
|
|
|
|
|
-k word
|
|
|
|
|
--keyword=word
|
|
|
|
|
Keywords to look for in addition to the default set, which are:
|
|
|
|
|
%(DEFAULTKEYWORDS)s
|
|
|
|
|
|
|
|
|
|
You can have multiple -k flags on the command line.
|
|
|
|
|
|
|
|
|
|
-K
|
|
|
|
|
--no-default-keywords
|
|
|
|
|
Disable the default set of keywords (see above). Any keywords
|
|
|
|
|
explicitly added with the -k/--keyword option are still recognized.
|
|
|
|
|
|
|
|
|
|
-o filename
|
|
|
|
|
--output=filename
|
|
|
|
|
Rename the default output file from messages.pot to filename. If
|
|
|
|
|
filename is `-' then the output is sent to standard out.
|
|
|
|
|
|
|
|
|
|
-p dir
|
|
|
|
|
--output-dir=dir
|
|
|
|
|
Output files will be placed in directory dir.
|
|
|
|
|
|
|
|
|
|
-v
|
|
|
|
|
--verbose
|
|
|
|
|
Print the names of the files being processed.
|
|
|
|
|
|
|
|
|
|
-V
|
|
|
|
|
--version
|
|
|
|
|
Print the version of pygettext and exit.
|
|
|
|
|
|
|
|
|
|
-w columns
|
|
|
|
|
--width=columns
|
|
|
|
|
Set width of output to columns.
|
|
|
|
|
|
|
|
|
|
-x filename
|
|
|
|
|
--exclude-file=filename
|
|
|
|
|
Specify a file that contains a list of strings that are not to be
|
|
|
|
|
extracted from the input files. Each string to be excluded must
|
|
|
|
|
appear on a line by itself in the file.
|
|
|
|
|
|
|
|
|
|
If `inputfile' is -, standard input is read.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
import getopt
|
|
|
|
|
import tokenize
|
|
|
|
|
import operator
|
|
|
|
|
import re
|
2003-02-12 04:25:27 +00:00
|
|
|
|
import os
|
2002-10-20 14:25:16 +00:00
|
|
|
|
|
|
|
|
|
from xml.sax import make_parser,handler,SAXParseException
|
|
|
|
|
|
|
|
|
|
# Matches text that consists solely of decimal digits.  Kept under its
# historical name; the extractors below use the `_int_re` alias.
intRe = re.compile(r"^\d+$")

# for selftesting
try:
    import fintl
    _ = fintl.gettext
except ImportError:
    # No fintl module available: fall back to an identity "translation".
    def _(s): return s

__version__ = '1.4'

# Function names whose string arguments are extracted by default.
default_keywords = ['_', '__']

EMPTYSTRING = ''

# Pattern used by the XML extractors to skip purely numeric text.
_int_re = re.compile(r"^\d+$")

# Strings the XML extractors never record (looked up by exact match).
_ignore = {':': 0, '*': 0}
|
|
|
|
|
|
|
|
|
|
class GladeExtractor:
    """Collect translatable strings from Glade XML files.

    The strings are stored in the dictionary handed to the constructor,
    in the form ``{message: {(filename, lineno): 0}}``.
    """

    def __init__(self,msgs):
        """Remember *msgs*, the dictionary that extracted strings go into."""
        self.strings = msgs
        # Name of the file being parsed; set by parse().  Initialised here
        # so add_string() never hits a missing attribute.
        self.file = None

    def add_string(self, str, lineno):
        """Record message *str* as found at line *lineno* of the current file.

        Whitespace-only strings and strings listed in the module-level
        ``_ignore`` table are skipped.
        """
        if str.strip() == "":
            return
        if str in _ignore:
            return
        entry = (self.file, lineno)
        # One inner dictionary per message, keyed by location.
        self.strings.setdefault(str, {})[entry] = 0

    def parse(self,file):
        """Parse the Glade file named *file*, feeding its strings to add_string()."""
        self.p = make_parser()
        self.p.setContentHandler(GladeParser(self,file))
        filename = "file://" + os.path.abspath(file)
        # Locations are recorded under the name the caller supplied, not
        # under the absolute file:// URL given to the SAX parser.
        self.file = file
        self.p.parse(filename)
|
|
|
|
|
|
|
|
|
|
class GladeParser(handler.ContentHandler):
    """
    SAX content handler that picks translatable text out of a Glade file.

    Text inside a <property> element whose 'translatable' attribute is
    'yes' is passed to the parent's add_string() method.
    """

    def __init__(self,parent,filename):
        """
        Creates the handler.

        parent   - object whose add_string(text, lineno) method receives
                   every extracted string.
        filename - name of the file being parsed; only stored.
        """
        handler.ContentHandler.__init__(self)
        self.parent = parent
        self.translate = 0
        self.text = ""
        self.filename = filename
        self.lineno = 0

    def startElement(self,tag,attrs):
        """
        Overridden method that handles the start of an XML element.

        A <property> carrying a 'translatable' attribute resets the
        collected text and arms (value 'yes') or disarms extraction.
        """
        if tag == "property":
            # 'in' works for plain dictionaries and for SAX attribute
            # objects alike; has_key() is not available on Python 3.
            if 'translatable' in attrs:
                self.text = ""
                if attrs['translatable'] == 'yes':
                    self.translate = 1
                else:
                    self.translate = 0

    def endElement(self,tag):
        "Overridden method that handles the end of an XML element"
        if self.translate:
            # Purely numeric property values are not worth translating.
            if not _int_re.match(self.text):
                self.parent.add_string(self.text, self.locator.getLineNumber())
            self.translate = 0

    def setDocumentLocator(self,locator):
        "Keep the locator so endElement() can report line numbers"
        self.locator = locator

    def characters(self, data):
        "Append a chunk of character data to the text collected so far"
        self.text = self.text + data
|
2002-10-20 14:25:16 +00:00
|
|
|
|
|
2005-05-24 13:08:06 +00:00
|
|
|
|
class TipExtractor:
    """Collect translatable tips from a tips.xml file.

    The strings are stored in the dictionary handed to the constructor,
    in the form ``{message: {(filename, lineno): 0}}``.
    """

    def __init__(self,msgs):
        """Remember *msgs*, the dictionary that extracted strings go into."""
        self.strings = msgs
        # Name of the file being parsed; set by parse().  Initialised here
        # so add_string() never hits a missing attribute.
        self.file = None

    def add_string(self, str, lineno):
        """Record message *str* as found at line *lineno* of the current file.

        Whitespace-only strings and strings listed in the module-level
        ``_ignore`` table are skipped.
        """
        if str.strip() == "":
            return
        if str in _ignore:
            return
        entry = (self.file, lineno)
        # One inner dictionary per message, keyed by location.
        self.strings.setdefault(str, {})[entry] = 0

    def parse(self,file):
        """Parse the tips file named *file*, feeding its strings to add_string()."""
        self.p = make_parser()
        self.p.setContentHandler(TipParser(self,file))
        filename = "file://" + os.path.abspath(file)
        # Locations are recorded under the name the caller supplied, not
        # under the absolute file:// URL given to the SAX parser.
        self.file = file
        self.p.parse(filename)
|
|
|
|
|
|
|
|
|
|
class TipParser(handler.ContentHandler):
    """
    SAX parsing class for the Tips XML file.

    This parser needs to extract strings in *exactly* the same way
    as the TipOfDay.TipParser does. Otherwise, msgid's won't be correctly
    matched.
    """

    def __init__(self,parent,filename):
        """
        Creates the handler.

        parent   - object whose add_string(text, lineno) method receives
                   every extracted tip.
        filename - name of the file being parsed; only stored.
        """
        handler.ContentHandler.__init__(self)
        self.parent = parent
        self.translate = 0
        self.text = ""
        self.filename = filename
        self.lineno = 0

    def startElement(self,tag,attrs):
        """
        Overridden method that handles the start of an XML element.

        <tip> starts a fresh text buffer; any tag other than <tips> is
        copied into the buffer as literal markup.
        """
        if tag == "tip":
            self.text = ""
        elif tag != "tips":
            # let all the other tags through, except for the "tips" tag
            self.text = self.text + "<%s>" % tag

    def endElement(self,tag):
        """
        Overridden method that handles the end of an XML element.

        </tip> hands the collected text (escaped, with runs of whitespace
        collapsed to single spaces) to the parent, unless it is purely
        numeric.
        """
        if tag == "tip":
            if not _int_re.match(self.text):
                text = self.escape(self.text)
                self.parent.add_string(' '.join(text.split()),
                                       self.locator.getLineNumber())
        elif tag != "tips":
            # let all the other tags through, except for the "tips" tag
            self.text = self.text + "</%s>" % tag

    def setDocumentLocator(self,locator):
        "Keep the locator so endElement() can report line numbers"
        self.locator = locator

    def characters(self, data):
        "Append a chunk of character data to the text collected so far"
        self.text = self.text + data

    def escape(self,text):
        """
        The tip's text will be interpreted as a markup, so we need to escape
        some special chars.
        """
        # The SAX parser hands us '&' already decoded; put the entity back.
        # (The previous replace('&', '&') was a no-op.)
        text = text.replace('&', '&amp;')       # Must be first
        return text
|
|
|
|
|
|
2002-10-20 14:25:16 +00:00
|
|
|
|
# The normal pot-file header. msgmerge and Emacs's po-mode work better if it's
# there.
#
# The text is a %-format template: TokenEater.write() fills in %(time)s and
# %(version)s.  The doubled backslashes produce a literal "\n" inside each
# quoted header line of the generated file.
pot_header = _('''\
# GRAMPS
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: GRAMPS VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=iso-8859-1\\n"
"Content-Transfer-Encoding: 8bit\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
|
|
|
|
|
|
|
|
|
|
def usage(code, msg=''):
    """Write the help text (and optionally *msg*) to stderr, then exit.

    code - process exit status handed to sys.exit().
    msg  - optional extra message; may be a string or an exception object.
    """
    # The module docstring contains %(DEFAULTKEYWORDS)s, but this file
    # defines no global of that name, so formatting against globals()
    # alone raised KeyError.  Supply the value from default_keywords.
    values = globals().copy()
    if 'DEFAULTKEYWORDS' not in values:
        values['DEFAULTKEYWORDS'] = ', '.join(default_keywords)
    sys.stderr.write(_(__doc__) % values)
    if msg:
        # %s formatting also copes with exception objects, which
        # file.write() itself rejects.
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Escape table: escapes[n] is the text written to the .pot file for the
# character with ordinal n.  Filled in by make_escapes().
escapes = []

def make_escapes(pass_iso8859):
    """(Re)build the module-level escape table.

    pass_iso8859 - if true, characters whose ordinal modulo 128 lies in
                   32..126 (printable ASCII plus most of the upper
                   iso-8859 range) pass through unchanged; if false, only
                   ordinals 32..126 pass through.  Every other character
                   becomes a three-digit octal escape.

    The table is replaced in place, so calling this again is safe and the
    latest argument wins.  (Appending, as before, left the first table in
    effect and grew the list by 256 entries per call.)
    """
    if pass_iso8859:
        # Allow iso-8859 characters to pass through so that e.g. a msgid
        # containing an o-umlaut is written as that character and not as
        # the escape \366.  Otherwise we escape any character outside the
        # 32..126 range.
        mod = 128
    else:
        mod = 256
    table = []
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            table.append(chr(i))
        else:
            table.append("\\%03o" % i)
    # C-style escapes take precedence over the generic rules above.
    table[ord('\\')] = '\\\\'
    table[ord('\t')] = '\\t'
    table[ord('\r')] = '\\r'
    table[ord('\n')] = '\\n'
    table[ord('\"')] = '\\"'
    # Slice assignment keeps the identity of the shared list.
    escapes[:] = table


def escape(s):
    """Return *s* with every character replaced by its escape-table entry.

    make_escapes() must have been called first; the table is indexed by
    character ordinal.
    """
    return ''.join([escapes[ord(ch)] for ch in s])
|
2002-10-20 14:25:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def safe_eval(s):
    """Evaluate the source text of a string literal and return its value.

    The expression is evaluated with an empty builtins table and no local
    names, so it cannot call builtin functions.
    """
    # NOTE(review): eval() with emptied builtins is not a real sandbox.
    # In this file it is only given STRING tokens from the tokenizer.
    restricted_globals = {'__builtins__': {}}
    no_locals = {}
    return eval(s, restricted_globals, no_locals)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize(s):
    """Return message *s* as quoted text for a msgid line of a .po file.

    A message without newlines becomes one quoted, escaped string.  A
    message with newlines becomes an empty "" line followed by one quoted
    string per source line, in the C-like style .po files use.
    """
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + escape(s) + '"'
    if not pieces[-1]:
        # The message ended with a newline: drop the empty tail and put
        # the newline back on the last real line so it gets escaped.
        pieces.pop()
        pieces[-1] = pieces[-1] + '\n'
    quoted = [escape(piece) for piece in pieces]
    # Each line ends with a literal \n, a closing quote, and a new opening
    # quote on the next output line.
    separator = '\\n"\n"'
    return '""\n"' + separator.join(quoted) + '"'
|
|
|
|
|
|
|
|
|
|
class TokenEater:
    """State machine that pulls keyword-marked strings out of Python tokens.

    An instance is called once per token with the five values the
    tokenize module produces.  String literals found between the
    parentheses following one of ``options.keywords`` are joined and
    stored as ``{message: {(filename, lineno): isdocstring}}``.

    The options object must provide ``keywords``, ``toexclude`` and
    ``width``.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}
        self.__state = self.__waiting
        self.__data = []
        self.__lineno = -1
        self.__freshmodule = 1
        self.__curfile = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state; stup[0] is the token's start row.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    # NOTE(review): nothing in this class ever switches to __suiteseen or
    # __suitedocstring, so the two docstring states below are unused.
    def __suiteseen(self, ttype, tstring, lineno):
        # ignore anything until we see the colon
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # ignore any intervening noise
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # there was no class docstring
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ')':
            # We've seen the last of the translatable strings.  Record the
            # line number of the first line of the strings and update the list
            # of messages seen.  Reset state for the next batch.  If there
            # were no strings inside _(), then just ignore this entry.
            if self.__data:
                self.__addentry(''.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        # TBD: should we warn if we seen anything else?

    def __addentry(self, msg, lineno=None, isdocstring=0):
        # lineno defaults to the line of the opening parenthesis.
        if lineno is None:
            lineno = self.__lineno
        if msg not in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        """Set the file name recorded with messages found from now on."""
        self.__curfile = filename
        self.__freshmodule = 1

    def get_messages(self):
        """Return the live message dictionary (not a copy)."""
        return self.__messages

    def write(self, fp):
        """Write the header and all collected messages to file object *fp*."""
        options = self.__options
        timestamp = time.ctime(time.time())
        # The time stamp in the header doesn't have the same format as that
        # generated by xgettext...
        fp.write(pot_header % {'time': timestamp, 'version': __version__})
        fp.write('\n')
        # Sort the entries.  First sort each particular entry's keys, then
        # sort all the entries by their first item.
        reverse = {}
        for k, v in self.__messages.items():
            keys = list(v.keys())
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = list(reverse.keys())
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                # k is the message string, v is a dictionary-set of (filename,
                # lineno) tuples.  We want to sort the entries in v first by
                # file name and then by line number.
                locations = list(v.keys())
                locations.sort()
                # Pack as many locations on a "#:" comment line as fit in
                # options.width columns.
                locline = '#:'
                for filename, lineno in locations:
                    d = {'filename': filename, 'lineno': lineno}
                    s = _(' %(filename)s:%(lineno)d') % d
                    if len(locline) + len(s) <= options.width:
                        locline = locline + s
                    else:
                        fp.write(locline + "\n")
                        locline = "#:" + s
                if len(locline) > 2:
                    fp.write(locline + "\n")
                fp.write('msgid ' + normalize(k) + "\n")
                fp.write('msgstr ""\n' + "\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point: parse options, scan inputs, write the .pot.

    Inputs are dispatched on their name: '*glade' goes to GladeExtractor,
    '*tips.xml' to TipExtractor, other '*xml' files to a generic XML
    handler if one is defined, and everything else (or '-' for standard
    input) is tokenized as Python source.

    -D, -n, -S, -X, --style, --docstrings and --no-docstrings are accepted
    by getopt but have no handler below, so they are ignored.
    """
    global default_keywords
    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'ad:DEhk:Kno:p:S:Vvw:x:X:',
            ['extract-all', 'default-domain=', 'escape', 'help',
             'keyword=', 'no-default-keywords',
             'output=', 'output-dir=',
             'style=', 'verbose', 'version', 'width=', 'exclude-file=',
             'docstrings', 'no-docstrings',
             ])
    except getopt.error:
        # sys.exc_info() keeps the syntax valid on every Python version;
        # pass text, not the exception object, to usage().
        usage(1, str(sys.exc_info()[1]))

    # for holding option values
    class Options:
        # defaults
        extractall = 0 # FIXME: currently this option has no effect at all.
        escape = 0
        keywords = []
        outpath = ''
        outfile = 'messages.pot'
        verbose = 0
        width = 78
        excludefilename = ''
        docstrings = 0
        nodocstrings = {}

    options = Options()

    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-a', '--extract-all'):
            options.extractall = 1
        elif opt in ('-d', '--default-domain'):
            options.outfile = arg + '.pot'
        elif opt in ('-E', '--escape'):
            options.escape = 1
        elif opt in ('-k', '--keyword'):
            options.keywords.append(arg)
        elif opt in ('-K', '--no-default-keywords'):
            default_keywords = []
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--output-dir'):
            options.outpath = arg
        elif opt in ('-v', '--verbose'):
            options.verbose = 1
        elif opt in ('-V', '--version'):
            sys.stdout.write(
                _('pygettext.py (xgettext for Python) %s') % __version__ + '\n')
            sys.exit(0)
        elif opt in ('-w', '--width'):
            try:
                options.width = int(arg)
            except ValueError:
                usage(1, _('--width argument must be an integer: %s') % arg)
        elif opt in ('-x', '--exclude-file'):
            options.excludefilename = arg

    # calculate escapes.  make_escapes() takes "pass iso-8859 characters
    # through", which is the opposite of -E ("replace non-ASCII characters
    # with octal escapes"), hence the negation.
    make_escapes(not options.escape)

    # calculate all keywords
    options.keywords.extend(default_keywords)

    # initialize list of strings to exclude
    if options.excludefilename:
        try:
            fp = open(options.excludefilename)
            try:
                raw_lines = fp.readlines()
            finally:
                fp.close()
        except IOError:
            sys.stderr.write(
                _("Can't read --exclude-file: %s") % options.excludefilename
                + '\n')
            sys.exit(1)
        # readlines() keeps the line terminator, so the raw lines only ever
        # matched messages that end in a newline.  Add the stripped forms so
        # "one string per line" works; keep the raw ones for compatibility.
        options.toexclude = raw_lines + [line.rstrip('\r\n')
                                         for line in raw_lines]
    else:
        options.toexclude = []

    # slurp through all the files
    eater = TokenEater(options)
    p = GladeExtractor(eater.get_messages())
    tp = TipExtractor(eater.get_messages())

    for filename in args:
        if filename[-5:] == 'glade':
            sys.stdout.write('Working on %s\n' % filename)
            p.parse(filename)
        elif filename[-8:] == 'tips.xml':
            # Using our own custom Tips parser for tips.xml
            sys.stdout.write('Working on %s\n' % filename)
            tp.parse(filename)
        elif filename[-3:] == 'xml':
            # THIS IS NOT WORKING -- something has changed in SAX/Expat
            # No XMLParser class is defined in this file; look it up so a
            # missing one is reported instead of raising NameError.
            xml_handler_class = globals().get('XMLParser')
            if xml_handler_class is None:
                sys.stderr.write(
                    'skipping %s: no generic XML extractor is available\n'
                    % filename)
                continue
            try:
                parser = make_parser()
                pxml = xml_handler_class(filename, eater.get_messages())
                parser.setContentHandler(pxml)
                parser.parse(filename)
            except (IOError, OSError, SAXParseException):
                sys.stdout.write('failed on %s\n' % filename)
        else:
            sys.stdout.write('Working on %s\n' % filename)
            if filename == '-':
                # Documented in the usage text: '-' means standard input.
                fp = sys.stdin
                closep = 0
            else:
                fp = open(filename)
                closep = 1
            try:
                eater.set_filename(filename)
                try:
                    # generate_tokens() exists on Python 2 and 3; the
                    # callback form of tokenize.tokenize() is Python 2 only.
                    for token in tokenize.generate_tokens(fp.readline):
                        eater(*token)
                except tokenize.TokenError:
                    e = sys.exc_info()[1]
                    sys.stderr.write('%s: %s, line %d, column %d\n' % (
                        e.args[0], filename, e.args[1][0], e.args[1][1]))
            finally:
                if closep:
                    fp.close()

    # write the output
    if options.outfile == '-':
        fp = sys.stdout
        closep = 0
    else:
        if options.outpath:
            options.outfile = os.path.join(options.outpath, options.outfile)
        fp = open(options.outfile, 'w')
        closep = 1
    try:
        eater.write(fp)
    finally:
        if closep:
            fp.close()


if __name__ == '__main__':
    main()
|