2006-05-02 02:41:26 +05:30
|
|
|
#! /usr/bin/env python
|
|
|
|
#
|
2006-11-09 00:52:08 +05:30
|
|
|
# check_po - a gramps tool to check validity of po files
|
2006-05-02 02:41:26 +05:30
|
|
|
#
|
2006-11-09 00:52:08 +05:30
|
|
|
# Copyright (C) 2006-2006 Kees Bakker
|
2006-05-02 02:41:26 +05:30
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
2014-08-09 07:59:07 +05:30
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2006-05-02 02:41:26 +05:30
|
|
|
|
2006-11-16 02:30:24 +05:30
|
|
|
#
|
|
|
|
# TODO
|
|
|
|
#
|
|
|
|
# * Check for HTML text in msgstr when there is none in msgid
|
|
|
|
# * Check for matching HTML tag/endtag in msgstr
|
|
|
|
#
|
|
|
|
|
2006-05-02 02:41:26 +05:30
|
|
|
import sys
|
|
|
|
import re
|
2012-12-02 19:22:37 +05:30
|
|
|
import os
|
2015-07-15 23:14:23 +05:30
|
|
|
import io
|
2012-12-02 19:22:37 +05:30
|
|
|
from argparse import ArgumentParser
|
2006-05-02 02:41:26 +05:30
|
|
|
|
|
|
|
# Module-level statistics tables, one independent dict per statistic.
# NOTE(review): nothing in the visible part of this file reads or fills
# them - confirm before removing.
all_total = dict()
all_fuzzy = dict()
all_untranslated = dict()
all_percent_s = dict()
all_named_s = dict()
all_bnamed_s = dict()
all_context = dict()
all_coverage = dict()
all_template_coverage = dict()
|
|
|
|
|
|
|
|
def strip_quotes(st):
    """Return *st* without its enclosing pair of double quotes.

    The quotes are removed only when the string is at least two characters
    long and both its first and its last character are a double quote;
    any other string is returned unchanged.
    """
    is_quoted = len(st) >= 2 and st.startswith('"') and st.endswith('"')
    return st[1:-1] if is_quoted else st
|
2006-11-09 00:52:08 +05:30
|
|
|
|
2009-02-06 02:36:39 +05:30
|
|
|
class CheckException(Exception):
    """Error raised when a PO file cannot be parsed."""
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-12 01:33:53 +05:30
|
|
|
# This is a base class for all checks
|
|
|
|
class Check:
    """Base class for all checks.

    A check collects the offending messages in ``self.msgs``.  This base
    class reads two attributes it does not define itself, so a subclass is
    expected to set them:

    * ``diag_header``  - heading printed by :meth:`diag`
    * ``summary_text`` - label printed by :meth:`summary`
    """

    def __init__(self):
        self.msgs = []

    def diag(self):
        """Print the header and the details of every collected message.

        Nothing at all is printed when no message was collected.
        """
        if self.msgs:
            # A bare ``print`` is a no-op expression in Python 3; call it so
            # the blank separator line is really emitted.
            print()
            print(self.diag_header)
            for m in self.msgs:
                m.diag()

    def summary(self):
        """Print the label followed by the number of collected messages."""
        print("%-20s%d" % (self.summary_text, len(self.msgs)))
|
2006-11-12 01:33:53 +05:30
|
|
|
|
|
|
|
class Check_fmt(Check):
    """Check that a format specifier (e.g. ``%s``) occurs equally often in
    the original text and in its translation."""

    def __init__(self, fmt):
        Check.__init__(self)
        self.fmt = fmt
        self.summary_text = "%s mismatches:" % fmt
        self.diag_header = "-------- %s mismatches --------------" % fmt

    def __process(self, msg, msgid, msgstr):
        # Record the message when the two occurrence counts differ.
        if msgid.count(self.fmt) != msgstr.count(self.fmt):
            self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair."""
        pairs = [(msg.msgid, msg.msgstr[0])]
        if msg.msgidp and len(msg.msgstr) >= 2:
            pairs.append((msg.msgidp, msg.msgstr[1]))
        for original, translated in pairs:
            self.__process(msg, original, translated)
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-12 01:33:53 +05:30
|
|
|
class Check_named_fmt(Check):
    """Check that the translation uses exactly the same ``%(name)x`` named
    format specifiers as the original text."""

    # A pattern to find all %()
    # (raw string: '\(' , '\w' ... are invalid escapes in a normal literal)
    find_named_fmt_pat = re.compile(r'% \( \w+ \) \d* \D', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- %() name mismatches --------------"
        self.summary_text = "%() name mismatches:"

    def __process(self, msg, msgid, msgstr):
        # Comparing the sorted lists covers both "different number of named
        # formats" and "same number but different names".
        fmts1 = sorted(self.find_named_fmt_pat.findall(msgid))
        fmts2 = sorted(self.find_named_fmt_pat.findall(msgstr))
        if fmts1 != fmts2:
            self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair."""
        self.__process(msg, msg.msgid, msg.msgstr[0])

        if msg.msgidp and len(msg.msgstr) >= 2:
            self.__process(msg, msg.msgidp, msg.msgstr[1])
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2015-04-01 19:00:47 +05:30
|
|
|
class Check_mapping_fmt(Check):
    """Check that the translation uses the same ``{name}`` replacement
    fields as the original text.

    A translation may append a ``.f[...]`` suffix to a field name; such a
    suffix is ignored when the fields are compared.
    """

    # A pattern to find all {}
    find_map_pat = re.compile(r'\{\w+\.?f?\[?.*?\]?\}',
                              re.UNICODE)  # needed for Russian index letters

    # The ".f[...]" suffix a translation is allowed to add to a field
    strip_f_suffix_pat = re.compile(r'\.f\[.+\]')

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- {} name mismatches --------------"
        self.summary_text = "{} name mismatches:"

    def __process(self, msg, msgid, msgstr):
        fmts1 = sorted(self.find_map_pat.findall(msgid))
        fmts2 = sorted(self.find_map_pat.findall(msgstr))
        if fmts1 == fmts2:
            return

        if len(fmts1) == len(fmts2):
            # Same number of fields: retry with the ".f[...]" suffixes
            # removed from the translation.  Sort again afterwards, because
            # removing the suffix can change the relative order of fields.
            normalized = sorted(self.strip_f_suffix_pat.sub('', field)
                                for field in fmts2)
            if fmts1 == normalized:
                return

        self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair."""
        self.__process(msg, msg.msgid, msg.msgstr[0])

        if msg.msgidp and len(msg.msgstr) >= 2:
            self.__process(msg, msg.msgidp, msg.msgstr[1])
|
|
|
|
|
2006-11-12 01:33:53 +05:30
|
|
|
class Check_missing_sd(Check):
    """Check that every ``%(name)`` in a translation is followed by a
    conversion character ``s`` or ``d``."""

    # A pattern to find %() without s or d
    # Here is a command to use for testing
    # print(re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE).findall( '%(event_name)s: %(place)s%(endnotes)s. ' ))
    find_named_fmt_pat2 = re.compile(r'% \( \w+ \) \d* (\D|$)', re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- %() without 's' or 'd' mismatches --------------"
        self.summary_text = "%() missing s/d:"

    def process(self, msg):
        """Record *msg* once if any of its msgstr forms has a bad format."""
        for msgstr in msg.msgstr:
            # findall() yields the character after each "%(name)<digits>",
            # or '' when the string ends right there.
            for conversion in self.find_named_fmt_pat2.findall(msgstr):
                if conversion not in ('s', 'd'):
                    self.msgs.append(msg)
                    # Stop completely: continuing with the next plural form
                    # could record the same message a second time.
                    return
|
2006-11-12 01:33:53 +05:30
|
|
|
|
|
|
|
class Check_runaway(Check):
    """Detect a context prefix ("context|word") that leaked into the
    translation."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Runaway context:"
        self.diag_header = "-------- Runaway context in translation ---------"

    def __process(self, msg, msgid, msgstr):
        # Runaway context. In the translated part we only want to see
        # the translation of the word after the |
        if '|' not in msgid or '|' not in msgstr:
            return
        if msgid == msgstr:
            return
        # _datestrings.py keywords legitimately keep their '|'
        if ' lexeme' in msgid or 'alternative month names ' in msgid:
            return
        self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair."""
        pairs = [(msg.msgid, msg.msgstr[0])]
        if msg.msgidp and len(msg.msgstr) >= 2:
            pairs.append((msg.msgidp, msg.msgstr[1]))
        for original, translated in pairs:
            self.__process(msg, original, translated)
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-12 01:33:53 +05:30
|
|
|
class Check_xml_chars(Check):
    """Detect unescaped XML special characters in tips.xml translations."""

    # Special XML characters
    # It is not allowed to have a quote, an ampersand or an angle bracket,
    # except for the tags <b>, </b>, <i>, </i>, <br/> and the entities
    # &quot; &nbsp; &gt; &amp;
    #
    # The second alternative uses a *negative* lookbehind so that a '>' is
    # reported unless it closes one of the allowed tags.  (It used to be
    # "(?<=!...)", a positive lookbehind for a literal '!', which made a
    # stray '>' go unreported.)
    xml_chars_pat = re.compile(
        r'<(?!(b>|/b>|i>|/i>|br/>)) | (?<!(<b|/b|<i|/i|r/))> | " | & (?!(quot|nbsp|gt|amp);)',
        re.VERBOSE)

    def __init__(self):
        Check.__init__(self)
        self.diag_header = "-------- unescaped XML special characters ---------"
        self.summary_text = "XML special chars:"

    def process(self, msg):
        """Record *msg* if its first msgstr has a forbidden character."""
        # XML errors
        # Only look at messages in the tips.xml
        if msg.is_tips_xml and self.xml_chars_pat.search(msg.msgstr[0]):
            self.msgs.append(msg)
|
2006-11-12 01:33:53 +05:30
|
|
|
|
|
|
|
class Check_last_char(Check):
    """Check that original and translation end in the same kind of
    character: white space, period, colon or closing parenthesis."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Last character:"
        self.diag_header = "-------- last character not identical ---------"

    def __process(self, msg, msgid, msgstr):
        # Slicing (instead of indexing) yields '' for an empty string.
        src_end = msgid[-1:]
        dst_end = msgstr[-1:]
        mismatch = (
            src_end.isspace() != dst_end.isspace()
            # The second character of each pair is the Chinese variant.
            or (src_end == '.') != (dst_end in ('.', '\u3002'))
            or (src_end == ':') != (dst_end in (':', '\uff1a'))
            or (src_end == ')') != (dst_end in (')', '\uff09'))
        )
        if mismatch:
            self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair.

        Fuzzy messages are not checked.
        """
        # Last character of msgid? White space? Period?
        if msg.is_fuzzy:
            return

        pairs = [(msg.msgid, msg.msgstr[0])]
        if msg.msgidp and len(msg.msgstr) >= 2:
            pairs.append((msg.msgidp, msg.msgstr[1]))
        for original, translated in pairs:
            self.__process(msg, original, translated)
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-21 12:47:45 +05:30
|
|
|
class Check_shortcut_trans(Check):
    """Detect an underscore in the translation when the original text has
    none."""

    def __init__(self):
        Check.__init__(self)
        self.summary_text = "Shortcut in msgstr:"
        self.diag_header = "-------- shortcut key in translation ---------"

    def __process(self, msg, msgid, msgstr):
        if '_' not in msgid and '_' in msgstr:
            self.msgs.append(msg)

    def process(self, msg):
        """Check the singular pair and, when present, the plural pair."""
        pairs = [(msg.msgid, msg.msgstr[0])]
        if msg.msgidp and len(msg.msgstr) >= 2:
            pairs.append((msg.msgidp, msg.msgstr[1]))
        for original, translated in pairs:
            self.__process(msg, original, translated)
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-09 00:52:08 +05:30
|
|
|
class Msgid:
    """One entry of a PO file.

    The entry keeps the unquoted, concatenated text of its ``msgctxt``,
    ``msgid``, ``msgid_plural`` and ``msgstr`` parts, plus the raw input
    lines of each part (the underscore-prefixed lists) for diagnostics.
    """

    fuzzy_pat = re.compile('fuzzy')
    tips_xml_pat = re.compile(r'tips\.xml')

    # Leading keywords of the first line of each part.  They are anchored
    # at the start of the line so that the same word occurring inside the
    # quoted text is left alone.
    _msgctxt_kw_pat = re.compile(r'^\s*msgctxt\s+')
    _msgid_kw_pat = re.compile(r'^\s*msgid\s+')
    _msgidp_kw_pat = re.compile(r'^\s*msgid_plural\s+')
    _msgstr_kw_pat = re.compile(r'^\s*msgstr(\[\d\])?\s+')

    def __init__(self, msgnr, lineno):
        self._msgctxt = []  # For debugging purpose the original text
        self._msgid = []  # For debugging purpose the original text
        self._msgidp = []  # For debugging purpose the original text
        self._msgstr = []  # For debugging purpose the original text
        self.msgctxt = ''
        self.msgid = ''
        self.msgidp = ''
        self.msgstr = []  # This is a list to support plural
        self._cmnt = []
        self.nr = msgnr
        self.lineno = lineno
        self.is_fuzzy = 0
        self.is_tips_xml = 0

    def diag(self):
        """Print the entry number, its line number and its raw lines."""
        # A bare ``print`` is a no-op expression in Python 3; call it so
        # the blank separator line is really emitted.
        print()
        print("msg nr: %d, lineno: %d%s" %
              (self.nr, self.lineno, " (fuzzy)" if self.is_fuzzy else ""))
        sys.stdout.write(''.join(self._msgid))
        sys.stdout.write(''.join(self._msgidp))
        if sys.version_info[0] < 3:
            sys.stdout.write(''.join(self._msgstr).encode('utf-8'))
        else:
            sys.stdout.write(''.join(self._msgstr))

    @staticmethod
    def _unquote(line, keyword_pat, lineno):
        """Return the text between the quotes of one PO line.

        A leading keyword matching *keyword_pat* is removed first.  An
        error is printed (and the text returned as is) when the remainder
        is not enclosed in double quotes.
        """
        line = keyword_pat.sub('', line, count=1).strip()
        # Slices instead of indexes: an empty remainder must not raise.
        if line[:1] != '"' or line[-1:] != '"':
            print("ERROR at line %d: Missing quote." % lineno)
        return strip_quotes(line)

    def add_msgctxt(self, line, lineno):
        """Append a msgctxt line (first or continuation) to the entry."""
        self._msgctxt.append(line)
        self.msgctxt += self._unquote(line, self._msgctxt_kw_pat, lineno)

    def add_msgid(self, line, lineno):
        """Append a msgid line (first or continuation) to the entry."""
        self._msgid.append(line)
        self.msgid += self._unquote(line, self._msgid_kw_pat, lineno)

    def add_msgidp(self, line, lineno):
        """Append a msgid_plural line (first or continuation)."""
        self._msgidp.append(line)
        self.msgidp += self._unquote(line, self._msgidp_kw_pat, lineno)

    def add_new_msgstr(self, line, lineno):
        """Start a new msgstr (one per plural form) with *line*."""
        self.msgstr.append('')  # Start a new msgstr
        self.add_msgstr(line, lineno)

    def add_msgstr(self, line, lineno):
        """Append a line to the msgstr that was started last."""
        self._msgstr.append(line)
        self.msgstr[-1] += self._unquote(line, self._msgstr_kw_pat, lineno)

    def add_cmnt(self, line):
        """Store a comment line and update the fuzzy / tips.xml flags."""
        self._cmnt.append(line)
        if not self.is_fuzzy and self.fuzzy_pat.search(line):
            self.is_fuzzy = 1
        if not self.is_tips_xml and self.tips_xml_pat.search(line):
            self.is_tips_xml = 1
|
2006-11-09 16:25:08 +05:30
|
|
|
|
2009-02-06 15:38:24 +05:30
|
|
|
def create_new_Msgid(msgs, lineno):
    """Create a Msgid starting at *lineno*, append it to *msgs* and
    return it.

    The number of the new entry is the length of *msgs* before the append.
    """
    entry = Msgid(len(msgs), lineno)
    msgs.append(entry)
    return entry
|
2009-02-06 02:36:39 +05:30
|
|
|
|
2006-11-09 00:52:08 +05:30
|
|
|
def read_msgs(fname):
    """Parse the PO file *fname* and return its entries.

    The file is read as UTF-8 and parsed line by line with a small state
    machine.  Entries that have neither a msgid nor a msgstr (for example
    comment-only items) are dropped from the result.

    Returns the list of Msgid objects.  The same list is also stored in
    the module-level name ``msgs``.

    Raises CheckException when a line is not allowed in the current state.
    """
    empty_pat = re.compile(r'^ \s* $', re.VERBOSE)
    comment_pat = re.compile(r'\#', re.VERBOSE)
    msgctxt_pat = re.compile(r'msgctxt \s+ "', re.VERBOSE)
    msgid_pat = re.compile(r'msgid \s+ "', re.VERBOSE)
    msgid_plural_pat = re.compile(r'msgid_plural \s+ "', re.VERBOSE)
    msgstr_pat = re.compile(r'msgstr (\[\d\])? \s+ "', re.VERBOSE)
    str_pat = re.compile(r'"', re.VERBOSE)
    old_pat = re.compile(r'\#~ \s+ ', re.VERBOSE)

    # Close the file as soon as it has been read.
    with io.open(fname, encoding='utf-8') as f:
        lines = f.readlines()

    # parse it like a statemachine
    NONE = 'NONE'  # Nothing detected, yet
    CMNT = 'CMNT'  # Inside comment part
    MSGCTXT = 'msgctxt'  # Inside msgctxt part
    MSGID = 'msgid'  # Inside msgid part
    MSGIDP = 'msgid_plural'  # Inside msgid_plural part
    MSGSTR = 'msgstr'  # Inside msgstr part
    STR = 'STR'  # A continuation string
    OLD = 'OLD'  # An old pattern with #~

    def unexpected(kind, where):
        # Error for a keyword that is not allowed in the current state.
        return CheckException('Unexpected %s at %s' % (kind, where))

    def bad_state(current):
        # Error for a line that no branch of the current state handles.
        return CheckException('Unexpected state in po parsing '
                              '(state = %s)' % current)

    # Kept global for backward compatibility with code reading ``msgs``.
    global msgs
    state = NONE
    msg = None

    msgs = []
    for ix, line in enumerate(lines):  # Use line numbers for messages
        lineno = ix + 1
        where = '%s:%d' % (fname, lineno)

        if empty_pat.match(line):
            continue  # Empty lines are not interesting

        # What's the next state?
        # (old_pat must be tried before comment_pat: "#~" also starts
        # with "#".)
        if old_pat.match(line):
            next_state = OLD
        elif comment_pat.match(line):
            next_state = CMNT
        elif msgctxt_pat.match(line):
            next_state = MSGCTXT
        elif msgid_pat.match(line):
            next_state = MSGID
        elif msgid_plural_pat.match(line):
            next_state = MSGIDP
        elif msgstr_pat.match(line):
            next_state = MSGSTR
        elif str_pat.match(line):
            next_state = STR
        else:
            print('WARNING: Unexpected input at %s' % where)
            next_state = NONE

        if state == NONE:
            # expect msgctxt, msgid, comment or old stuff
            if next_state == CMNT:
                state = CMNT
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_cmnt(line)

            elif next_state == MSGCTXT:
                state = MSGCTXT
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_msgctxt(line, lineno)

            elif next_state == MSGID:
                state = MSGID
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_msgid(line, lineno)

            elif next_state == MSGIDP:
                raise unexpected(next_state, where)

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at %s' % where)
                state = MSGSTR
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                print('WARNING: Wild string at %s' % where)

            elif next_state == OLD:
                pass  # Just skip

            else:
                raise bad_state(state)

        elif state == CMNT:
            # Expect more comment, msgctxt, or msgid.
            # If msgstr or string it is flagged as error.
            if next_state == CMNT:
                # Without a current item (after obsolete "#~" lines) the
                # comment is skipped for now.
                if msg:
                    msg.add_cmnt(line)

            elif next_state == MSGCTXT:
                state = MSGCTXT
                if not msg:
                    # Start with an empty new item
                    msg = create_new_Msgid(msgs, lineno)
                msg.add_msgctxt(line, lineno)

            elif next_state == MSGID:
                state = MSGID
                if not msg:
                    # Start with an empty new item
                    msg = create_new_Msgid(msgs, lineno)
                msg.add_msgid(line, lineno)

            elif next_state == MSGIDP:
                raise unexpected(next_state, where)

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at %s' % where)
                state = MSGSTR
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                print('WARNING: Wild string at %s' % where)

            elif next_state == OLD:
                msg = None  # Just skip

            else:
                raise bad_state(state)

        elif state == MSGCTXT:
            # Expect more msgctxt (continuation string) or msgid.
            if next_state == CMNT:
                # Hmmm. A comment here?
                print('WARNING: Unexpected comment at %s' % where)

            elif next_state == MSGCTXT:
                raise unexpected(next_state, where)

            elif next_state == MSGID:
                state = MSGID
                msg.add_msgid(line, lineno)

            elif next_state == MSGIDP:
                raise unexpected(next_state, where)

            elif next_state == MSGSTR:
                print('WARNING: Wild msgstr at %s' % where)
                state = MSGSTR
                # Start with an empty new item
                msg = create_new_Msgid(msgs, lineno)
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                # Continuation of msgctxt, stay in state MSGCTXT.
                # (An earlier duplicate STR branch reported these lines as
                # "Wild string" and dropped them.)
                msg.add_msgctxt(line, lineno)

            else:
                raise bad_state(state)

        elif state == MSGID:
            # Expect msgstr or msgid_plural or string
            if next_state == CMNT:
                # Hmmm. A comment here?
                print('WARNING: Unexpected comment at %s' % where)

            elif next_state == MSGCTXT:
                raise unexpected(next_state, where)

            elif next_state == MSGID:
                raise unexpected(next_state, where)

            elif next_state == MSGIDP:
                state = MSGIDP
                msg.add_msgidp(line, lineno)

            elif next_state == MSGSTR:
                state = MSGSTR
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                # Continuation of msgid, stay in state MSGID
                msg.add_msgid(line, lineno)

            elif next_state == OLD:
                msg = None  # Just skip

            else:
                raise bad_state(state)

        elif state == MSGIDP:
            # Expect msgstr or string or comment
            if next_state == CMNT:
                # Hmmm. A comment here?
                print('WARNING: Unexpected comment at %s' % where)

            elif next_state == MSGCTXT:
                raise unexpected(next_state, where)

            elif next_state == MSGID:
                raise unexpected(next_state, where)

            elif next_state == MSGIDP:
                raise unexpected(next_state, where)

            elif next_state == MSGSTR:
                state = MSGSTR
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                # Continuation of msgid_plural, stay in state MSGIDP
                msg.add_msgidp(line, lineno)

            elif next_state == OLD:
                msg = None  # Just skip

            else:
                raise bad_state(state)

        elif state == MSGSTR:
            # Expect comment, or msgctxt, or msgid, or string.
            if next_state == CMNT:
                # A comment probably starts a new item
                state = CMNT
                msg = create_new_Msgid(msgs, lineno)
                msg.add_cmnt(line)

            elif next_state == MSGCTXT:
                state = MSGCTXT
                msg = create_new_Msgid(msgs, lineno)
                msg.add_msgctxt(line, lineno)

            elif next_state == MSGID:
                state = MSGID
                msg = create_new_Msgid(msgs, lineno)
                msg.add_msgid(line, lineno)

            elif next_state == MSGIDP:
                raise unexpected(next_state, where)

            elif next_state == MSGSTR:
                # New msgstr, probably for plural form
                # Stay in MSGSTR state
                msg.add_new_msgstr(line, lineno)

            elif next_state == STR:
                msg.add_msgstr(line, lineno)

            elif next_state == OLD:
                msg = None  # Just skip

            else:
                raise bad_state(state)

        else:
            raise bad_state(state)

    # Strip items with just comments. (Can this happen?)
    msgs = [m for m in msgs if m.msgid or m.msgstr]
    return msgs
|
2006-11-09 00:52:08 +05:30
|
|
|
|
2012-12-02 19:22:37 +05:30
|
|
|
def analyze_msgs(args, fname, msgs, nr_templates=None, nth=0):
    """Run all checks on the entries of one PO file and print a report.

    args         -- not used by this function
    fname        -- name of the PO file, only used in the report
    msgs         -- the Msgid objects of the PO file
    nr_templates -- number of entries of the template (.pot) file; the
                    template related figures are left out when it is
                    None or 0
    nth          -- index of this file in the run; a separator is printed
                    in front of every report but the first

    Untranslated and fuzzy entries are counted but not checked.
    """
    nr_fuzzy = 0
    nr_untranslated = 0

    checks = [
        Check_fmt('%s'),
        Check_fmt('%d'),
        Check_named_fmt(),
        Check_mapping_fmt(),
        Check_missing_sd(),
        Check_runaway(),
        Check_xml_chars(),
        Check_last_char(),
        Check_shortcut_trans(),
    ]

    for msg in msgs:
        if ''.join(msg.msgstr) == '':
            nr_untranslated += 1
            continue

        if msg.is_fuzzy:
            nr_fuzzy += 1
            continue

        for c in checks:
            c.process(msg)

    nr_msgs = len(msgs)
    if nth > 0:
        # A bare ``print`` is a no-op expression in Python 3; call it so
        # the blank separator line is really emitted.
        print()
        print("=====================================")
    print("%-20s%s" % ("File:", fname))
    if nr_templates is not None:
        print("%-20s%d" % ("Template total:", nr_templates))
    print("%-20s%d" % ("PO total:", nr_msgs))
    print("%-20s%d" % ("Fuzzy:", nr_fuzzy))
    print("%-20s%d" % ("Untranslated:", nr_untranslated))

    for c in checks:
        c.summary()

    # An empty PO file has nothing translated; avoid dividing by zero.
    if nr_msgs:
        po_coverage = (1.0 - (float(nr_untranslated) / float(nr_msgs))) * 100
    else:
        po_coverage = 0.0
    print("%-20s%5.2f%%" % ("PO Coverage:", po_coverage))

    # The remaining figures are relative to the template size and cannot
    # be computed without it.
    if nr_templates:
        template_coverage = po_coverage * float(nr_msgs) / float(nr_templates)
        print("%-20s%5.2f%%" % ("Template Coverage:", template_coverage))

        not_displayed = nr_untranslated + nr_fuzzy
        translation = (1.0 - (float(not_displayed) / float(nr_templates))) * 100
        text = "%-20s%5.2f%%" % ("Localized at:", translation)

        # Differing coverages mean that the PO file and the template do
        # not have the same number of entries.
        if int(template_coverage * 1000) == int(po_coverage * 1000):
            print(text)
        else:
            print(text + ' (previous gramps.pot)')

    for c in checks:
        c.diag()
|
2012-12-20 23:12:29 +05:30
|
|
|
|
2006-11-09 00:52:08 +05:30
|
|
|
def main():
    """Command line entry point: check the PO file given with ``-s``.

    The template ``gramps.pot`` and the PO file are read from the current
    directory.  Without ``-s`` the usage text is printed and nothing is
    checked.
    """
    parser = ArgumentParser(description='This program validates '
                                        'a PO file for GRAMPS.')

    parser.add_argument("-s", dest="summary",
                        choices=[name for name in os.listdir('.')
                                 if name.endswith('.po')],
                        default=False,
                        help="the summary of check, "
                             "and if need, it gives details")

    args = parser.parse_args()

    if not args.summary:
        # No file to check; ``files`` used to be left unbound here.
        parser.print_help()
        return

    # Use the parsed value rather than sys.argv[2:], which is empty when
    # the option is written as "-sfoo.po".
    files = [args.summary]

    try:
        pot_msgs = read_msgs('gramps.pot')
        nr_templates = len(pot_msgs)
        for nth, fname in enumerate(files):
            msgs = read_msgs(fname)
            analyze_msgs(files, fname, msgs, nr_templates, nth)

    except CheckException as e:
        print('Oops.', e)
        print('Bailing out')
|
2006-11-09 00:52:08 +05:30
|
|
|
|
|
|
|
# Run the checker only when this file is executed as a script.
if "__main__" == __name__:
    main()
|