* src/plugins/ImportGeneWeb.py (decode): Decode numeric character references and named entities. Because gramps is not web-browser based, we can simply use Unicode.
svn: r4608
This commit is contained in:
parent
f8e15f5269
commit
8c2ef473f4
@ -1,3 +1,8 @@
|
|||||||
|
2005-05-17 Martin Hawlisch <Martin.Hawlisch@gmx.de>
|
||||||
|
* src/plugins/ImportGeneWeb.py (decode): Decode characters and named
|
||||||
|
entities. Because gramps is not web browser based we can simply use
|
||||||
|
unicode.
|
||||||
|
|
||||||
2005-05-16 Don Allingham <don@gramps-project.org>
|
2005-05-16 Don Allingham <don@gramps-project.org>
|
||||||
* src/PlaceView.py: select correct column for sorting
|
* src/PlaceView.py: select correct column for sorting
|
||||||
* src/SourceView.py: select correct column for sorting
|
* src/SourceView.py: select correct column for sorting
|
||||||
|
@ -52,6 +52,7 @@ import Utils
|
|||||||
import const
|
import const
|
||||||
from QuestionDialog import ErrorDialog
|
from QuestionDialog import ErrorDialog
|
||||||
from DateHandler import parser as _dp
|
from DateHandler import parser as _dp
|
||||||
|
from htmlentitydefs import name2codepoint
|
||||||
|
|
||||||
#-------------------------------------------------------------------------
|
#-------------------------------------------------------------------------
|
||||||
#
|
#
|
||||||
@ -698,7 +699,29 @@ class GeneWebParser:
|
|||||||
return sref
|
return sref
|
||||||
|
|
||||||
def decode(self,s):
|
def decode(self,s):
|
||||||
return( latin_utf8.latin_to_utf8( s.replace('_',' ')))
|
s = latin_utf8.latin_to_utf8( s.replace('_',' '))
|
||||||
|
charref_re = re.compile('(&#)(x?)([0-9a-zA-Z]+)(;)')
|
||||||
|
for match in charref_re.finditer(s):
|
||||||
|
try:
|
||||||
|
if match.group(2): # HEX
|
||||||
|
nchar = unichr(int(match.group(3),16))
|
||||||
|
else: # Decimal
|
||||||
|
nchar = unichr(int(match.group(3)))
|
||||||
|
s = s.replace(match.group(0),nchar)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# replace named entities
|
||||||
|
entref_re = re.compile('(&)([a-zA-Z]+)(;)')
|
||||||
|
for match in entref_re.finditer(s):
|
||||||
|
try:
|
||||||
|
if match.group(2) in name2codepoint:
|
||||||
|
nchar = unichr(name2codepoint[match.group(2)])
|
||||||
|
s = s.replace(match.group(0),nchar)
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return( s)
|
||||||
|
|
||||||
def cnv(seld,s):
|
def cnv(seld,s):
|
||||||
return( latin_utf8.latin_to_utf8(s))
|
return( latin_utf8.latin_to_utf8(s))
|
||||||
|
Loading…
Reference in New Issue
Block a user