7258: Transcode FS paths to avoid a crash

The problem is really that the paths get munged into strings in the
system codepage, losing or misinterpreting most Unicode. Python's os
module is smart enough to encode the unicode to the file system
encoding, but the bsddb module needs a little help.

Provide a new function, constfunc.get_env_var(name, default=None) to
cleanly import Unicode environment variables in Windows and use it in
place of all instances of foo = os.environ['BAR'] or foo =
os.environ.get('BAR').

The os.path functions are smart enough to convert unicode to the file system
encoding on their own, but Db functions aren't, so provide an _encode
function in gen/db/write.py and apply it where a path is being passed to
DBEnv.open().

Also convert paths from the UI to unicode from 'utf8' rather than from
sys.getfilesystemencoding(). The latter happens to be correct most of the
time on Linux and OSX but is wrong on Windows.
This commit is contained in:
John Ralls 2014-03-11 15:52:59 -07:00
parent 53acb627bb
commit 85a1c39092
14 changed files with 109 additions and 68 deletions

@ -234,8 +234,6 @@ class CLIDbManager(object):
"""
# make the default directory if it does not exist
dbdir = os.path.expanduser(config.get('behavior.database-path'))
if sys.version_info[0] < 3:
dbdir = dbdir.encode(glocale.getfilesystemencoding())
db_ok = make_dbdir(dbdir)
self.current_names = []
@ -411,6 +409,7 @@ class CLIDbManager(object):
Returns old_name, new_name if success, None, None if no success
"""
try:
filepath = conv_to_unicode(filepath, 'utf8')
name_file = open(filepath, "r")
old_text=name_file.read()
name_file.close()

@ -44,7 +44,7 @@ import uuid
#
#-------------------------------------------------------------------------
from .git_revision import get_git_revision
from .constfunc import get_env_var
#-------------------------------------------------------------------------
#
# Gramps Version
@ -88,28 +88,18 @@ APP_VCARD = ["text/x-vcard", "text/x-vcalendar"]
#
#-------------------------------------------------------------------------
if 'GRAMPSHOME' in os.environ:
USER_HOME = os.environ['GRAMPSHOME']
USER_HOME = get_env_var('GRAMPSHOME')
HOME_DIR = os.path.join(USER_HOME, 'gramps')
elif 'USERPROFILE' in os.environ:
USER_HOME = os.environ['USERPROFILE']
USER_HOME = get_env_var('USERPROFILE')
if 'APPDATA' in os.environ:
HOME_DIR = os.path.join(os.environ['APPDATA'], 'gramps')
else:
HOME_DIR = os.path.join(USER_HOME, 'gramps')
else:
USER_HOME = os.environ['HOME']
USER_HOME = get_env_var('HOME')
HOME_DIR = os.path.join(USER_HOME, '.gramps')
# Conversion of USER_HOME to unicode was needed to have better
# support for non ASCII path names in Windows for the Gramps database.
if sys.version_info[0] < 3:
if not isinstance(USER_HOME, unicode):
USER_HOME = unicode(USER_HOME, sys.getfilesystemencoding())
if not isinstance(HOME_DIR, unicode):
HOME_DIR = unicode(HOME_DIR, sys.getfilesystemencoding())
else:
pass
VERSION_DIR = os.path.join(
HOME_DIR, "gramps%s%s" % (VERSION_TUPLE[0], VERSION_TUPLE[1]))
@ -125,7 +115,7 @@ THUMB_NORMAL = os.path.join(THUMB_DIR, "normal")
THUMB_LARGE = os.path.join(THUMB_DIR, "large")
USER_PLUGINS = os.path.join(VERSION_DIR, "plugins")
# dirs checked/made for each Gramps session
USER_DIRLIST = (HOME_DIR, VERSION_DIR, ENV_DIR, TEMP_DIR, THUMB_DIR,
USER_DIRLIST = (USER_HOME, HOME_DIR, VERSION_DIR, ENV_DIR, TEMP_DIR, THUMB_DIR,
THUMB_NORMAL, THUMB_LARGE, USER_PLUGINS)
#-------------------------------------------------------------------------

@ -34,6 +34,8 @@ perform a translation on import, eg Gtk.
#------------------------------------------------------------------------
import platform
import sys
import ctypes
import os
#-------------------------------------------------------------------------
#
@ -53,7 +55,11 @@ WINDOWS = ["Windows", "win32"]
#python 2 and 3 support, use correct conversion to unicode
if sys.version_info[0] < 3:
conv_to_unicode = unicode
def conv_to_unicode(x, y):
    """
    Return x as a unicode object.

    x is handed back untouched when it is already unicode; anything else
    is passed to unicode() with y as the second (encoding) argument.
    """
    return x if isinstance(x, unicode) else unicode(x, y)
conv_to_unicode_direct = unicode
STRTYPE = basestring
UNITYPE = unicode
@ -159,3 +165,28 @@ def mod_key():
return "<ctrl>"
return "<alt>"
# Python2 on Windows munges environment variables to match the system
# code page. This breaks all manner of things, and the workaround,
# though a bit ugly, is encapsulated here. Use this to retrieve
# environment variables if there's any chance they might contain
# Unicode, and especially for paths.
# Shamelessly lifted from http://stackoverflow.com/questions/2608200/problems-with-umlauts-in-python-appdata-environvent-variable, answer 1.
def get_env_var(name, default=None):
    """
    Return the value of the environment variable ``name``.

    On Python 2 under Windows the value is read with the wide-character
    Win32 call GetEnvironmentVariableW instead of os.environ, so the
    result is a unicode string. In every other case the value is taken
    directly from os.environ.

    :param name: name of the environment variable to look up
    :param default: value returned when ``name`` is empty or the variable
                    is not present in os.environ
    :returns: the variable's value, or ``default``
    """
    if not name or name not in os.environ:
        return default

    if sys.version_info[0] < 3 and win():
        name = unicode(name)  # the wide-character API needs a unicode name
        get_var = ctypes.windll.kernel32.GetEnvironmentVariableW
        # Called without a buffer, the function reports the size (in
        # characters, terminating NUL included) needed to hold the value;
        # a result of 0 is treated as "no value available".
        size = get_var(name, None, 0)
        if size == 0:
            return default
        # Passing the size allocates a zero-filled buffer directly, with
        # no need to build a throwaway string of NULs first.
        buf = ctypes.create_unicode_buffer(size)
        get_var(name, buf, size)
        return buf.value

    return os.environ[name]

@ -57,10 +57,7 @@ except:
DBPageNotFoundError = 0
DBInvalidArgError = 0
from ..const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
import re
import logging
#-------------------------------------------------------------------------
@ -88,8 +85,9 @@ from ..utils.cast import conv_dbstr_to_unicode
from . import (BsddbBaseCursor, DbReadBase)
from ..utils.id import create_id
from ..errors import DbError
from ..constfunc import UNITYPE, STRTYPE, cuni, handle2internal
from ..constfunc import UNITYPE, STRTYPE, cuni, handle2internal, get_env_var
from ..const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
LOG = logging.getLogger(DBLOGNAME)
LOG = logging.getLogger(".citation")
@ -287,7 +285,7 @@ class DbBsddbRead(DbReadBase, Callback):
__signals__ = {}
# If this is True logging will be turned on.
try:
_LOG_ALL = int(os.environ.get('GRAMPS_SIGNAL', "0")) == 1
_LOG_ALL = int(get_env_var('GRAMPS_SIGNAL', "0")) == 1
except:
_LOG_ALL = False

@ -88,7 +88,8 @@ from ..utils.callback import Callback
from ..utils.cast import conv_dbstr_to_unicode
from ..updatecallback import UpdateCallback
from ..errors import DbError
from ..constfunc import win, conv_to_unicode, cuni, UNITYPE, handle2internal
from ..constfunc import (win, conv_to_unicode, cuni, UNITYPE, handle2internal,
get_env_var)
from ..const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
@ -216,6 +217,16 @@ def find_referenced_handle(key, data):
val = val.encode('utf-8')
return val
def _encode(path):
    """
    Return path encoded with sys.getfilesystemencoding() when it is a
    unicode string (UNITYPE) and the program runs under Python 2 on
    Windows; in every other case return path unchanged.

    Either outcome is reported at debug level.
    """
    needs_encoding = (isinstance(path, UNITYPE) and win()
                      and sys.version_info[0] < 3)
    if needs_encoding:
        _LOG.debug("Encoding %s", repr(path))
        return path.encode(sys.getfilesystemencoding())
    _LOG.debug("Didn't Encode %s", repr(path))
    return path
#-------------------------------------------------------------------------
#
# BsddbWriteCursor
@ -421,7 +432,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
dbmap = db.DB(self.env)
dbmap.set_flags(flags)
fname = os.path.join(file_name, table_name + DBEXT)
fname = _encode(os.path.join(file_name, table_name + DBEXT))
if self.readonly:
dbmap.open(fname, table_name, dbtype, DBFLAGS_R)
@ -432,7 +443,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
def __open_shelf(self, file_name, table_name, dbtype=db.DB_HASH):
dbmap = dbshelve.DBShelf(self.env)
fname = os.path.join(file_name, table_name + DBEXT)
fname = _encode(os.path.join(file_name, table_name + DBEXT))
if self.readonly:
dbmap.open(fname, table_name, dbtype, DBFLAGS_R)
@ -563,7 +574,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
"""Older version of Berkeley DB can't read data created by a newer
version."""
bdb_version = db.version()
versionpath = os.path.join(self.path, BDBVERSFN)
versionpath = os.path.join(self.path, cuni(BDBVERSFN))
# Compare the current version of the database (bsddb_version) with the
# version of the database code (env_version). If it is a downgrade,
# raise an exception because we can't do anything. If they are the same,
@ -762,7 +773,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
env_name = name
try:
self.env.open(env_name, env_flags)
self.env.open(_encode(env_name), env_flags)
except Exception as msg:
_LOG.warning("Error opening db environment: " + str(msg))
try:
@ -895,7 +906,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
Open the undo database
"""
if not self.readonly:
self.undolog = os.path.join(self.full_name, DBUNDOFN)
self.undolog = _encode(os.path.join(self.full_name, DBUNDOFN))
self.undodb = DbUndo(self, self.undolog)
self.undodb.open()
@ -2390,7 +2401,7 @@ class DbBsddb(DbBsddbRead, DbWriteBase, UpdateCallback):
# Environment name is now based on the filename
env_name = name
self.env.open(env_name, env_flags)
self.env.open(_encode(env_name), env_flags)
self.env.txn_checkpoint()
self.metadata = self.__open_shelf(full_name, META)
@ -2464,7 +2475,7 @@ def write_lock_file(name):
try:
user = os.getlogin()
except:
user = os.environ.get('USER')
user = get_env_var('USER')
if host:
text = "%s@%s" % (user, host)
else:

@ -178,7 +178,7 @@ class GrampsLocale(object):
and (localedir is None or localedir == cls.__first_instance.localedir)
and (domain is None or domain == cls.__first_instance.localedomain)
and (languages is None or len(languages) == 0 or
languages == cls.__first_instance.languages)):
languages == cls.__first_instance.language)):
return cls.__first_instance
return super(GrampsLocale, cls).__new__(cls)
@ -530,7 +530,7 @@ class GrampsLocale(object):
if hasattr(self, 'initialized') and self.initialized:
return
_first = self._GrampsLocale__first_instance
self.localedir = None
# Everything breaks without localedir, so get that set up
# first. Warnings are logged in _init_first_instance or
# _init_secondary_locale if this comes up empty.
@ -539,7 +539,7 @@ class GrampsLocale(object):
elif _first and hasattr(_first, 'localedir'):
self.localedir = _first.localedir
else:
self.localedir = None
LOG.warn("No Localedir or localdir %s invalid", localedir)
self.lang = lang
self.localedomain = domain or 'gramps'

@ -29,6 +29,8 @@ _hdlr = logging.StreamHandler()
_hdlr.setFormatter(logging.Formatter(fmt="%(name)s.%(levelname)s: %(message)s"))
LOG.addHandler(_hdlr)
from ..constfunc import get_env_var
class ResourcePath(object):
"""
ResourcePath is a singleton, meaning that only one of them is ever
@ -58,10 +60,9 @@ class ResourcePath(object):
else:
test_path = os.path.join("data", "authors.xml")
resource_path = None
if ('GRAMPS_RESOURCES' in os.environ and
os.path.exists(os.path.join(os.environ['GRAMPS_RESOURCES'],
test_path))):
resource_path = os.environ['GRAMPS_RESOURCES']
tmp_path = get_env_var('GRAMPS_RESOURCES')
if (tmp_path and os.path.exists(os.path.join(tmp_path, test_path))):
resource_path = tmp_path
elif installed:
try:
with io.open(resource_file, encoding='utf-8',

@ -29,7 +29,7 @@
# Python modules
#
#-------------------------------------------------------------------------
from __future__ import print_function
from __future__ import print_function, unicode_literals
import sys
## hack to avoid mentioning 'utf8' encoding everywhere unicode or str is is used
@ -53,7 +53,7 @@ from subprocess import Popen, PIPE
#-------------------------------------------------------------------------
from .gen.const import APP_GRAMPS, USER_DIRLIST, HOME_DIR
from .version import VERSION_TUPLE
from .gen.constfunc import win
from .gen.constfunc import win, get_env_var
#-------------------------------------------------------------------------
#
@ -295,11 +295,11 @@ def show_settings():
else:
kernel = None
lang_str = os.environ.get('LANG','not set')
language_str = os.environ.get('LANGUAGE','not set')
grampsi18n_str = os.environ.get('GRAMPSI18N','not set')
grampshome_str = os.environ.get('GRAMPSHOME','not set')
grampsdir_str = os.environ.get('GRAMPSDIR','not set')
lang_str = get_env_var('LANG','not set')
language_str = get_env_var('LANGUAGE','not set')
grampsi18n_str = get_env_var('GRAMPSI18N','not set')
grampshome_str = get_env_var('GRAMPSHOME','not set')
grampsdir_str = get_env_var('GRAMPSDIR','not set')
try:
dotversion_str = Popen(['dot', '-V'], stderr=PIPE).communicate(input=None)[1]
@ -322,7 +322,7 @@ def show_settings():
except:
gsversion_str = 'Ghostscript not in system PATH'
os_path = os.environ.get('PATH','not set')
os_path = get_env_var('PATH','not set')
os_path = os_path.split(os.pathsep)
print ("Gramps Settings:")
@ -412,12 +412,12 @@ def run():
if 'LANG' in os.environ:
LOG.debug('Using LANG: %s' %
os.environ.get('LANG'))
get_env_var('LANG'))
else:
LOG.debug('environment: LANG is not defined')
if 'LANGUAGE' in os.environ:
LOG.debug('Using LANGUAGE: %s' %
os.environ.get('LANGUAGE'))
get_env_var('LANGUAGE'))
else:
LOG.debug('environment: LANGUAGE is not defined')

@ -27,8 +27,9 @@
# Python modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
from __future__ import unicode_literals
import os
import sys
@ -57,6 +58,9 @@ from gi.repository import GdkPixbuf
from gramps.gen.const import (AUTHORS, AUTHORS_FILE, COMMENTS, COPYRIGHT_MSG,
DOCUMENTERS, LICENSE_FILE, PROGRAM_NAME, SPLASH,
TRANSLATORS, URL_HOMEPAGE, VERSION)
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
from gramps.gen.constfunc import get_env_var
from .display import display_url
from gramps.gen.config import config
@ -139,7 +143,7 @@ class GrampsAboutDialog(Gtk.AboutDialog):
% (ellipses(str(VERSION)),
ellipses(str(sys.version).replace('\n','')),
ellipses(str(bsddb.__version__) + " " + str(bsddb.db.version())),
ellipses(os.environ.get('LANG','')),
ellipses(get_env_var('LANG','')),
ellipses(operatingsystem),
ellipses(distribution)))

@ -51,7 +51,7 @@ else:
import logging
LOG = logging.getLogger(".DbManager")
from gramps.gen.constfunc import win, UNITYPE
from gramps.gen.constfunc import win, UNITYPE, conv_to_unicode
if win():
_RCS_FOUND = os.system("rcs -V >nul 2>nul") == 0
if _RCS_FOUND and "TZ" not in os.environ:
@ -250,8 +250,8 @@ class DbManager(CLIDbManager):
self.rcs.set_sensitive(False)
if store.get_value(node, STOCK_COL) == Gtk.STOCK_DIALOG_ERROR:
path = get_unicode_path_from_env_var(store.get_value(node, PATH_COL))
backup = os.path.join(path, "person.gbkp")
path = conv_to_unicode(store.get_value(node, PATH_COL), 'utf8')
backup = os.path.join(path, u"person.gbkp")
self.repair.set_sensitive(os.path.isfile(backup))
else:
self.repair.set_sensitive(False)
@ -370,7 +370,7 @@ class DbManager(CLIDbManager):
self.top.destroy()
del self.selection
del self.name_renderer
path = get_unicode_path_from_env_var(store.get_value(node, PATH_COL))
path = conv_to_unicode(store.get_value(node, PATH_COL), 'utf8')
return (path, store.get_value(node, NAME_COL))
else:
self.top.destroy()
@ -405,7 +405,7 @@ class DbManager(CLIDbManager):
try:
self.break_lock(self.lock_file)
store, node = self.selection.get_selected()
dbpath = get_unicode_path_from_env_var(store.get_value(node, PATH_COL))
dbpath = conv_to_unicode(store.get_value(node, PATH_COL), 'utf8')
(tval, last) = time_val(dbpath)
store.set_value(node, OPEN_COL, 0)
store.set_value(node, STOCK_COL, "")
@ -592,19 +592,20 @@ class DbManager(CLIDbManager):
store, node = self.selection.get_selected()
path = store.get_path(node)
node = self.model.get_iter(path)
filename = self.model.get_value(node, FILE_COL)
filename = conv_to_unicode(self.model.get_value(node, FILE_COL), 'utf8')
try:
name_file = open(filename, "r")
file_name_to_delete=name_file.read()
name_file.close()
remove_filename(file_name_to_delete)
for (top, dirs, files) in os.walk(self.data_to_delete[1]):
directory = conv_to_unicode(self.data_to_delete[1], 'utf8')
for (top, dirs, files) in os.walk(directory):
for filename in files:
os.unlink(os.path.join(top, filename))
os.rmdir(self.data_to_delete[1])
os.rmdir(directory)
except (IOError, OSError) as msg:
DbManager.ERROR(_("Could not delete Family Tree"),
str(msg))
str(msg))
# rebuild the display
self.__populate()
@ -658,7 +659,7 @@ class DbManager(CLIDbManager):
# Create the row and directory, awaits user edit of title:
(new_dir, title) = self._create_new_db(title, create_db=False)
# Copy the files:
name_file = store[node][FILE_COL]
name_file = conv_to_unicode(store[node][FILE_COL], 'utf8')
old_dir = os.path.dirname(name_file)
for filename in os.listdir(old_dir):
if filename == "name.txt":

@ -26,8 +26,8 @@
# Python modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
from __future__ import unicode_literals
from gi.repository import Gdk
from gi.repository import Gtk
from gi.repository import GdkPixbuf
@ -46,7 +46,9 @@ else:
# Gramps modules
#
#-------------------------------------------------------------------------
from gramps.gen.const import ICON, SPLASH
from gramps.gen.const import ICON, SPLASH, GRAMPS_LOCALE as glocale
_ = glocale.translation.gettext
from gramps.gen.constfunc import get_env_var
from gramps.version import VERSION
from ..display import display_help, display_url
@ -166,7 +168,7 @@ class ErrorReportAssistant(Gtk.Assistant):
% (str(sys.version).replace('\n',''),
str(bsddb.__version__) + " " + str(bsddb.db.version()),
str(VERSION),
os.environ.get('LANG',''),
get_env_var('LANG',''),
operatingsystem,
distribution,
'%d.%d.%d' % (Gtk.get_major_version(),

@ -85,7 +85,7 @@ from .displaystate import DisplayState, RecentDocsMenu
from gramps.gen.const import (HOME_DIR, ICON, URL_BUGTRACKER, URL_HOMEPAGE,
URL_MAILINGLIST, URL_MANUAL_PAGE, URL_WIKISTRING,
WIKI_EXTRAPLUGINS)
from gramps.gen.constfunc import is_quartz
from gramps.gen.constfunc import is_quartz, conv_to_unicode
from gramps.gen.config import config
from gramps.gen.errors import WindowActiveError
from .dialog import ErrorDialog, WarningDialog, QuestionDialog2, InfoDialog
@ -1100,6 +1100,7 @@ class ViewManager(CLIManager):
value = dialog.run()
if value:
(filename, title) = value
filename = conv_to_unicode(filename, 'utf8')
self.db_loader.read_file(filename)
self._post_load_newdb(filename, 'x-directory/normal', title)

@ -64,6 +64,7 @@ from gramps.gen.const import GRAMPS_LOCALE as glocale
_ = glocale.translation.sgettext
from gramps.gen.config import config
from gramps.gui.dialog import ErrorDialog
from gramps.gen.constfunc import get_env_var
#-------------------------------------------------------------------------
#
@ -133,7 +134,7 @@ class OsmGps():
constants.map_title[map_type])
config.set("geography.map_service", map_type)
self.current_map = map_type
http_proxy = os.environ.get('http_proxy')
http_proxy = get_env_var('http_proxy')
if 0:
self.osm = DummyMapNoGpsPoint()
else:

@ -29,6 +29,8 @@ import os
import tempfile
import unittest as U
from gramps.gen.constfunc import get_env_var
usage_note="""
**************************************************************
Testing (and runing) Gramps requires that PYTHONPATH include
@ -134,7 +136,7 @@ class Test3(U.TestCase):
here = tu.absdir()
bases = (here, tempfile.gettempdir())
asubs = [os.path.join(b,"test_sub") for b in bases]
home= os.environ["HOME"]
home = get_env_var("HOME")
if home:
home_junk = os.path.join(home,"test_junk")
def _rmsubs(self):