Narweb: some fixes to alphabet_navigation

Ensure correct index letters according to normal indexing conventions by using ICU AlphabeticIndex.
- Add ICU AlphabeticIndex
- Modify modules that use alphabet_navigation to use AlphabeticIndex

If ICU AlphabeticIndex is not available, fix some other issues.
- Implement a local AlphabeticIndex for when ICU AlphabeticIndex is not available, using existing code algorithms
- Fix Serbian contraction DŽ not being recognised
- Fix contractions not getting a separate entry in the alphabet_navigation
- Ensure that import from either icu or PyICU works
- Fix primary_difference to set collation strength to PRIMARY
- Fix contractions lookup to use locale for current report
- Fix get_first_letter to pass current report locale

Fixes #12350
2021-07-10 19:26:48 +01:00 · 2021-07-10 19:26:48 +01:00 · ea6b1612f9
commit ea6b1612f9
parent f850959726
8 changed files with 1231 additions and 950 deletions
--- a/gramps/plugins/webreport/alphabeticindex.py
+++ b/gramps/plugins/webreport/alphabeticindex.py
@ -0,0 +1,383 @@
+# -*- coding: utf-8 -*-
+#!/usr/bin/env python
+#
+# Gramps - a GTK+/GNOME based genealogy program
+#
+# Copyright (C) 2010-      Serge Noiraud
+# Copyright (C) 2021-      T G L Lyons
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+
+"""
+Narrative Web Page generator.
+
+Class:
+    AlphabeticIndex - approximate emulation of ICU Alphabetic Index
+"""
+
+#------------------------------------------------
+# python modules
+#------------------------------------------------
+from unicodedata import normalize
+from collections import defaultdict
+import logging
+
+#------------------------------------------------
+# Gramps module
+#------------------------------------------------
+from gramps.gen.const import GRAMPS_LOCALE as glocale
+
+HAVE_ICU = False
+try:
+    from icu import Locale, Collator
+    HAVE_ICU = True
+except ImportError:
+    try:
+        from PyICU import Locale, Collator
+        HAVE_ICU = True
+    except ImportError:
+        pass
+
+LOG = logging.getLogger(".NarrativeWeb")
+COLLATE_LANG = glocale.collation
+class U_ENUM_OUT_OF_SYNC_ERROR(Exception): # pylint: disable=invalid-name
+    """
+    Exception to match the error in the ICU AlphabeticIndex
+    """
+    pass
+
+# See : http://www.gramps-project.org/bugs/view.php?id=4423
+
+# Contraction data taken from CLDR 22.1. Only the default variant is considered.
+# The languages included below are, by no means, all the languages that have
+# contractions - just a sample of languages that have been supported.
+
+# At the time of writing (Feb 2013), the following languages have greater than
+# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da
+# Danish, de German, el Greek, en_GB, es Spanish, fi Finnish, fr French, he
+# Hebrew, hr Croatian, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb
+# Norwegian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese
+# (Brazil), pt_PT Portuguese (Portugal), ru Russian, sk Slovak, sl Slovenian, sv
+# Swedish, vi Vietnamese, zh_CN Chinese.
+
+# Key is the language (or language and country), Value is a list of
+# contractions. Each contraction consists of a tuple. First element of the
+# tuple is the list of characters, second element is the string to use as the
+# index entry.
+
+# The DUCET contractions (e.g. LATIN CAPITAL LETTER L, MIDDLE DOT) are ignored,
+# as are the suppress_contractions in some locales.
+
+CONTRACTIONS_DICT = {
+# bg Bulgarian validSubLocales="bg_BG" no contractions
+# ca Catalan validSubLocales="ca_AD ca_ES"
+"ca" : [(("l·", "L·"), "L")],
+# Czech, validSubLocales="cs_CZ" Czech_Czech Republic
+"cs" : [(("ch", "cH", "Ch", "CH"), "CH")],
+# Danish validSubLocales="da_DK" Danish_Denmark
+"da" : [(("aa", "Aa", "AA"), "Å")],
+# de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no
+# contractions in standard collation.
+# el Greek validSubLocales="el_CY el_GR" no contractions.
+# es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU
+# es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE
+# es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard
+# collation.
+# fi Finnish validSubLocales="fi_FI" no contractions in default (phonebook)
+# collation.
+# fr French no collation data.
+# he Hebrew validSubLocales="he_IL" no contractions
+# hr Croatian validSubLocales="hr_BA hr_HR"
+"hr" : [(("dž", "Dž", "DŽ"), "DŽ"),
+        (("lj", "Lj", 'LJ'), "Ǉ"),
+        (("Nj", "NJ", "nj"), "Ǌ")],
+# Hungarian hu_HU for two and three character contractions.
+"hu" : [(("cs", "Cs", "CS"), "CS"),
+        (("dzs", "Dzs", "DZS"), "DZS"), # order is important
+        (("dz", "Dz", "DZ"), "DZ"),
+        (("gy", "Gy", "GY"), "GY"),
+        (("ly", "Ly", "LY"), "LY"),
+        (("ny", "Ny", "NY"), "NY"),
+        (("sz", "Sz", "SZ"), "SZ"),
+        (("ty", "Ty", "TY"), "TY"),
+        (("zs", "Zs", "ZS"), "ZS")
+       ],
+# it Italian no collation data.
+# ja Japanese unable to process the data as it is too complex.
+# lt Lithuanian no contractions.
+# Norwegian Bokmål
+"nb" : [(("aa", "Aa", "AA"), "Å")],
+# nn Norwegian Nynorsk validSubLocales="nn_NO"
+"nn" : [(("aa", "Aa", "AA"), "Å")],
+# nl Dutch no collation data.
+# pl Polish validSubLocales="pl_PL" no contractions
+# pt Portuguese no collation data.
+# ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no
+# contractions
+# Slovak,  validSubLocales="sk_SK" Slovak_Slovakia
+# having DZ in Slovak as a contraction was rejected in
+# http://unicode.org/cldr/trac/ticket/2968
+"sk" : [(("ch", "cH", "Ch", "CH"), "Ch")],
+# sl Slovenian validSubLocales="sl_SI" no contractions
+# sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is
+# "reformed" no contractions.
+# vi Vietnamese validSubLocales="vi_VN" no contractions.
+# zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions
+# in Latin characters the others are too complex.
+}
+
+# The comment below from the glibc locale sv_SE in
+# localedata/locales/sv_SE :
+#
+# % The letter w is normally not present in the Swedish alphabet. It
+# % exists in some names in Swedish and foreign words, but is accounted
+# % for as a variant of 'v'.  Words and names with 'w' are in Swedish
+# % ordered alphabetically among the words and names with 'v'. If two
+# % words or names are only to be distinguished by 'v' or % 'w', 'v' is
+# % placed before 'w'.
+#
+# See : http://www.gramps-project.org/bugs/view.php?id=2933
+#
+
+# HOWEVER: the characters V and W in Swedish are not considered as a special
+# case for several reasons. (1) The default collation for Swedish (called the
+# 'reformed' collation type) regards the difference between 'v' and 'w' as a
+# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation
+# type are not a contraction, just a case where the difference is secondary
+# rather than primary. (3) There are plenty of other languages where a
+# difference that is primary in other languages is secondary, and those are not
+# specially handled.
+
+def first_letter(string, rlocale=glocale):
+    """
+    Receives a string and returns the first letter
+    """
+    if string is None or len(string) < 1:
+        return ' '
+
+    norm_unicode = normalize('NFKC', str(string))
+    contractions = CONTRACTIONS_DICT.get(rlocale.collation)
+    if contractions is None:
+        contractions = CONTRACTIONS_DICT.get(rlocale.collation.split("_")[0])
+
+    if contractions is not None:
+        for contraction in contractions:
+            count = len(contraction[0][0])
+            if (len(norm_unicode) >= count and
+                    norm_unicode[:count] in contraction[0]):
+                return contraction[1]
+
+    # no special case
+    return norm_unicode[0].upper()
+
+if HAVE_ICU:
+    def primary_difference(prev_key, new_key, rlocale=glocale):
+        """
+        Try to use the PyICU collation.
+        If we generate a report for another language, make sure we use the
+        correct collation sequence.
+        """
+        collate_lang = Locale(rlocale.collation)
+        collation = Collator.createInstance(collate_lang)
+        collation.setStrength(Collator.PRIMARY)
+        return collation.compare(prev_key, new_key) != 0
+
+else:
+    def primary_difference(prev_key, new_key, rlocale=glocale):
+        """
+        The PyICU collation is not available.
+
+        Returns true if there is a primary difference between the two parameters
+        See http://www.gramps-project.org/bugs/view.php?id=2933#c9317 if
+        letter[i]+'a' < letter[i+1]+'b' and letter[i+1]+'a' < letter[i]+'b' is
+        true then the letters should be grouped together
+
+        The test characters here must not be any that are used in contractions.
+        """
+
+        return rlocale.sort_key(prev_key + "e") >= \
+                   rlocale.sort_key(new_key + "f") or \
+                   rlocale.sort_key(new_key + "e") >= \
+                   rlocale.sort_key(prev_key + "f")
+
+def get_index_letter(letter, index_list, rlocale=glocale):
+    """
+    This finds the letter in the index_list that has no primary difference from
+    the letter provided. See the discussion in AlphabeticIndex.__create_index
+    below. For example, if letter is Å and index_list is A, then this would
+    return A.
+
+    @param: letter     -- The letter to find in the index_list
+    @param: index_list -- The list of all first letters in use
+    @param: rlocale    -- The locale to use
+    """
+    for index in index_list:
+        if not primary_difference(letter, index, rlocale):
+            return index
+
+    LOG.warning("Initial letter '%s' not found in alphabetic navigation list",
+                letter)
+    LOG.debug("filtered sorted index list %s", index_list)
+    return letter
+
+#------------------------------------------------------------
+#
+# AlphabeticIndex (local non-ICU version)
+#
+#------------------------------------------------------------
+class AlphabeticIndex():
+    """
+    Approximately emulate the ICU AlphabeticIndex
+    """
+    def __init__(self, rlocale):
+        self.rlocale = rlocale
+        self._record_list = []
+        # self.index_list = []
+
+        self._bucket_list = []
+        self._dirty = False
+        self._bucket = -1
+        self._record = -1
+        # Externally available properties
+        self.bucketLabel = "" # pylint: disable=invalid-name
+        self.recordName = "" # pylint: disable=invalid-name
+        self.recordData = "" # pylint: disable=invalid-name
+        self.bucketRecordCount = 0 # pylint: disable=invalid-name
+
+    def __create_index(self):
+        """
+        Internal routine to create the Alphabetic Index
+        """
+        # The first letter (or letters if there is a contraction) are extracted
+        # from all the records in the _record_list. There may be duplicates, and
+        # there may be letters where there is only a secondary or tertiary
+        # difference, not a primary difference.
+        index_list = []
+        for (name, dummy_data) in self._record_list:
+            ltr = first_letter(name, self.rlocale)
+            index_list.append(ltr)
+        # The list is sorted in collation order.
+        index_list.sort(key=self.rlocale.sort_key)
+        # For each group with secondary or tertiary differences, the first in
+        # collation sequence is retained. For example, assume the default
+        # collation sequence (DUCET) and names Ånström and Apple. These will
+        # sort in the order shown. Å and A have a secondary difference. If the
+        # first letter from these names was chosen then the index entry would be
+        # Å. This is not desirable. Instead, the initial letters are extracted
+        # (Å and A). These are sorted, which gives A and Å. Then the first of
+        # these is used for the index entry.
+
+        # now remove letters where there is not a primary difference
+        first = True
+        prev_index = None
+        for nkey in index_list[:]:
+            # iterate over a slice copy
+            if first or primary_difference(prev_index, nkey, self.rlocale):
+                first = False
+                prev_index = nkey
+            else:
+                index_list.remove(nkey)
+
+        # finally construct the buckets and contents
+        bucket_dict = defaultdict(list)
+        for (name, data) in \
+                    sorted(self._record_list,
+                           key=lambda x: self.rlocale.sort_key(x[0])):
+            letter = first_letter(name, self.rlocale)
+            letter = get_index_letter(letter, index_list, self.rlocale)
+            bucket_dict[letter].append((name, data))
+
+        self._bucket_list = sorted(bucket_dict.items(),
+                             key=lambda x : self.rlocale.sort_key(x[0]))
+
+        self._dirty = False
+
+    def addRecord(self, name, data):  # pylint: disable=invalid-name
+        """
+        Add a record to the index.
+
+        Each record will be associated with an index Bucket based on the
+        record's name. The list of records for each bucket will be sorted
+        based on the collation ordering of the names in the index's locale.
+        Records with duplicate names are permitted; they will be kept in the
+        order that they were added.
+
+        @param: name        --  The display name
+                                for the Record. The Record will be placed in
+                                a bucket based on this name.
+        @param: data        --  An optional pointer to user data associated
+                                with this item. When iterating the contents
+                                of a bucket, both the data pointer and the
+                                name will be available for each Record.
+        """
+        self._record_list.append((name, data))
+        self._dirty = True
+
+    def resetBucketIterator(self): # pylint: disable=invalid-name
+        """
+        Reset the Bucket iteration for this index.
+
+        The next call to nextBucket() will restart the iteration at the
+        first label.
+        """
+        if self._dirty:
+            self.__create_index()
+        self._bucket = -1
+        self.bucketLabel = ""
+        self._record = -1
+        self.recordName = ""
+        self.recordData = ""
+
+    def nextBucket(self): # pylint: disable=invalid-name
+        """
+        Advance the iteration over the Buckets of this index.
+
+        Return false if there are no more Buckets.
+        """
+        if self._dirty:
+            raise U_ENUM_OUT_OF_SYNC_ERROR
+        self._bucket += 1
+        self._record = -1
+        if self._bucket < len(self._bucket_list):
+            self.bucketLabel = self._bucket_list[self._bucket][0]
+            self.bucketRecordCount = len(self._bucket_list[self._bucket][1])
+            self.recordName = ""
+            self.recordData = None
+            return True
+        else:
+            return False
+
+    def nextRecord(self): # pylint: disable=invalid-name
+        """
+        Advance to the next record in the current Bucket.
+
+        When nextBucket() is called, Record iteration is reset to just
+        before the first Record in the new Bucket.
+        """
+        if self._dirty:
+            raise U_ENUM_OUT_OF_SYNC_ERROR
+        self._record += 1
+        if self._record < len(self._bucket_list[self._bucket][1]):
+            curr_bucket = self._bucket_list[self._bucket]
+            bucket_value = curr_bucket[1]
+            curr_record = bucket_value[self._record]
+            self.recordName = curr_record[0]
+            self.recordData = curr_record[1]
+            return True
+        else:
+            self.recordName = ""
+            self.recordData = None
+            return False
--- a/gramps/plugins/webreport/common.py
+++ b/gramps/plugins/webreport/common.py
@ -27,11 +27,9 @@ This module is used to share variables, enums and functions between all modules

 """

-from unicodedata import normalize
 from collections import defaultdict
 from hashlib import md5
 import re
-import locale # Used only with pyICU
 import logging
 from xml.sax.saxutils import escape

@ -45,6 +43,30 @@ from gramps.plugins.lib.libgedcom import make_gedcom_date, DATE_QUALITY
 from gramps.gen.plug.report import utils
 from gramps.plugins.lib.libhtml import Html

+HAVE_ICU = False
+HAVE_ALPHABETICINDEX = False #separate check as this is only in ICU 4.6+
+try:
+    from icu import Locale
+    HAVE_ICU = True
+    try:
+        from icu import AlphabeticIndex as icuAlphabeticIndex
+        HAVE_ALPHABETICINDEX = True
+    except ImportError:
+        from gramps.plugins.webreport.alphabeticindex \
+            import AlphabeticIndex as localAlphabeticIndex
+except ImportError:
+    try:
+        from PyICU import Locale
+        HAVE_ICU = True
+        try:
+            from PyICU import AlphabeticIndex as icuAlphabeticIndex
+            HAVE_ALPHABETICINDEX = True
+        except ImportError:
+            from gramps.plugins.webreport.alphabeticindex \
+                import AlphabeticIndex as localAlphabeticIndex
+    except ImportError:
+        pass
+
 LOG = logging.getLogger(".NarrativeWeb")

 # define clear blank line for proper styling
@ -418,28 +440,57 @@ def do_we_have_holidays(lang):
            return idx
    return None

+def get_surname_from_person(dbase, person):
+    """
+    Get the person's surname, as used for grouping:
+    - get the primary name
+    - if it has a group_as value, use the group_as surname
+    - else get the primary surname of the primary name
+      and correct for the [global] group_as name mapping
+    - surnames that are whitespace or None are treated as ''
+    """
+    primary_name = person.get_primary_name()
+
+    if primary_name.group_as:
+        surname = primary_name.group_as
+    else:
+        group_map = _nd.primary_surname(primary_name)
+        surname = dbase.get_name_group_mapping(group_map)
+
+    # Treat people who have no name with those whose name is just
+    # 'whitespace'
+    if surname is None or surname.isspace():
+        surname = ''
+    return surname
+
 def sort_people(dbase, handle_list, rlocale=glocale):
    """
    will sort the database people by surname
+    @param: dbase           -- The instance of the database
+    @param: handle_list     -- The list of handles of people to sort
+    @param: rlocale         -- The locale related to the language used for the
+                               sort
+    @result:                -- A list sorted by surname, each element of which
+                               consists of a tuple of (surname, list of handles)
+                               where the list of handles is sorted by
+                               primary surname, first name, suffix.
+                               Surname uses group_as, but primary surname
+                               does not.
+    get the primary name
+    if group as get the group_as surname
+    else get the primary surname of the primary name
+         and correct for [global] group_as name
+    correct for surnames that are space or None
+    for each surname sort handles by the surname, first name and suffix
+    construct a list of surnames and list of handles
    """
    sname_sub = defaultdict(list)
    sortnames = {}

    for person_handle in handle_list:
        person = dbase.get_person_from_handle(person_handle)
-        primary_name = person.get_primary_name()
-
-        if primary_name.group_as:
-            surname = primary_name.group_as
-        else:
-            group_map = _nd.primary_surname(primary_name)
-            surname = dbase.get_name_group_mapping(group_map)
-
-        # Treat people who have no name with those whose name is just
-        # 'whitespace'
-        if surname is None or surname.isspace():
-            surname = ''
-        sortnames[person_handle] = _nd.sort_string(primary_name)
+        surname = get_surname_from_person(dbase, person)
+        sortnames[person_handle] = _nd.sort_string(person.get_primary_name())
        sname_sub[surname].append(person_handle)

    sorted_lists = []
@ -536,242 +587,37 @@ def __get_place_keyname(dbase, handle):

    return utils.place_name(dbase, handle)

-# See : http://www.gramps-project.org/bugs/view.php?id = 4423
-
-# Contraction data taken from CLDR 22.1. Only the default variant is considered.
-# The languages included below are, by no means, all the langauges that have
-# contractions - just a sample of langauges that have been supported
-
-# At the time of writing (Feb 2013), the following langauges have greater that
-# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da
-# Danish, de German, el Greek, en_GB, es Spanish, fi Finish, fr French, he
-# Hebrew, hr Croation, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb
-# Noregian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese
-# (Brazil), pt_P Portugeuse (Portugal), ru Russian, sk Slovak, sl Slovenian, sv
-# Swedish, vi Vietnamese, zh_CN Chinese.
-
-# Key is the language (or language and country), Value is a list of
-# contractions. Each contraction consists of a tuple. First element of the
-# tuple is the list of characters, second element is the string to use as the
-# index entry.
-
-# The DUCET contractions (e.g. LATIN CAPIAL LETTER L, MIDDLE DOT) are ignored,
-# as are the supresscontractions in some locales.
-
-CONTRACTIONS_DICT = {
-    # bg Bulgarian validSubLocales="bg_BG" no contractions
-    # ca Catalan validSubLocales="ca_AD ca_ES"
-    "ca" : [(("l·", "L·"), "L")],
-    # Czech, validSubLocales="cs_CZ" Czech_Czech Republic
-    "cs" : [(("ch", "cH", "Ch", "CH"), "CH")],
-    # Danish validSubLocales="da_DK" Danish_Denmark
-    "da" : [(("aa", "Aa", "AA"), "Å")],
-    # de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no
-    # contractions in standard collation.
-    # el Greek validSubLocales="el_CY el_GR" no contractions.
-    # es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU
-    # es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE
-    # es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard
-    # collation.
-    # fi Finish validSubLocales="fi_FI" no contractions in default (phonebook)
-    # collation.
-    # fr French no collation data.
-    # he Hebrew validSubLocales="he_IL" no contractions
-    # hr Croation validSubLocales="hr_BA hr_HR"
-    "hr" : [(("dž", "Dž"), "dž"),
-            (("lj", "Lj", 'LJ'), "Ǉ"),
-            (("Nj", "NJ", "nj"), "Ǌ")],
-    # Hungarian hu_HU for two and three character contractions.
-    "hu" : [(("cs", "Cs", "CS"), "CS"),
-            (("dzs", "Dzs", "DZS"), "DZS"), # order is important
-            (("dz", "Dz", "DZ"), "DZ"),
-            (("gy", "Gy", "GY"), "GY"),
-            (("ly", "Ly", "LY"), "LY"),
-            (("ny", "Ny", "NY"), "NY"),
-            (("sz", "Sz", "SZ"), "SZ"),
-            (("ty", "Ty", "TY"), "TY"),
-            (("zs", "Zs", "ZS"), "ZS")
-           ],
-    # it Italian no collation data.
-    # ja Japanese unable to process the data as it is too complex.
-    # lt Lithuanian no contractions.
-    # Norwegian Bokmål
-    "nb" : [(("aa", "Aa", "AA"), "Å")],
-    # nn Norwegian Nynorsk validSubLocales="nn_NO"
-    "nn" : [(("aa", "Aa", "AA"), "Å")],
-    # nl Dutch no collation data.
-    # pl Polish validSubLocales="pl_PL" no contractions
-    # pt Portuguese no collation data.
-    # ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no
-    # contractions
-    # Slovak,  validSubLocales="sk_SK" Slovak_Slovakia
-    # having DZ in Slovak as a contraction was rejected in
-    # http://unicode.org/cldr/trac/ticket/2968
-    "sk" : [(("ch", "cH", "Ch", "CH"), "Ch")],
-    # sl Slovenian validSubLocales="sl_SI" no contractions
-    # sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is
-    # "reformed" no contractions.
-    # vi Vietnamese validSubLocales="vi_VN" no contractions.
-    # zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions
-    # in Latin characters the others are too complex.
-    }
-
-    # The comment below from the glibc locale sv_SE in
-    # localedata/locales/sv_SE :
-    #
-    # % The letter w is normally not present in the Swedish alphabet. It
-    # % exists in some names in Swedish and foreign words, but is accounted
-    # % for as a variant of 'v'.  Words and names with 'w' are in Swedish
-    # % ordered alphabetically among the words and names with 'v'. If two
-    # % words or names are only to be distinguished by 'v' or % 'w', 'v' is
-    # % placed before 'w'.
-    #
-    # See : http://www.gramps-project.org/bugs/view.php?id = 2933
-    #
-
-# HOWEVER: the characters V and W in Swedish are not considered as a special
-# case for several reasons. (1) The default collation for Swedish (called the
-# 'reformed' collation type) regards the difference between 'v' and 'w' as a
-# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation
-# type are not a contraction, just a case where the difference is secondary
-# rather than primary. (3) There are plenty of other languages where a
-# difference that is primary in other languages is secondary, and those are not
-# specially handled.
-
-def first_letter(string, rlocale=glocale):
-    """
-    Receives a string and returns the first letter
-    """
-    dummy_rlocale = rlocale
-    if string is None or len(string) < 1:
-        return ' '
-
-    norm_unicode = normalize('NFKC', str(string))
-    contractions = CONTRACTIONS_DICT.get(COLLATE_LANG)
-    if contractions is None:
-        contractions = CONTRACTIONS_DICT.get(COLLATE_LANG.split("_")[0])
-
-    if contractions is not None:
-        for contraction in contractions:
-            count = len(contraction[0][0])
-            if (len(norm_unicode) >= count and
-                    norm_unicode[:count] in contraction[0]):
-                return contraction[1]
-
-    # no special case
-    return norm_unicode[0].upper()
-
-
-try:
-    import PyICU # pylint : disable=wrong-import-position
-    PRIM_COLL = PyICU.Collator.createInstance(PyICU.Locale(COLLATE_LANG))
-    PRIM_COLL.setStrength(PRIM_COLL.PRIMARY)
-
-    def primary_difference(prev_key, new_key, rlocale=glocale):
+if HAVE_ALPHABETICINDEX:
+    class AlphabeticIndex(icuAlphabeticIndex):
        """
-        Try to use the PyICU collation.
-        If we generate a report for another language, make sure we use the good
-        collation sequence
+        Call the ICU AlphabeticIndex, passing the ICU Locale
        """
-        collation = PRIM_COLL
-        if rlocale.lang != locale.getlocale(locale.LC_COLLATE)[0]:
-            encoding = rlocale.encoding if rlocale.encoding else "UTF-8"
-            collate_lang = PyICU.Locale(rlocale.collation+"."+encoding)
-            collation = PyICU.Collator.createInstance(collate_lang)
-        return collation.compare(prev_key, new_key) != 0
+        def __init__(self, rlocale):
+            self.iculocale = Locale(rlocale.collation)
+            super().__init__(self.iculocale)

-except:
-    def primary_difference(prev_key, new_key, rlocale=glocale):
-        """
-        The PyICU collation is not available.
+            # set the maximum number of buckets, the undocumented default is 99
+            # Latin + Greek + Cyrillic + Hebrew + Arabic + Tamil + Hiragana +
+            # CJK Unified is about 206 different buckets
+            self.maxLabelCount = 500 # pylint: disable=invalid-name

-        Returns true if there is a primary difference between the two parameters
-        See http://www.gramps-project.org/bugs/view.php?id=2933#c9317 if
-        letter[i]+'a' < letter[i+1]+'b' and letter[i+1]+'a' < letter[i]+'b' is
-        true then the letters should be grouped together
+            # Add bucket labels for scripts other than the one for the output
+            # which is being generated
+            self.iculocale.addLikelySubtags()
+            default_script = self.iculocale.getDisplayScript()
+            used_scripts = [default_script]

-        The test characters here must not be any that are used in contractions.
-        """
+            for lang_code in glocale.get_language_dict().values():
+                loc = Locale(lang_code)
+                loc.addLikelySubtags()
+                script = loc.getDisplayScript()
+                if script not in used_scripts:
+                    used_scripts.append(script)
+                    super().addLabels(loc)
+else:
+    AlphabeticIndex = localAlphabeticIndex

-        return rlocale.sort_key(prev_key + "e") >= \
-                   rlocale.sort_key(new_key + "f") or \
-                   rlocale.sort_key(new_key + "e") >= \
-                   rlocale.sort_key(prev_key + "f")
-
-def get_first_letters(dbase, handle_list, key, rlocale=glocale):
-    """
-    get the first letters of the handle_list
-
-    @param: handle_list -- One of a handle list for either person or
-                           place handles or an evt types list
-    @param: key         -- Either a person, place, or event type
-    @param: rlocale     -- The locale to use
-
-    The first letter (or letters if there is a contraction) are extracted from
-    all the objects in the handle list. There may be duplicates, and there may
-    be letters where there is only a secondary or tertiary difference, not a
-    primary difference. The list is sorted in collation order. For each group
-    with secondary or tertiary differences, the first in collation sequence is
-    retained. For example, assume the default collation sequence (DUCET) and
-    names Ånström and Apple. These will sort in the order shown. Å and A have a
-    secondary difference. If the first letter from these names was chosen then
-    the inex entry would be Å. This is not desirable. Instead, the initial
-    letters are extracted (Å and A). These are sorted, which gives A and Å. Then
-    the first of these is used for the index entry.
-    """
-    index_list = []
-
-    for handle in handle_list:
-        if key == _KEYPERSON:
-            keyname = __get_person_keyname(dbase, handle)
-
-        elif key == _KEYPLACE:
-            keyname = __get_place_keyname(dbase, handle)
-
-        else:
-            if rlocale != glocale:
-                keyname = rlocale.translation.sgettext(handle)
-            else:
-                keyname = handle
-        ltr = first_letter(keyname)
-
-        index_list.append(ltr)
-
-    # Now remove letters where there is not a primary difference
-    index_list.sort(key=rlocale.sort_key)
-    first = True
-    prev_index = None
-    for nkey in index_list[:]:   #iterate over a slice copy of the list
-        if first or primary_difference(prev_index, nkey, rlocale):
-            first = False
-            prev_index = nkey
-        else:
-            index_list.remove(nkey)
-
-    # return menu set letters for alphabet_navigation
-    return index_list
-
-def get_index_letter(letter, index_list, rlocale=glocale):
-    """
-    This finds the letter in the index_list that has no primary difference from
-    the letter provided. See the discussion in get_first_letters above.
-    Continuing the example, if letter is Å and index_list is A, then this would
-    return A.
-
-    @param: letter     -- The letter to find in the index_list
-    @param: index_list -- The list of all first letters in use
-    @param: rlocale    -- The locale to use
-    """
-    for index in index_list:
-        if not primary_difference(letter, index, rlocale):
-            return index
-
-    LOG.warning("Initial letter '%s' not found in alphabetic navigation list",
-                letter)
-    LOG.debug("filtered sorted index list %s", index_list)
-    return letter
-
-def alphabet_navigation(index_list, rlocale=glocale):
+def alphabet_navigation(sorted_alpha_index, rlocale=glocale):
    """
    Will create the alphabet navigation bar for classes IndividualListPage,
    SurnameListPage, PlaceListPage, and EventList
@ -779,14 +625,6 @@ def alphabet_navigation(index_list, rlocale=glocale):
    @param: index_list -- a dictionary of either letters or words
    @param: rlocale    -- The locale to use
    """
-    sorted_set = defaultdict(int)
-
-    for menu_item in index_list:
-        sorted_set[menu_item] += 1
-
-    # remove the number of each occurance of each letter
-    sorted_alpha_index = sorted(sorted_set, key=rlocale.sort_key)
-
    # if no letters, return None to its callers
    if not sorted_alpha_index:
        return None
@ -799,6 +637,8 @@ def alphabet_navigation(index_list, rlocale=glocale):
    with Html("div", id="alphanav") as alphabetnavigation:

        index = 0
+        output = []
+        dup_index = 0
        for dummy_row in range(num_of_rows):
            unordered = Html("ul")

@ -811,8 +651,16 @@ def alphabet_navigation(index_list, rlocale=glocale):
                # braille writers
                title_txt = "Alphabet Menu: %s" % menu_item
                title_str = rlocale.translation.sgettext(title_txt)
+                # deal with multiple ellipsis which are generated for overflow,
+                # underflow and inflow labels
+                link = menu_item
+                if menu_item in output:
+                    link = "%s (%i)" % (menu_item, dup_index)
+                    dup_index += 1
+                output.append(menu_item)
+
                hyper = Html("a", menu_item, title=title_str,
-                             href="#%s" % menu_item)
+                             href="#%s" % link)
                unordered.extend(Html("li", hyper, inline=True))

                index += 1
--- a/gramps/plugins/webreport/event.py
+++ b/gramps/plugins/webreport/event.py
@ -58,11 +58,10 @@ from gramps.plugins.lib.libhtml import Html
 # specific narrative web import
 #------------------------------------------------
 from gramps.plugins.webreport.basepage import BasePage
-from gramps.plugins.webreport.common import (get_first_letters, _ALPHAEVENT,
-                                             _EVENTMAP, alphabet_navigation,
+from gramps.plugins.webreport.common import (_EVENTMAP,
+                                             alphabet_navigation,
                                             FULLCLEAR, sort_event_types,
-                                             primary_difference,
-                                             get_index_letter)
+                                             AlphabeticIndex)

 _ = glocale.translation.sgettext
 LOG = logging.getLogger(".NarrativeWeb")
@ -128,6 +127,107 @@ class EventPages(BasePage):
        self.eventlistpage(self.report, the_lang, the_title, event_types,
                           event_handle_list)

+
+    def __output_event(self, ldatec, event_type, tbody, bucket_letter,
+                       bucket_link, first_letter, _event_displayed, first_type,
+                       event_handle):
+        """
+        Generate and output the data for a single event
+
+        @param: ldatec          -- Last change date and time (updated)
+        @param: event_type      -- The event type name processed for XML etc
+        @param: tbody           -- The current HTML body into which the data is
+                                   assembled
+        @param: bucket_letter   -- The AlphabeticIndex bucket for this event
+        @param: bucket_link     -- Value used as the id of the letter's anchor
+        @param: first_letter    -- Whether this is the first event for this
+                                   letter
+        @param: _event_displayed -- List of Gramps IDs already displayed
+        @param: first_type      -- Whether this is the first event of this type
+        @param: event_handle    -- Handle of the event to be output
+
+        :returns: Returns a tuple of updated ldatec, first_letter, first_type,
+                    _event_displayed
+        :rtype: tuple
+
+        """
+        event = self.r_db.get_event_from_handle(event_handle)
+        _type = event.get_type()
+        gid = event.get_gramps_id()
+        if event.get_change_time() > ldatec:
+            ldatec = event.get_change_time()
+        # check to see if we have listed this gramps_id yet?
+        if gid not in _event_displayed:
+            if int(_type) in _EVENTMAP:
+                handle_list = set(self.r_db.find_backlink_handles(
+                        event_handle,
+                        include_classes=['Family', 'Person']))
+            else:
+                handle_list = set(
+                    self.r_db.find_backlink_handles(
+                        event_handle,
+                        include_classes=['Person']))
+            if handle_list:
+                trow = Html("tr")
+                tbody += trow
+                # set up hyperlinked letter for alphabet_navigation
+                tcell = Html("td", class_="ColumnLetter", inline=True)
+                trow += tcell
+                if first_letter:
+                    first_letter = False
+                    first_type = False
+                    # Update the ColumnLetter cell and
+                    # create a populated ColumnType
+                    t_a = 'class = "BeginLetter BeginType"'
+                    trow.attr = t_a
+                    letter = bucket_letter
+                    ttle = self._("Event types beginning "
+                        "with letter %s") % letter
+                    tcell += Html("a", letter, name=letter, id_=bucket_link,
+                                  title=ttle)
+                    tcell = Html("td", class_="ColumnType",
+                                 title=self._(event_type),
+                                 inline=True)
+                    tcell += self._(event_type)
+                elif first_type:
+                    first_type = False
+                    # Leave the ColumnLetter cell empty and
+                    # create a populated ColumnType cell
+                    trow.attr = 'class = "BeginType"'
+                    tcell = Html("td", class_="ColumnType",
+                                 title=self._(event_type), inline=True)
+                    tcell += self._(event_type)
+                else:
+                    tcell = Html("td", class_="ColumnType",
+                                 title="&nbsp;", inline=True)
+                    tcell += "&nbsp;" # create a non-populated ColumnType
+                # Add the ColumnType cell
+                trow += tcell
+                # event date
+                tcell = Html("td", class_="ColumnDate", inline=True)
+                trow += tcell
+                date = Date.EMPTY
+                if event:
+                    date = event.get_date_object()
+                    if date and date is not Date.EMPTY:
+                        tcell += self.rlocale.get_date(date)
+                else:
+                    tcell += "&nbsp;"
+                # Gramps ID
+                trow += Html("td", class_="ColumnGRAMPSID") \
+                        + (self.event_grampsid_link(event_handle, gid, None))
+                # Person(s) column
+                tcell = Html("td", class_="ColumnPerson")
+                trow += tcell
+                # classname can either be a person or a family
+                first_person = True
+                # get person(s) for ColumnPerson
+                sorted_list = sorted(handle_list)
+                self.complete_people(tcell, first_person, sorted_list,
+                                     uplink=False)
+        _event_displayed.append(gid)
+        return (ldatec, first_letter, first_type, _event_displayed)
+
    def eventlistpage(self, report, the_lang, the_title,
                      event_types, event_handle_list):
        """
@ -142,7 +242,6 @@ class EventPages(BasePage):
        """
        BasePage.__init__(self, report, the_lang, the_title)
        ldatec = 0
-        prev_letter = " "

        output_file, sio = self.report.create_file("events")
        result = self.write_header(self._("Events"))
@ -159,8 +258,21 @@ class EventPages(BasePage):
            eventlist += Html("p", msg, id="description")

            # get alphabet navigation...
-            index_list = get_first_letters(self.r_db, event_types,
-                                           _ALPHAEVENT)
+            # Assemble all the event types
+            index = AlphabeticIndex(self.rlocale)
+            for (event_type, data_list) in sort_event_types(self.r_db,
+                                                event_types,
+                                                event_handle_list,
+                                                self.rlocale):
+                index.addRecord(event_type, data_list)
+
+            # Extract the buckets from the index
+            index_list = []
+            index.resetBucketIterator()
+            while index.nextBucket():
+                if index.bucketRecordCount != 0:
+                    index_list.append(index.bucketLabel)
+            # Output the navigation
            alpha_nav = alphabet_navigation(index_list, self.rlocale)
            if alpha_nav:
                eventlist += alpha_nav
@ -191,123 +303,37 @@ class EventPages(BasePage):
                tbody = Html("tbody")
                table += tbody

-                # separate events by their type and then thier event handles
-                savevtyp = " "
-                for (evt_type,
-                     data_list) in sort_event_types(self.r_db,
-                                                    event_types,
-                                                    event_handle_list,
-                                                    self.rlocale):
-                    first = True
-                    _event_displayed = []
-
-                    # sort datalist by date of event and by event handle...
-                    data_list = sorted(data_list, key=itemgetter(0, 1))
-                    first_event = True
-
-                    for (dummy_sort_value, event_handle) in data_list:
-                        event = self.r_db.get_event_from_handle(event_handle)
-                        _type = event.get_type()
-                        gid = event.get_gramps_id()
-                        if event.get_change_time() > ldatec:
-                            ldatec = event.get_change_time()
-
-                        # check to see if we have listed this gramps_id yet?
-                        if gid not in _event_displayed:
-
-                            # family event
-                            if int(_type) in _EVENTMAP:
-                                handle_list = set(
-                                    self.r_db.find_backlink_handles(
-                                        event_handle,
-                                        include_classes=['Family', 'Person']))
-                            else:
-                                handle_list = set(
-                                    self.r_db.find_backlink_handles(
-                                        event_handle,
-                                        include_classes=['Person']))
-                            if handle_list:
-
-                                trow = Html("tr")
-                                tbody += trow
-
-                                # set up hyperlinked letter for
-                                # alphabet_navigation
-                                tcell = Html("td", class_="ColumnLetter",
-                                             inline=True)
-                                trow += tcell
-
-                                if evt_type and not evt_type.isspace():
-                                    letter = get_index_letter(
-                                        self._(str(evt_type)[0].capitalize()),
-                                        index_list, self.rlocale)
-                                    if letter != savevtyp:
-                                        savevtyp = letter
-                                    else:
-                                        letter = "&nbsp;"
-                                else:
-                                    letter = "&nbsp;"
-
-                                if first or primary_difference(letter,
-                                                               prev_letter,
-                                                               self.rlocale):
-                                    first = False
-                                    prev_letter = letter
-                                    t_a = 'class = "BeginLetter BeginType"'
-                                    trow.attr = t_a
-                                    ttle = self._("Event types beginning "
-                                                  "with letter %s") % letter
-                                    tcell += Html("a", letter, name=letter,
-                                                  id_=letter, title=ttle,
-                                                  inline=True)
-                                else:
-                                    tcell += "&nbsp;"
-
-                                # display Event type if first in the list
-                                tcell = Html("td", class_="ColumnType",
-                                             title=self._(evt_type),
-                                             inline=True)
-                                trow += tcell
-                                if first_event:
-                                    tcell += self._(evt_type)
-                                    if trow.attr == "":
-                                        trow.attr = 'class = "BeginType"'
-                                else:
-                                    tcell += "&nbsp;"
-
-                                # event date
-                                tcell = Html("td", class_="ColumnDate",
-                                             inline=True)
-                                trow += tcell
-                                date = Date.EMPTY
-                                if event:
-                                    date = event.get_date_object()
-                                    if date and date is not Date.EMPTY:
-                                        tcell += self.rlocale.get_date(date)
-                                else:
-                                    tcell += "&nbsp;"
-
-                                # Gramps ID
-                                trow += Html("td", class_="ColumnGRAMPSID") + (
-                                    self.event_grampsid_link(event_handle,
-                                                             gid, None)
-                                    )
-
-                                # Person(s) column
-                                tcell = Html("td", class_="ColumnPerson")
-                                trow += tcell
-
-                                # classname can either be a person or a family
-                                first_person = True
-
-                                # get person(s) for ColumnPerson
-                                sorted_list = sorted(handle_list)
-                                self.complete_people(tcell, first_person,
-                                                     sorted_list,
-                                                     uplink=False)
-
-                        _event_displayed.append(gid)
-                        first_event = False
+                # for each bucket, output the events in that bucket
+                index.resetBucketIterator()
+                output = []
+                dup_index = 0
+                while index.nextBucket():
+                    if index.bucketRecordCount != 0:
+                        bucket_letter = index.bucketLabel
+                        bucket_link = bucket_letter
+                        if bucket_letter in output:
+                            bucket_link = "%s (%i)" % (bucket_letter, dup_index)
+                            dup_index += 1
+                        output.append(bucket_letter)
+                        first_letter = True
+                        while index.nextRecord():
+                            _event_displayed = []
+                            first_type = True
+                            event_type = index.recordName
+                            data_list = index.recordData
+                            # sort datalist by date of event and by event
+                            # handle...
+                            data_list = sorted(data_list, key=itemgetter(0, 1))
+                            for (dummy_sort_value, event_handle) in data_list:
+                                (ldatec, first_letter, first_type,
+                                _event_displayed) \
+                                = self.__output_event(ldatec, event_type,
+                                                           tbody, bucket_letter,
+                                                           bucket_link,
+                                                           first_letter,
+                                                           _event_displayed,
+                                                           first_type,
+                                                           event_handle)

        # add clearline for proper styling
        # add footer section
--- a/gramps/plugins/webreport/family.py
+++ b/gramps/plugins/webreport/family.py
@ -41,7 +41,7 @@ Classe:
 #------------------------------------------------
 # python modules
 #------------------------------------------------
-from collections import defaultdict
+from collections import defaultdict, OrderedDict
 from decimal import getcontext
 import logging

@ -58,11 +58,11 @@ from gramps.plugins.lib.libhtml import Html
 #------------------------------------------------
 from gramps.plugins.webreport.basepage import BasePage
 from gramps.gen.display.name import displayer as _nd
-from gramps.plugins.webreport.common import (get_first_letters, _KEYPERSON,
-                                             alphabet_navigation, sort_people,
-                                             primary_difference, first_letter,
+from gramps.plugins.webreport.common import (alphabet_navigation,
                                             html_escape,
-                                             FULLCLEAR, get_index_letter)
+                                             FULLCLEAR,
+                                             AlphabeticIndex,
+                                             get_surname_from_person)

 _ = glocale.translation.sgettext
 LOG = logging.getLogger(".NarrativeWeb")
@ -122,6 +122,97 @@ class FamilyPages(BasePage):
            self.familylistpage(self.report, the_lang, the_title,
                                self.report.obj_dict[Family].keys())

+
+    def __output_family(self, ldatec, family_handle, person_handle,
+                        tbody, letter, bucket_link, first_person, first_family):
+        """
+        Generate and output the data for a single family
+
+        @param: ldatec          -- Last change date and time (updated)
+        @param: family_handle   -- The family_handle to be output
+        @param: person_handle   -- The person_handle to be output
+        @param: tbody           -- The HTML table body the row is added to
+        @param: letter          -- The AlphabeticIndex bucket label
+        @param: bucket_link     -- Value used as the id of the letter's anchor
+        @param: first_person    -- Whether this is the first person for this
+                                   letter
+        @param: first_family    -- Whether this is the first family of this
+                                   person
+
+        @returns: Returns a tuple of updated (ldatec, first_person,
+                                              first_family)
+        @rtype: tuple
+        """
+        family = self.r_db.get_family_from_handle(family_handle)
+        if family.get_change_time() > ldatec:
+            ldatec = family.get_change_time()
+
+        trow = Html("tr")
+        tbody += trow
+        tcell = Html("td", class_="ColumnRowLabel")
+        trow += tcell
+        if first_person:
+            first_person = False
+            first_family = False
+            # Update the ColumnRowLabel cell
+            trow.attr = 'class="BeginLetter BeginFamily"'
+            ttle = self._("Families beginning with "
+                "letter ")
+            tcell += Html("a", letter, name=letter, title=ttle + letter,
+                          id_=bucket_link)
+            #  and create the populated ColumnPartner for the person
+            tcell = Html("td", class_="ColumnPartner")
+            tcell += self.new_person_link(person_handle, uplink=self.uplink)
+            trow += tcell
+        elif first_family:
+            first_family = False
+            # Leave the ColumnRowLabel cell empty
+            trow.attr = 'class ="BeginFamily"'
+            #  and create the populated ColumnPartner for the person
+            tcell = Html("td", class_="ColumnPartner")
+            tcell += self.new_person_link(person_handle, uplink=self.uplink)
+            trow += tcell
+        else:
+            # Create the blank ColumnPartner row for the person
+            tcell = Html("td", class_="ColumnPartner")
+            tcell += '&nbsp;'
+            trow += tcell
+
+        tcell = Html("td", class_="ColumnPartner")
+        trow += tcell
+        tcell += self.family_link(family.get_handle(),
+                                  self.report.get_family_name(family),
+                                  family.get_gramps_id(), self.uplink)
+        # family events; such as marriage and divorce
+        # events
+        fam_evt_ref_list = family.get_event_ref_list()
+        tcell1 = Html("td", class_="ColumnDate", inline=True)
+        tcell2 = Html("td", class_="ColumnDate", inline=True)
+        trow += tcell1, tcell2
+        if fam_evt_ref_list:
+            fam_evt_srt_ref_list = sorted(fam_evt_ref_list,
+                                          key=self.sort_on_grampsid)
+            for evt_ref in fam_evt_srt_ref_list:
+                evt = self.r_db.get_event_from_handle(evt_ref.ref)
+                if evt:
+                    evt_type = evt.get_type()
+                    if evt_type in [EventType.MARRIAGE, EventType.DIVORCE]:
+                        cell = self.rlocale.get_date(evt.get_date_object())
+                        if evt_type == EventType.MARRIAGE:
+                            tcell1 += cell
+                        else:
+                            tcell1 += '&nbsp;'
+                        if evt_type == EventType.DIVORCE:
+                            tcell2 += cell
+                        else:
+                            tcell2 += '&nbsp;'
+
+        else:
+            tcell1 += '&nbsp;'
+            tcell2 += '&nbsp;'
+        first_family = False  # redundant: no branch above leaves it true
+        return (ldatec, first_person, first_family)
+
    def familylistpage(self, report, the_lang, the_title, fam_list):
        """
        Create a family index
@ -138,7 +229,6 @@ class FamilyPages(BasePage):
        result = self.write_header(self._("Families"))
        familieslistpage, dummy_head, dummy_body, outerwrapper = result
        ldatec = 0
-        prev_letter = " "

        # begin Family Division
        with Html("div", class_="content", id="Relationships") as relationlist:
@ -154,7 +244,7 @@ class FamilyPages(BasePage):
            relationlist += Html("p", msg, id="description")

            # go through all the families, and construct a dictionary of all the
-            # people and the families thay are involved in. Note that the people
+            # people and the families they are involved in. Note that the people
            # in the list may be involved in OTHER families, that are not listed
            # because they are not in the original family list.
            pers_fam_dict = defaultdict(list)
@ -170,9 +260,22 @@ class FamilyPages(BasePage):
                    if spouse_handle:
                        pers_fam_dict[spouse_handle].append(family)

+            # Assemble all the people, we no longer care about their families
+            index = AlphabeticIndex(self.rlocale)
+            for (person_handle, dummy_family) in pers_fam_dict.items():
+                person = self.r_db.get_person_from_handle(person_handle)
+                surname = get_surname_from_person(self.r_db, person)
+                index.addRecord(surname, person_handle)
+
+            # Extract the buckets from the index
+            index_list = []
+            index.resetBucketIterator()
+            while index.nextBucket():
+                if index.bucketRecordCount != 0:
+                    index_list.append(index.bucketLabel)
+
+            # Output the navigation
            # add alphabet navigation
-            index_list = get_first_letters(self.r_db, pers_fam_dict.keys(),
-                                           _KEYPERSON, rlocale=self.rlocale)
            alpha_nav = alphabet_navigation(index_list, self.rlocale)
            if alpha_nav:
                relationlist += alpha_nav
@ -187,7 +290,7 @@ class FamilyPages(BasePage):
                trow = Html("tr")
                thead += trow

-               # set up page columns
+                # set up page columns
                trow.extend(
                    Html("th", trans, class_=colclass, inline=True)
                    for trans, colclass in [(self._("Letter"),
@ -201,108 +304,56 @@ class FamilyPages(BasePage):
                tbody = Html("tbody")
                table += tbody

-                # begin displaying index list
-                ppl_handle_list = sort_people(self.r_db, pers_fam_dict.keys(),
-                                              self.rlocale)
-                first = True
-                for (surname, handle_list) in ppl_handle_list:
-
-                    if surname and not surname.isspace():
-                        letter = get_index_letter(first_letter(surname),
-                                                  index_list,
-                                                  self.rlocale)
-                    else:
-                        letter = '&nbsp;'
-
-                    # get person from sorted database list
-                    for person_handle in sorted(
-                            handle_list, key=self.sort_on_name_and_grampsid):
-                        person = self.r_db.get_person_from_handle(person_handle)
-                        if person:
-                            family_list = person.get_family_handle_list()
-                            first_family = True
-                            for family_handle in family_list:
-                                get_family = self.r_db.get_family_from_handle
-                                family = get_family(family_handle)
-                                trow = Html("tr")
-                                tbody += trow
-
-                                tcell = Html("td", class_="ColumnRowLabel")
-                                trow += tcell
-
-                                if first or primary_difference(letter,
-                                                               prev_letter,
-                                                               self.rlocale):
-                                    first = False
-                                    prev_letter = letter
-                                    trow.attr = 'class="BeginLetter"'
-                                    ttle = self._("Families beginning with "
-                                                  "letter ")
-                                    tcell += Html("a", letter, name=letter,
-                                                  title=ttle + letter,
-                                                  inline=True)
-                                else:
-                                    tcell += '&nbsp;'
-
-                                tcell = Html("td", class_="ColumnPartner")
-                                trow += tcell
-
-                                if first_family:
-                                    trow.attr = 'class ="BeginFamily"'
-
-                                    tcell += self.new_person_link(
-                                        person_handle, uplink=self.uplink)
-
-                                    first_family = False
-                                else:
-                                    tcell += '&nbsp;'
-
-                                tcell = Html("td", class_="ColumnPartner")
-                                trow += tcell
-
-                                tcell += self.family_link(
-                                    family.get_handle(),
-                                    self.report.get_family_name(family),
-                                    family.get_gramps_id(), self.uplink)
-
-                                # family events; such as marriage and divorce
-                                # events
-                                fam_evt_ref_list = family.get_event_ref_list()
-                                tcell1 = Html("td", class_="ColumnDate",
-                                              inline=True)
-                                tcell2 = Html("td", class_="ColumnDate",
-                                              inline=True)
-                                trow += (tcell1, tcell2)
-
-                                if fam_evt_ref_list:
-                                    fam_evt_srt_ref_list = sorted(
-                                        fam_evt_ref_list,
-                                        key=self.sort_on_grampsid)
-                                    for evt_ref in fam_evt_srt_ref_list:
-                                        evt = self.r_db.get_event_from_handle(
-                                            evt_ref.ref)
-                                        if evt:
-                                            evt_type = evt.get_type()
-                                            if evt_type in [EventType.MARRIAGE,
-                                                            EventType.DIVORCE]:
-
-                                                cell = self.rlocale.get_date(
-                                                    evt.get_date_object())
-                                                if (evt_type ==
-                                                        EventType.MARRIAGE):
-                                                    tcell1 += cell
-                                                else:
-                                                    tcell1 += '&nbsp;'
-
-                                                if (evt_type ==
-                                                        EventType.DIVORCE):
-                                                    tcell2 += cell
-                                                else:
-                                                    tcell2 += '&nbsp;'
-                                else:
-                                    tcell1 += '&nbsp;'
-                                    tcell2 += '&nbsp;'
-                                first_family = False
+                # for each bucket, output the people and their families in that
+                # bucket
+                index.resetBucketIterator()
+                output = []
+                dup_index = 0
+                while index.nextBucket():
+                    if index.bucketRecordCount != 0:
+                        bucket_letter = index.bucketLabel
+                        bucket_link = bucket_letter
+                        if bucket_letter in output:
+                            bucket_link = "%s (%i)" % (bucket_letter, dup_index)
+                            dup_index += 1
+                        output.append(bucket_letter)
+                        # Assemble a dict of all the people in this bucket.
+                        surname_ppl_handle_dict = OrderedDict()
+                        while index.nextRecord():
+                            # The records are returned sorted by recordName,
+                            # which is surname. we need to retain that order but
+                            # in addition sort by the rest of the name
+                            person_surname = index.recordName
+                            person_handle = index.recordData
+                            if person_surname in surname_ppl_handle_dict.keys():
+                                surname_ppl_handle_dict[person_surname]\
+                                    .append(person_handle)
+                            else:
+                                surname_ppl_handle_dict[person_surname] = \
+                                            [person_handle]
+                        first_person = True
+                        for (surname, handle_list) in \
+                                    surname_ppl_handle_dict.items():
+                            # get person from sorted database list
+                            for person_handle in sorted(
+                                    handle_list,
+                                    key=self.sort_on_name_and_grampsid):
+                                person = self.r_db.get_person_from_handle\
+                                                    (person_handle)
+                                if person:
+                                    family_list = person.\
+                                                    get_family_handle_list()
+                                    first_family = True
+                                    for family_handle in family_list:
+                                        (ldatec, first_person, first_family) \
+                                        = self.__output_family(ldatec,
+                                                               family_handle,
+                                                               person_handle,
+                                                               tbody,
+                                                               bucket_letter,
+                                                               bucket_link,
+                                                               first_person,
+                                                               first_family)

        # add clearline for proper styling
        # add footer section
--- a/gramps/plugins/webreport/narrativeweb.py
+++ b/gramps/plugins/webreport/narrativeweb.py
@ -2887,105 +2887,3 @@ class NavWebOptions(MenuReportOptions):
            self.__alive.set_available(False)
            self.__death_anniv.set_available(False)
            self.__after_year.set_available(False)
-
-# See : http://www.gramps-project.org/bugs/view.php?id = 4423
-
-# Contraction data taken from CLDR 22.1. Only the default variant is considered.
-# The languages included below are, by no means, all the languages that have
-# contractions - just a sample of languages that have been supported
-
-# At the time of writing (Feb 2013), the following languages have greater that
-# 50% coverage of translation of Gramps: bg Bulgarian, ca Catalan, cs Czech, da
-# Danish, de German, el Greek, en_GB, es Spanish, fi Finish, fr French, he
-# Hebrew, hr Croation, hu Hungarian, it Italian, ja Japanese, lt Lithuanian, nb
-# Noregian Bokmål, nn Norwegian Nynorsk, nl Dutch, pl Polish, pt_BR Portuguese
-# (Brazil), pt_P Portugeuse (Portugal), ru Russian, sk Slovak, sl Slovenian, sv
-# Swedish, vi Vietnamese, zh_CN Chinese.
-
-# Key is the language (or language and country), Value is a list of
-# contractions. Each contraction consists of a tuple. First element of the
-# tuple is the list of characters, second element is the string to use as the
-# index entry.
-
-# The DUCET contractions (e.g. LATIN CAPIAL LETTER L, MIDDLE DOT) are ignored,
-# as are the supresscontractions in some locales.
-
-CONTRACTIONS_DICT = {
-    # bg Bulgarian validSubLocales="bg_BG" no contractions
-    # ca Catalan validSubLocales="ca_AD ca_ES"
-    "ca" : [(("l·", "L·"), "L")],
-    # Czech, validSubLocales="cs_CZ" Czech_Czech Republic
-    "cs" : [(("ch", "cH", "Ch", "CH"), "CH")],
-    # Danish validSubLocales="da_DK" Danish_Denmark
-    "da" : [(("aa", "Aa", "AA"), "Å")],
-    # de German validSubLocales="de_AT de_BE de_CH de_DE de_LI de_LU" no
-    # contractions in standard collation.
-    # el Greek validSubLocales="el_CY el_GR" no contractions.
-    # es Spanish validSubLocales="es_419 es_AR es_BO es_CL es_CO es_CR es_CU
-    # es_DO es_EA es_EC es_ES es_GQ es_GT es_HN es_IC es_MX es_NI es_PA es_PE
-    # es_PH es_PR es_PY es_SV es_US es_UY es_VE" no contractions in standard
-    # collation.
-    # fi Finish validSubLocales="fi_FI" no contractions in default (phonebook)
-    # collation.
-    # fr French no collation data.
-    # he Hebrew validSubLocales="he_IL" no contractions
-    # hr Croation validSubLocales="hr_BA hr_HR"
-    "hr" : [(("dž", "Dž"), "dž"),
-            (("lj", "Lj", 'LJ'), "Ǉ"),
-            (("Nj", "NJ", "nj"), "Ǌ")],
-    # Hungarian hu_HU for two and three character contractions.
-    "hu" : [(("cs", "Cs", "CS"), "CS"),
-            (("dzs", "Dzs", "DZS"), "DZS"), # order is important
-            (("dz", "Dz", "DZ"), "DZ"),
-            (("gy", "Gy", "GY"), "GY"),
-            (("ly", "Ly", "LY"), "LY"),
-            (("ny", "Ny", "NY"), "NY"),
-            (("sz", "Sz", "SZ"), "SZ"),
-            (("ty", "Ty", "TY"), "TY"),
-            (("zs", "Zs", "ZS"), "ZS")
-           ],
-    # it Italian no collation data.
-    # ja Japanese unable to process the data as it is too complex.
-    # lt Lithuanian no contractions.
-    # Norwegian Bokmål
-    "nb" : [(("aa", "Aa", "AA"), "Å")],
-    # nn Norwegian Nynorsk validSubLocales="nn_NO"
-    "nn" : [(("aa", "Aa", "AA"), "Å")],
-    # nl Dutch no collation data.
-    # pl Polish validSubLocales="pl_PL" no contractions
-    # pt Portuguese no collation data.
-    # ru Russian validSubLocales="ru_BY ru_KG ru_KZ ru_MD ru_RU ru_UA" no
-    # contractions
-    # Slovak,  validSubLocales="sk_SK" Slovak_Slovakia
-    # having DZ in Slovak as a contraction was rejected in
-    # http://unicode.org/cldr/trac/ticket/2968
-    "sk" : [(("ch", "cH", "Ch", "CH"), "Ch")],
-    # sl Slovenian validSubLocales="sl_SI" no contractions
-    # sv Swedish validSubLocales="sv_AX sv_FI sv_SE" default collation is
-    # "reformed" no contractions.
-    # vi Vietnamese validSubLocales="vi_VN" no contractions.
-    # zh Chinese validSubLocales="zh_Hans zh_Hans_CN zh_Hans_SG" no contractions
-    # in Latin characters the others are too complex.
-    }
-
-    # The comment below from the glibc locale sv_SE in
-    # localedata/locales/sv_SE :
-    #
-    # % The letter w is normally not present in the Swedish alphabet. It
-    # % exists in some names in Swedish and foreign words, but is accounted
-    # % for as a variant of 'v'.  Words and names with 'w' are in Swedish
-    # % ordered alphabetically among the words and names with 'v'. If two
-    # % words or names are only to be distinguished by 'v' or % 'w', 'v' is
-    # % placed before 'w'.
-    #
-    # See : http://www.gramps-project.org/bugs/view.php?id = 2933
-    #
-
-# HOWEVER: the characters V and W in Swedish are not considered as a special
-# case for several reasons. (1) The default collation for Swedish (called the
-# 'reformed' collation type) regards the difference between 'v' and 'w' as a
-# primary difference. (2) 'v' and 'w' in the 'standard' (non-default) collation
-# type are not a contraction, just a case where the difference is secondary
-# rather than primary. (3) There are plenty of other languages where a
-# difference that is primary in other languages is secondary, and those are not
-# specially handled.
--- a/gramps/plugins/webreport/person.py
+++ b/gramps/plugins/webreport/person.py
@ -71,16 +71,15 @@ from gramps.gen.relationship import get_relationship_calculator
 # specific narrative web import
 #------------------------------------------------
 from gramps.plugins.webreport.basepage import BasePage
-from gramps.plugins.webreport.common import (get_first_letters, _KEYPERSON,
-                                             alphabet_navigation, sort_people,
-                                             first_letter,
-                                             get_index_letter, add_birthdate,
-                                             primary_difference, FULLCLEAR,
+from gramps.plugins.webreport.common import (alphabet_navigation,
+                                             add_birthdate, FULLCLEAR,
                                             _find_birth_date, _find_death_date,
                                             MARKER_PATH, OPENLAYER,
                                             OSM_MARKERS, STAMEN_MARKERS,
                                             GOOGLE_MAPS, MARKERS, html_escape,
-                                             DROPMASTERS, FAMILYLINKS)
+                                             DROPMASTERS, FAMILYLINKS,
+                                             get_surname_from_person,
+                                             AlphabeticIndex)
 from gramps.plugins.webreport.layout import LayoutTree
 from gramps.plugins.webreport.buchheim import buchheim

@ -167,6 +166,139 @@ class PersonPages(BasePage):
 #    creates the Individual List Page
 #
 #################################################
+
+    def __output_person(self, date, tbody, bucket_letter, bucket_link,
+                        showbirth, showdeath, showpartner, showparents,
+                        surname, surnamed, first_surname, first_individual,
+                        person_handle):
+        """
+        Generate and output the table row for a single person.
+
+        @param: date             -- The latest change time seen so far
+        @param: tbody            -- The table body the new row is added to
+        @param: bucket_letter    -- The index label of the current bucket,
+                                    used for the title and the name of the
+                                    anchor
+        @param: bucket_link      -- The value used as the id of the anchor
+        @param: showbirth        -- If True, output the birth column
+        @param: showdeath        -- If True, output the death column
+        @param: showpartner      -- If True, output the partner column
+        @param: showparents      -- If True, output the parents column
+        @param: surname          -- The surname used in the link titles
+        @param: surnamed         -- The surname as displayed in the row
+        @param: first_surname    -- If True, the row gets the class
+                                    BeginSurname and the anchor for the
+                                    bucket
+        @param: first_individual -- If True (and first_surname is False),
+                                    the surname is shown in the row;
+                                    otherwise the surname cell is blank
+        @param: person_handle    -- The handle of the person to output
+
+        @return: the tuple (date, first_surname, first_individual) to be
+                 passed to the next call; date is raised to the change
+                 time of the person when that is later, and a flag that
+                 was acted upon is returned as False
+        """
+        person = self.r_db.get_person_from_handle(person_handle)
+        # keep track of the most recent change time
+        if person.get_change_time() > date:
+            date = person.get_change_time()
+        # surname column
+        trow = Html("tr")
+        tbody += trow
+        tcell = Html("td", class_="ColumnSurname", inline=True)
+        trow += tcell
+        if first_surname:
+            first_surname = False
+            first_individual = False
+            trow.attr = 'class = "BeginSurname"'
+            # The substitution has to be applied to the translated string:
+            # formatting before the lookup would hand the translator a
+            # message that can never be found in the catalogue.
+            ttle = self._("Surnames %(surname)s beginning "
+                          "with letter %(letter)s") % {
+                              'surname':surname, 'letter':bucket_letter}
+            tcell += Html("a", html_escape(surnamed), name=bucket_letter,
+                          id_=bucket_link, title=ttle)
+        elif first_individual:
+            first_individual = False
+            tcell += Html("a", html_escape(surnamed),
+                          title=self._("Surnames") + " " + surname)
+        else:
+            tcell += "&nbsp;"
+        # firstname column
+        link = self.new_person_link(person_handle, person=person)
+        trow += Html("td", link, class_="ColumnName")
+        # birth column
+        if showbirth:
+            tcell = Html("td", class_="ColumnBirth", inline=True)
+            trow += tcell
+            birth_date = _find_birth_date(self.r_db, person)
+            if birth_date is not None:
+                # a fallback date is emphasised
+                if birth_date.fallback:
+                    tcell += Html('em', self.rlocale.get_date(birth_date),
+                                  inline=True)
+                else:
+                    tcell += self.rlocale.get_date(birth_date)
+            else:
+                tcell += "&nbsp;"
+        # death column
+        if showdeath:
+            tcell = Html("td", class_="ColumnDeath", inline=True)
+            trow += tcell
+            death_date = _find_death_date(self.r_db, person)
+            if death_date is not None:
+                # a fallback date is emphasised
+                if death_date.fallback:
+                    tcell += Html('em', self.rlocale.get_date(death_date),
+                                  inline=True)
+                else:
+                    tcell += self.rlocale.get_date(death_date)
+            else:
+                tcell += "&nbsp;"
+        # partner column
+        if showpartner:
+            family_list = person.get_family_handle_list()
+            first_family = True
+            tcell = ()
+            if family_list:
+                for family_handle in family_list:
+                    family = self.r_db.get_family_from_handle(family_handle)
+                    partner_handle = utils.find_spouse(
+                        person, family)
+                    if partner_handle:
+                        if not first_family:
+                            # have to do this to get the comma on
+                            # the same line as the link
+                            if isinstance(tcell[-1], Html):
+                                # tcell is an instance of Html (or
+                                # of a subclass thereof)
+                                tcell[-1].inside += ","
+                            else:
+                                tcell = tcell[:-1] + (
+                                        # TODO for Arabic, translate?
+                                        (tcell[-1] + ", "), )
+                        # Have to manipulate as tuples so that
+                        # subsequent people are not nested
+                        # within the first link
+                        tcell += (self.new_person_link(partner_handle),)
+                        first_family = False
+
+            else:
+                tcell = "&nbsp;"
+            trow += Html("td", class_="ColumnPartner") + tcell
+        # parents column
+        if showparents:
+            parent_hdl_list = person.get_parent_family_handle_list()
+            if parent_hdl_list:
+                # only the first parent family is shown
+                parent_handle = parent_hdl_list[0]
+                family = self.r_db.get_family_from_handle(parent_handle)
+                father_handle = family.get_father_handle()
+                mother_handle = family.get_mother_handle()
+                if father_handle:
+                    father = self.r_db.get_person_from_handle(father_handle)
+                else:
+                    father = None
+                if mother_handle:
+                    mother = self.r_db.get_person_from_handle(mother_handle)
+                else:
+                    mother = None
+                if father:
+                    father_name = self.get_name(father)
+                if mother:
+                    mother_name = self.get_name(mother)
+                samerow = False
+                if mother and father:
+                    tcell = (Html("span", father_name,
+                                  class_="father fatherNmother", inline=True),
+                             Html("span", mother_name,
+                                  class_="mother", inline=True))
+                elif mother:
+                    tcell = Html("span", mother_name, class_="mother",
+                                 inline=True)
+                elif father:
+                    tcell = Html("span", father_name, class_="father",
+                                 inline=True)
+                else:
+                    tcell = "&nbsp;"
+                    samerow = True
+            else:
+                tcell = "&nbsp;"
+                samerow = True
+            trow += Html("td", class_="ColumnParents", inline=samerow) + tcell
+        return (date, first_surname, first_individual)
+
    def individuallistpage(self, report, the_lang, the_title, ppl_handle_list):
        """
        Creates an individual page
@ -179,7 +311,6 @@ class PersonPages(BasePage):
                                   to create a page.
        """
        BasePage.__init__(self, report, the_lang, the_title)
-        prev_letter = " "

        # plugin variables for this module
        showbirth = report.options['showbirth']
@ -205,8 +336,27 @@ class PersonPages(BasePage):
            individuallist += Html("p", msg, id="description")

            # add alphabet navigation
-            index_list = get_first_letters(self.r_db, ppl_handle_list,
-                                           _KEYPERSON, rlocale=self.rlocale)
+            # Assemble all the handles for each surname into a dictionary
+            # We don't call sort_people because we don't care about sorting
+            # individuals, only surnames
+            surname_handle_dict = defaultdict(list)
+            for person_handle in ppl_handle_list:
+                person = self.r_db.get_person_from_handle(person_handle)
+                surname = get_surname_from_person(self.r_db, person)
+                surname_handle_dict[surname].append(person_handle)
+
+            # Assemble the alphabeticIndex
+            index = AlphabeticIndex(self.rlocale)
+            for surname, handle_list in surname_handle_dict.items():
+                index.addRecord(surname, handle_list)
+
+            # Extract the buckets from the index
+            index_list = []
+            index.resetBucketIterator()
+            while index.nextBucket():
+                if index.bucketRecordCount != 0:
+                    index_list.append(index.bucketLabel)
+            # Output the navigation
            alpha_nav = alphabet_navigation(index_list, self.rlocale)
            if alpha_nav is not None:
                individuallist += alpha_nav
@ -248,179 +398,57 @@ class PersonPages(BasePage):
            tbody = Html("tbody")
            table += tbody

-            ppl_handle_list = sort_people(self.r_db, ppl_handle_list,
-                                          self.rlocale)
-            first = True
-            name_format = self.report.options['name_format']
-            nme_format = _nd.name_formats[name_format][1]
-            for (surname, handle_list) in ppl_handle_list:
+            # for each bucket, output the surnames in that bucket
+            index.resetBucketIterator()
+            output = []
+            dup_index = 0
+            while index.nextBucket():
+                if index.bucketRecordCount != 0:
+                    surname_handle_dict = defaultdict(list)
+                    bucket_letter = index.bucketLabel
+                    bucket_link = bucket_letter
+                    if bucket_letter in output:
+                        bucket_link = "%s (%i)" % (bucket_letter, dup_index)
+                        dup_index += 1
+                    output.append(bucket_letter)
+                    while index.nextRecord():
+                        surname = index.recordName
+                        handle_list = index.recordData
+                        for handle in handle_list:
+                            surname_handle_dict[surname].append(handle)
+                    surname_handle_list = list(surname_handle_dict.items())
+                    # sort by surname
+                    surname_handle_list.sort(key=lambda x:
+                                             self.rlocale.sort_key(x[0]))

-                if surname and not surname.isspace():
-                    letter = get_index_letter(first_letter(surname), index_list,
-                                              self.rlocale)
-                else:
-                    letter = '&nbsp'
-                    surname = self._("<absent>")
+                    name_format = self.report.options['name_format']
+                    nme_format = _nd.name_formats[name_format][1]
+                    for (surname, handle_list) in surname_handle_list:
+                        if not surname or surname.isspace():
+                            surname = self._("<absent>")

-                # In case the user choose a format name like "*SURNAME*"
-                # We must display this field in upper case. So we use the
-                # english format of format_name to find if this is the case.
-                # name_format = self.report.options['name_format']
-                # nme_format = _nd.name_formats[name_format][1]
-                if "SURNAME" in nme_format:
-                    surnamed = surname.upper()
-                else:
-                    surnamed = surname
-                first_surname = True
-                for person_handle in sorted(handle_list,
-                                            key=self.sort_on_name_and_grampsid):
-                    person = self.r_db.get_person_from_handle(person_handle)
-                    if person.get_change_time() > date:
-                        date = person.get_change_time()
-
-                    # surname column
-                    trow = Html("tr")
-                    tbody += trow
-                    tcell = Html("td", class_="ColumnSurname", inline=True)
-                    trow += tcell
-
-                    if first or primary_difference(letter, prev_letter,
-                                                   self.rlocale):
-                        first = False
-                        first_surname = False
-                        prev_letter = letter
-                        trow.attr = 'class = "BeginSurname"'
-                        ttle = self._("Surnames %(surname)s beginning "
-                                      "with letter %(letter)s" %
-                                      {'surname' : surname,
-                                       'letter' : letter})
-                        tcell += Html(
-                            "a", html_escape(surnamed), name=letter,
-                            id_=letter,
-                            title=ttle)
-                    elif first_surname:
-                        first_surname = False
-                        tcell += Html("a", html_escape(surnamed),
-                                      title=self._("Surnames") + " " + surname)
-                    else:
-                        tcell += "&nbsp;"
-
-                    # firstname column
-                    link = self.new_person_link(person_handle, person=person)
-                    trow += Html("td", link, class_="ColumnName")
-
-                    # birth column
-                    if showbirth:
-                        tcell = Html("td", class_="ColumnBirth", inline=True)
-                        trow += tcell
-
-                        birth_date = _find_birth_date(self.r_db, person)
-                        if birth_date is not None:
-                            if birth_date.fallback:
-                                tcell += Html('em',
-                                              self.rlocale.get_date(birth_date),
-                                              inline=True)
-                            else:
-                                tcell += self.rlocale.get_date(birth_date)
+                        # If the user chose a name format like "*SURNAME*",
+                        # the surname must be displayed in upper case. The
+                        # English form of the selected name format
+                        # (nme_format, looked up above from the report's
+                        # 'name_format' option) is checked to find out
+                        # whether this is the case.
+                        if "SURNAME" in nme_format:
+                            surnamed = surname.upper()
                        else:
-                            tcell += "&nbsp;"
-
-                    # death column
-                    if showdeath:
-                        tcell = Html("td", class_="ColumnDeath", inline=True)
-                        trow += tcell
-
-                        death_date = _find_death_date(self.r_db, person)
-                        if death_date is not None:
-                            if death_date.fallback:
-                                tcell += Html('em',
-                                              self.rlocale.get_date(death_date),
-                                              inline=True)
-                            else:
-                                tcell += self.rlocale.get_date(death_date)
-                        else:
-                            tcell += "&nbsp;"
-
-                    # partner column
-                    if showpartner:
-
-                        family_list = person.get_family_handle_list()
-                        first_family = True
-                        #partner_name = None
-                        tcell = ()
-                        if family_list:
-                            for family_handle in family_list:
-                                family = self.r_db.get_family_from_handle(
-                                    family_handle)
-                                partner_handle = utils.find_spouse(
-                                    person, family)
-                                if partner_handle:
-                                    if not first_family:
-                                        # have to do this to get the comma on
-                                        # the same line as the link
-                                        if isinstance(tcell[-1], Html):
-                                            # tcell is an instance of Html (or
-                                            # of a subclass thereof)
-                                            tcell[-1].inside += ","
-                                        else:
-                                            tcell = tcell[:-1] + (
-                                                # TODO for Arabic, translate?
-                                                (tcell[-1] + ", "),)
-                                    # Have to manipulate as tuples so that
-                                    # subsequent people are not nested
-                                    # within the first link
-                                    tcell += (
-                                        self.new_person_link(partner_handle),)
-                                    first_family = False
-                        else:
-                            tcell = "&nbsp;"
-                        trow += Html("td", class_="ColumnPartner") + tcell
-
-                    # parents column
-                    if showparents:
-
-                        parent_hdl_list = person.get_parent_family_handle_list()
-                        if parent_hdl_list:
-                            parent_handle = parent_hdl_list[0]
-                            family = self.r_db.get_family_from_handle(
-                                parent_handle)
-                            father_handle = family.get_father_handle()
-                            mother_handle = family.get_mother_handle()
-                            if father_handle:
-                                father = self.r_db.get_person_from_handle(
-                                    father_handle)
-                            else:
-                                father = None
-                            if mother_handle:
-                                mother = self.r_db.get_person_from_handle(
-                                    mother_handle)
-                            else:
-                                mother = None
-                            if father:
-                                father_name = self.get_name(father)
-                            if mother:
-                                mother_name = self.get_name(mother)
-                            samerow = False
-                            if mother and father:
-                                tcell = (Html("span", father_name,
-                                              class_="father fatherNmother",
-                                              inline=True),
-                                         Html("span", mother_name,
-                                              class_="mother", inline=True))
-                            elif mother:
-                                tcell = Html("span", mother_name,
-                                             class_="mother", inline=True)
-                            elif father:
-                                tcell = Html("span", father_name,
-                                             class_="father", inline=True)
-                            else:
-                                tcell = "&nbsp;"
-                                samerow = True
-                        else:
-                            tcell = "&nbsp;"
-                            samerow = True
-                        trow += Html("td", class_="ColumnParents",
-                                     inline=samerow) + tcell
+                            surnamed = surname
+                        first_surname = True
+                        first_individual = True
+                        for person_handle in sorted(handle_list,
+                                        key=self.sort_on_name_and_grampsid):
+                            (date, first_surname, first_individual) \
+                            = self.__output_person(date, tbody, bucket_letter,
+                                                   bucket_link, showbirth,
+                                                   showdeath, showpartner,
+                                                   showparents, surname,
+                                                   surnamed, first_surname,
+                                                   first_individual,
+                                                   person_handle)

        # create clear line for proper styling
        # create footer section
--- a/gramps/plugins/webreport/place.py
+++ b/gramps/plugins/webreport/place.py
@ -61,14 +61,12 @@ from gramps.gen.display.place import displayer as _pd
 # specific narrative web import
 #------------------------------------------------
 from gramps.plugins.webreport.basepage import BasePage
-from gramps.plugins.webreport.common import (first_letter,
-                                             alphabet_navigation, GOOGLE_MAPS,
-                                             primary_difference,
-                                             get_index_letter, FULLCLEAR,
+from gramps.plugins.webreport.common import (alphabet_navigation, GOOGLE_MAPS,
+                                             FULLCLEAR,
                                             MARKER_PATH, OPENLAYER,
                                             OSM_MARKERS, STAMEN_MARKERS,
                                             MARKERS, html_escape,
-                                             sort_places)
+                                             sort_places, AlphabeticIndex)

 _ = glocale.translation.sgettext
 LOG = logging.getLogger(".NarrativeWeb")
@ -164,6 +162,57 @@ class PlacePages(BasePage):
            step()
        self.placelistpage(self.report, the_lang, the_title)

+
+    def __output_place(self, ldatec, tbody,
+                       first_place, pname, place_handle, letter, bucket_link):
+        """
+        Add the table row for a single place to the place list.
+
+        @param: ldatec       -- The latest change time seen so far
+        @param: tbody        -- The table body the new row is added to
+        @param: first_place  -- If True, the row gets the class BeginLetter
+                                and the anchor for the letter
+        @param: pname        -- The place name shown in the row
+        @param: place_handle -- The handle of the place to output
+        @param: letter       -- The letter shown in the anchor; replaced by
+                                a non-breaking space when pname is empty or
+                                a single blank
+        @param: bucket_link  -- The value used as the id of the anchor
+
+        @return: the tuple (ldatec, first_place) to be passed to the next
+                 call; both are returned unchanged when no place is found
+                 for the handle
+        """
+        place = self.r_db.get_place_from_handle(place_handle)
+        if not place:
+            # no row is output for a handle without a place
+            return (ldatec, first_place)
+
+        # keep track of the most recent change time
+        changed = place.get_change_time()
+        if changed > ldatec:
+            ldatec = changed
+        main_location = get_main_location(self.r_db, place)
+        if not pname or pname == " ":
+            letter = "&nbsp;"
+
+        # letter column
+        row = Html("tr")
+        tbody += row
+        letter_cell = Html("td", class_="ColumnLetter", inline=True)
+        row += letter_cell
+        if first_place:
+            first_place = False
+            row.attr = 'class = "BeginLetter"'
+            tooltip = self._("Places beginning "
+                "with letter %s") % letter
+            letter_cell += Html("a", letter, name=letter, title=tooltip,
+                                id_=bucket_link)
+        else:
+            letter_cell += "&nbsp;"
+
+        # name column
+        name_link = self.place_link(place.get_handle(), pname,
+                                    place.get_gramps_id())
+        row += Html("td", name_link, class_="ColumnName")
+
+        # state and country columns
+        columns = (("ColumnState", PlaceType.STATE),
+                   ("ColumnCountry", PlaceType.COUNTRY))
+        row.extend([Html("td", main_location.get(place_type, '') or "&nbsp;",
+                         class_=colclass, inline=True)
+                    for (colclass, place_type) in columns])
+
+        # latitude and longitude columns
+        if self.display_coordinates:
+            lat_cell = Html("td", class_="ColumnLatitude", inline=True)
+            long_cell = Html("td", class_="ColumnLongitude", inline=True)
+            row += lat_cell, long_cell
+            if place.lat and place.long:
+                latitude, longitude = conv_lat_lon(place.lat, place.long,
+                                                   "DEG")
+            else:
+                latitude = longitude = '&nbsp;'
+            lat_cell += latitude
+            long_cell += longitude
+        return (ldatec, first_place)
+
    def placelistpage(self, report, the_lang, the_title):
        """
        Create a place index
@ -179,7 +228,6 @@ class PlacePages(BasePage):
        result = self.write_header(self._("Places"))
        placelistpage, dummy_head, dummy_body, outerwrapper = result
        ldatec = 0
-        prev_letter = " "

        # begin places division
        with Html("div", class_="content", id="Places") as placelist:
@ -193,10 +241,22 @@ class PlacePages(BasePage):
            placelist += Html("p", msg, id="description")

            # begin alphabet navigation
-            pkeys = self.report.obj_dict[PlaceName].keys()
-            index_list = get_first_letters(pkeys, rlocale=self.rlocale)
+            # Assemble all the places
+            index = AlphabeticIndex(self.rlocale)
+            # self.report.obj_dict[PlaceName] is a dict with key place_name and
+            # values (place_fname, place_name, place.gramps_id, event)
+            for (place_name, value) in self.report.obj_dict[PlaceName].items():
+                index.addRecord(place_name, value)
+
+            # Extract the buckets from the index
+            index_list = []
+            index.resetBucketIterator()
+            while index.nextBucket():
+                if index.bucketRecordCount != 0:
+                    index_list.append(index.bucketLabel)
+            # Output the navigation
            alpha_nav = alphabet_navigation(index_list, self.rlocale)
-            if alpha_nav is not None:
+            if alpha_nav:
                placelist += alpha_nav

            # begin places table and table head
@ -234,80 +294,40 @@ class PlacePages(BasePage):
                        ]
                    )

-                handle_list = sort_places(self.r_db,
-                                          self.report.obj_dict[PlaceName],
-                                          self.rlocale)
-                first = True
-
                # begin table body
                tbody = Html("tbody")
                table += tbody

-                for (pname, place_handle) in handle_list:
-                    place = self.r_db.get_place_from_handle(place_handle)
-                    if place:
-                        if place.get_change_time() > ldatec:
-                            ldatec = place.get_change_time()
-                        plc_title = pname
-                        main_location = get_main_location(self.r_db, place)
+                # For each bucket, output the places in that bucket
+                index.resetBucketIterator()
+                output = []
+                dup_index = 0
+                while index.nextBucket():
+                    if index.bucketRecordCount != 0:
+                        bucket_letter = index.bucketLabel
+                        bucket_link = bucket_letter
+                        if bucket_letter in output:
+                            bucket_link = "%s (%i)" % (bucket_letter, dup_index)
+                            dup_index += 1
+                        output.append(bucket_letter)
+                        # Assemble all the places in this bucket into a dict for
+                        # sorting
+                        place_dict = dict()
+                        while index.nextRecord():
+                            place_name = index.recordName
+                            value = index.recordData
+                            place_dict[place_name] = value

-                        if plc_title and plc_title != " ":
-                            letter = get_index_letter(first_letter(plc_title),
-                                                      index_list,
-                                                      self.rlocale)
-                        else:
-                            letter = '&nbsp;'
-
-                        trow = Html("tr")
-                        tbody += trow
-
-                        tcell = Html("td", class_="ColumnLetter", inline=True)
-                        trow += tcell
-                        if first or primary_difference(letter, prev_letter,
-                                                       self.rlocale):
-                            first = False
-                            prev_letter = letter
-                            trow.attr = 'class = "BeginLetter"'
-
-                            ttle = self._("Places beginning "
-                                          "with letter %s") % letter
-                            tcell += Html("a", letter, name=letter, title=ttle)
-                        else:
-                            tcell += "&nbsp;"
-
-                        trow += Html("td",
-                                     self.place_link(
-                                         place.get_handle(),
-                                         plc_title, place.get_gramps_id()),
-                                     class_="ColumnName")
-
-                        trow.extend(
-                            Html("td", data or "&nbsp;", class_=colclass,
-                                 inline=True)
-                            for (colclass, data) in [
-                                ["ColumnState",
-                                 main_location.get(PlaceType.STATE, '')],
-                                ["ColumnCountry",
-                                 main_location.get(PlaceType.COUNTRY, '')]
-                            ]
-                        )
-
-                        if self.display_coordinates:
-                            tcell1 = Html("td", class_="ColumnLatitude",
-                                          inline=True)
-                            tcell2 = Html("td", class_="ColumnLongitude",
-                                          inline=True)
-                            trow += (tcell1, tcell2)
-
-                            if place.lat and place.long:
-                                latitude, longitude = conv_lat_lon(place.lat,
-                                                                   place.long,
-                                                                   "DEG")
-                                tcell1 += latitude
-                                tcell2 += longitude
-                            else:
-                                tcell1 += '&nbsp;'
-                                tcell2 += '&nbsp;'
+                        handle_list = sort_places(self.r_db,
+                                                place_dict,
+                                                self.rlocale)
+                        first_place = True
+                        for (pname, place_handle) in handle_list:
+                            (ldatec, first_place) \
+                            = self.__output_place(ldatec,
+                                                  trow, first_place, pname,
+                                                  place_handle, bucket_letter,
+                                                  bucket_link)

        # add clearline for proper styling
        # add footer section
@ -545,29 +565,3 @@ class PlacePages(BasePage):
        if place_name == apname: # store only the primary named page
            self.xhtml_writer(placepage, output_file, sio, ldatec)

-def get_first_letters(place_list, rlocale=glocale):
-    """
-    get the first letters of the place name list
-
-    @param: handle_list -- The place name list
-
-    The first letter (or letters if there is a contraction) are extracted from
-    """
-    index_list = []
-    for place in place_list:
-        ltr = first_letter(place)
-        index_list.append(ltr)
-
-    # Now remove letters where there is not a primary difference
-    index_list.sort(key=rlocale.sort_key)
-    first = True
-    prev_index = None
-    for nkey in index_list[:]:   #iterate over a slice copy of the list
-        if first or primary_difference(prev_index, nkey, rlocale):
-            first = False
-            prev_index = nkey
-        else:
-            index_list.remove(nkey)
-
-    # return menu set letters for alphabet_navigation
-    return index_list
--- a/gramps/plugins/webreport/surnamelist.py
+++ b/gramps/plugins/webreport/surnamelist.py
@ -43,6 +43,8 @@ Classe:
 #------------------------------------------------
 from decimal import getcontext
 import logging
+from collections import defaultdict
+from unicodedata import name as uniname

 #------------------------------------------------
 # Gramps module
@ -55,11 +57,10 @@ from gramps.plugins.lib.libhtml import Html
 #------------------------------------------------
 from gramps.plugins.webreport.basepage import BasePage
 from gramps.gen.display.name import displayer as _nd
-from gramps.plugins.webreport.common import (get_first_letters, _KEYPERSON,
-                                             alphabet_navigation, html_escape,
-                                             sort_people, name_to_md5,
-                                             first_letter, get_index_letter,
-                                             primary_difference, FULLCLEAR)
+from gramps.plugins.webreport.common import (alphabet_navigation, html_escape,
+                                             name_to_md5, FULLCLEAR,
+                                             get_surname_from_person,
+                                             AlphabeticIndex)

 _ = glocale.translation.sgettext
 LOG = logging.getLogger(".NarrativeWeb")
@ -91,8 +92,6 @@ class SurnameListPage(BasePage):
        @param: filename        -- The name to use for the Surnames page
        """
        BasePage.__init__(self, report, the_lang, the_title)
-        prev_surname = ""
-        prev_letter = " "

        if order_by == self.ORDER_BY_NAME:
            output_file, sio = self.report.create_file(filename)
@ -113,11 +112,30 @@ class SurnameListPage(BasePage):
                         'database with this same surname.')
            surnamelist += Html("p", msg, id="description")

+            # Assemble all the handles for each surname into a dictionary
+            # We don't call sort_people because we don't care about sorting
+            # individuals, only surnames
+            surname_handle_dict = defaultdict(list)
+            for person_handle in ppl_handle_list:
+                person = self.r_db.get_person_from_handle(person_handle)
+                surname = get_surname_from_person(self.r_db, person)
+                surname_handle_dict[surname].append(person_handle)
+
            # add alphabet navigation...
            # only if surname list not surname count
            if order_by == self.ORDER_BY_NAME:
-                index_list = get_first_letters(self.r_db, ppl_handle_list,
-                                               _KEYPERSON, rlocale=self.rlocale)
+                # Assemble the AlphabeticIndex
+                index = AlphabeticIndex(self.rlocale)
+                for surname, handle_list in surname_handle_dict.items():
+                    index.addRecord(surname, handle_list)
+
+                # Extract the buckets from the index
+                index_list = []
+                index.resetBucketIterator()
+                while index.nextBucket():
+                    if index.bucketRecordCount != 0:
+                        index_list.append(index.bucketLabel)
+                # Output the navigation
                alpha_nav = alphabet_navigation(index_list, self.rlocale)
                if alpha_nav is not None:
                    surnamelist += alpha_nav
@ -162,74 +180,36 @@ class SurnameListPage(BasePage):
                with Html("tbody") as tbody:
                    table += tbody

-                    ppl_handle_list = sort_people(self.r_db, ppl_handle_list,
-                                                  self.rlocale)
                    if order_by == self.ORDER_BY_COUNT:
-                        temp_list = {}
-                        for (surname, data_list) in ppl_handle_list:
-                            index_val = "%90d_%s" % (999999999-len(data_list),
-                                                     surname)
-                            temp_list[index_val] = (surname, data_list)
+                        # Group the surnames by the number of people that
+                        # have them, for example
+                        # {3: [("brown", [handle1, handle2, handle3]),
+                        #      ("smith", [handle4, handle5, handle6])
+                        #     ],
+                        #  2: [("Jones", [handle7, handle8])]
+                        # }
+                        count_ppl_handle_dict = defaultdict(list)
+                        for surname, data_list in surname_handle_dict.items():
+                            count_ppl_handle_dict[len(data_list)].append \
+                                ((surname,data_list))
+                        # For each count, we construct and output a separate
+                        # AlphabeticIndex for all surnames with that count
+                        for (dummy_count, ppl_handles) in \
+                            sorted(count_ppl_handle_dict.items(), reverse=True):
+                            # Construct the AlphabeticIndex for that count
+                            index = AlphabeticIndex(self.rlocale)
+                            for (surname, handle_list) in ppl_handles:
+                                index.addRecord(surname, handle_list)
+                            # Output the AlphabeticIndex for that count
+                            self.output_surname_records(index, tbody,
+                                                        name_format)

-                        lkey = self.rlocale.sort_key
-                        ppl_handle_list = (temp_list[key]
-                                           for key in sorted(temp_list,
-                                                             key=lkey))
+                    else: # order_by == self.ORDER_BY_NAME
+                        # The AlphabeticIndex has already been constructed
+                        # Output the AlphabeticIndex
+                        self.output_surname_records(index, tbody,
+                                                    name_format)

-                    first = True
-                    first_surname = True
-
-                    for (surname, data_list) in ppl_handle_list:
-
-                        if surname and not surname.isspace():
-                            letter = first_letter(surname)
-                            if order_by == self.ORDER_BY_NAME:
-                                # There will only be an alphabetic index list if
-                                # the ORDER_BY_NAME page is being generated
-                                letter = get_index_letter(letter, index_list,
-                                                          self.rlocale)
-                        else:
-                            letter = '&nbsp;'
-                            surname = self._("<absent>")
-
-                        trow = Html("tr")
-                        tbody += trow
-
-                        tcell = Html("td", class_="ColumnLetter", inline=True)
-                        trow += tcell
-
-                        if first or primary_difference(letter, prev_letter,
-                                                       self.rlocale):
-                            first = False
-                            prev_letter = letter
-                            trow.attr = 'class = "BeginLetter"'
-                            ttle = self._("Surnames beginning with "
-                                          "letter %s") % letter
-                            hyper = Html("a", letter, name=letter,
-                                         title=ttle, inline=True)
-                            tcell += hyper
-                        elif first_surname or surname != prev_surname:
-                            first_surname = False
-                            tcell += "&nbsp;"
-                            prev_surname = surname
-
-                        # In case the user choose a format name like "*SURNAME*"
-                        # We must display this field in upper case. So we use
-                        # the english format of format_name to find if this is
-                        # the case.
-                        # name_format = self.report.options['name_format']
-                        nme_format = _nd.name_formats[name_format][1]
-                        if "SURNAME" in nme_format:
-                            surnamed = surname.upper()
-                        else:
-                            surnamed = surname
-                        trow += Html("td",
-                                     self.surname_link(name_to_md5(surname),
-                                                       surnamed),
-                                     class_="ColumnSurname", inline=True)
-
-                        trow += Html("td", len(data_list),
-                                     class_="ColumnQuantity", inline=True)

        # create footer section
        # add clearline for proper styling
@ -252,10 +232,83 @@ class SurnameListPage(BasePage):
                           the result.
        """
        url = self.report.build_url_fname_html(fname, "srn", uplink)
+        try: # some characters don't have a unicode name
+            char = uniname(name[0])
+        except (ValueError, TypeError) as dummy_err:
+            char = " "
        hyper = Html("a", html_escape(name), href=url,
-                     title=name, inline=True)
+                     title="%s starting with %s" % (name, char),
+                     inline=True)
        if opt_val is not None:
            hyper += opt_val

        # return hyperlink to its caller
        return hyper
+
+    def output_surname_records(self, index, tbody, name_format):
+        """
+        Output all the surnames in the index.
+
+        @param: index   -- An AlphabeticIndex (ICU or the local fallback)
+                           where the names are surnames and the data is the
+                           list of person handles with that surname
+        @param: tbody   -- The HTML table body to which the rows are added
+        @param: name_format -- The name format from the report options
+        """
+        index.resetBucketIterator()
+        output = []
+        dup_index = 0
+        while index.nextBucket():
+            if index.bucketRecordCount != 0:
+                trow = Html("tr")
+                tbody += trow
+
+                tcell = Html("td", class_="ColumnLetter", inline=True)
+                trow += tcell
+                trow.attr = 'class = "BeginLetter"'
+                bucket_letter = index.bucketLabel
+                bucket_link = bucket_letter
+                if bucket_letter in output:
+                    bucket_link = "%s (%i)" % (bucket_letter, dup_index)
+                    dup_index += 1
+                output.append(bucket_letter)
+                try: # some characters don't have a unicode name
+                    char = uniname(bucket_letter)
+                except (ValueError, TypeError) as dummy_err:
+                    char = " "
+                ttle = self._("Surnames beginning with "
+                              "letter '%s' %s") % \
+                              (bucket_letter, char)
+                hyper = Html("a", index.bucketLabel, name=index.bucketLabel,
+                             id_=bucket_link, title=ttle)
+                tcell += hyper
+
+                first = True
+                while index.nextRecord():
+                    surname = index.recordName
+                    if not first:
+                        trow = Html("tr")
+                        tbody += trow
+
+                        tcell = Html("td", class_="ColumnLetter", inline=True)
+                        trow += tcell
+                    first = False
+                    if not surname or surname.isspace():
+                        surname = self._("<absent>")
+                    # If the user chose a name format like "*SURNAME*", the
+                    # surname must be displayed in upper case. We look at
+                    # the English definition of the name format to find out
+                    # whether this is the case; name_format comes from the
+                    # report options and is passed in by the caller.
+                    nme_format = _nd.name_formats[name_format][1]
+                    if "SURNAME" in nme_format:
+                        surnamed = surname.upper()
+                    else:
+                        surnamed = surname
+                    trow += Html("td",
+                                 self.surname_link(name_to_md5(surname),
+                                                  surnamed),
+                                 class_="ColumnSurname", inline=True)
+
+                    trow += Html("td", len(index.recordData),
+                                 class_="ColumnQuantity", inline=True)