[HOME]

Path : /lib/python2.7/site-packages/
Upload :
Current File : //lib/python2.7/site-packages/langtable.py

# vim:fileencoding=utf-8:sw=4:et -*- coding: utf-8 -*-

# Copyright (c) 2013 Mike FABIAN <mfabian@redhat.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

######################################################################
# Public API:
#
#     list_locales()
#     list_keyboards()
#     list_consolefonts()
#     list_inputmethods()
#     list_timezones()
#     list_scripts()
#     language_name()
#     territory_name()
#     timezone_name()
#     languageId()
#     territoryId()
#     supports_ascii()
#
# These are the functions which do not start with an “_” in their name.
# All global functions and global variables whose name starts with an
# “_” are internal and should not be used by a user of langtable.py.
#
######################################################################

import os
import re
import logging
import gzip

import xml.parsers.expat
from xml.sax.handler import ContentHandler

# will be replaced by “make install”:
# (presumably the directory handed to _read_file() as “datadir”; that
# call is not in this part of the file -- verify against the caller)
_datadir = '/usr/share/langtable'

# For the ICU/CLDR locale pattern see: http://userguide.icu-project.org/locale
# (We ignore the variant code here)
#
# The pattern is anchored at the start of the string and exposes the
# parts of a locale id of the form language[_Script][_TERRITORY] as the
# named groups “language”, “script” and “territory”.  The script and the
# territory are optional ({0,1}).  Every part is guarded by a lookahead
# which requires that whatever follows is the end of the string, an “@”
# or another well-formed part; because the mandatory language part
# carries such a guard, a string with a malformed tail does not match
# at all instead of matching only its leading part.
_cldr_locale_pattern = re.compile(
    # language must be 2 or 3 lower case letters:
    '^(?P<language>[a-z]{2,3}'
    # language is only valid if
    +'(?=$|@' # locale string ends here or only options follow
    +'|_[A-Z][a-z]{3}(?=$|@|_[A-Z]{2}(?=$|@))' # valid script follows
    +'|_[A-Z]{2}(?=$|@)' # valid territory follows
    +'))'
    # script must be 1 upper case letter followed by
    # 3 lower case letters:
    +'(?:_(?P<script>[A-Z][a-z]{3})'
    # script is only valid if
    +'(?=$|@' # locale string ends here or only options follow
    +'|_[A-Z]{2}(?=$|@)' # valid territory follows
    +')){0,1}'
    # territory must be 2 upper case letters:
    +'(?:_(?P<territory>[A-Z]{2})'
    # territory is only valid if
    +'(?=$|@' # locale string ends here or only options follow
    +')){0,1}')

# Maps a lower case script name to its four letter ISO 15924 script code,
# see http://www.unicode.org/iso15924/iso15924-codes.html
# Judging by the “iqtelif” example below, the keys are the modifiers
# which follow the “@” in glibc locale names.
_glibc_script_ids = {
    'latin': 'Latn',
    'iqtelif': 'Latn', # Tatar, tt_RU.UTF-8@iqtelif, http://en.wikipedia.org/wiki/User:Ultranet/%C4%B0QTElif
    'cyrillic': 'Cyrl',
    'devanagari': 'Deva',
}

# In-memory databases.  Each one maps an ID string to the matching
# *_db_item object and is filled by the endElement() method of the
# corresponding *ContentHandler class while an XML data file is parsed.
_territories_db = {}      # territoryId      -> territory_db_item
_languages_db = {}        # languageId       -> language_db_item
_keyboards_db = {}        # keyboardId       -> keyboard_db_item
_timezones_db = {}        # timezoneId       -> timezone_db_item
_timezoneIdParts_db = {}  # timezoneIdPartId -> timezoneIdPart_db_item

class territory_db_item:
    """
    Plain record describing one territory.

    Each constructor argument is stored unchanged in the attribute of the
    same name.  When filled by TerritoriesContentHandler, “names” maps a
    language ID to the translated territory name and all other attributes
    map an ID to an integer rank.
    """

    def __init__(self, names=None, scripts=None, locales=None,
                 languages=None, keyboards=None, inputmethods=None,
                 consolefonts=None, timezones=None):
        # translated names and the writing systems / locales in use
        self.names, self.scripts, self.locales = names, scripts, locales
        # ranked languages spoken in the territory
        self.languages = languages
        # ranked input related resources
        self.keyboards, self.inputmethods = keyboards, inputmethods
        # ranked console fonts and time zones
        self.consolefonts, self.timezones = consolefonts, timezones

class language_db_item:
    """
    Plain record describing one language.

    Each constructor argument is stored unchanged in the attribute of the
    same name.  When filled by LanguagesContentHandler, the three iso639_*
    attributes hold the text of the corresponding XML elements, “names”
    maps a language ID to the translated language name and all other
    attributes map an ID to an integer rank.
    """

    def __init__(self, iso639_1=None, iso639_2_t=None, iso639_2_b=None,
                 names=None, scripts=None, locales=None, territories=None,
                 keyboards=None, inputmethods=None, consolefonts=None,
                 timezones=None):
        # ISO 639 codes
        (self.iso639_1,
         self.iso639_2_t,
         self.iso639_2_b) = (iso639_1, iso639_2_t, iso639_2_b)
        # translated names and the writing systems / locales in use
        self.names, self.scripts, self.locales = names, scripts, locales
        # ranked territories in which the language is used
        self.territories = territories
        # ranked input related resources
        self.keyboards, self.inputmethods = keyboards, inputmethods
        # ranked console fonts and time zones
        self.consolefonts, self.timezones = consolefonts, timezones

class keyboard_db_item:
    """
    Plain record describing one keyboard layout.

    Each constructor argument is stored unchanged in the attribute of the
    same name.  When filled by KeyboardsContentHandler, “ascii” is a
    boolean and “languages” and “territories” map an ID to an integer
    rank.
    """

    def __init__(self, description=None, ascii=True, languages=None,
                 territories=None, comment=None):
        # free text
        self.description, self.comment = description, comment
        # flag taken from the <ascii> element of the data file
        self.ascii = ascii
        # ranked languages and territories the layout is used for
        self.languages, self.territories = languages, territories

class timezone_db_item:
    """
    Plain record describing one time zone.

    “names” is stored unchanged; when filled by TimezonesContentHandler
    it maps a language ID to the translated time zone name.
    """

    def __init__(self, names=None):
        # translations, kept exactly as passed in
        self.names = names

class timezoneIdPart_db_item:
    """
    Plain record describing one part of a time zone ID.

    “names” is stored unchanged; when filled by
    TimezoneIdPartsContentHandler it maps a language ID to the translated
    name of that part.
    """

    def __init__(self, names=None):
        # translations, kept exactly as passed in
        self.names = names

# xml.sax.handler.ContentHandler is not inherited from the 'object' class,
# 'super' keyword wouldn't work, we need to inherit it on our own
class LangtableContentHandler(ContentHandler, object):
    """
    A base class inherited from the xml.sax.handler.ContentHandler class
    providing handling for SAX events produced when parsing the langtable data
    files.

    Subclasses point “_save_to” at the name of one of their own attributes
    while the parser is inside an element whose text is of interest;
    characters() then collects the text in that attribute.

    """

    def __init__(self):
        # Run the initialiser of ContentHandler as well so that the state
        # it sets up exists on every handler instance.
        super(LangtableContentHandler, self).__init__()

        # Name of the attribute which receives the upcoming text data, or
        # None when text data is to be ignored.
        self._save_to = None

    def characters(self, content):
        """
        Handler for the text data event.

        Appends “content” to the attribute named by “_save_to”; does
        nothing when “_save_to” is None.
        """

        target = self._save_to
        if target is None:
            # don't know where to save data
            return

        # The text of one element may arrive split over several events,
        # so extend what has been collected so far (None or an empty
        # string counts as nothing collected yet).
        collected = getattr(self, target)
        if collected:
            content = collected + content

        setattr(self, target, content)

class TerritoriesContentHandler(LangtableContentHandler):
    """
    Handler for SAX events produced when parsing the territories.xml file.

    Every completed “territory” element is stored as a territory_db_item
    in _territories_db under its territory ID.
    """

    # element whose text is collected -> attribute receiving that text
    _TEXT_TARGETS = {
        u"territoryId": "_territoryId",
        u"languageId": "_item_id",
        u"scriptId": "_item_id",
        u"localeId": "_item_id",
        u"keyboardId": "_item_id",
        u"inputmethodId": "_item_id",
        u"consolefontId": "_item_id",
        u"timezoneId": "_item_id",
        u"trName": "_item_name",
        u"rank": "_item_rank",
    }

    # element closing a ranked entry -> dictionary attribute it goes into
    _RANKED_TARGETS = {
        u"script": "_scripts",
        u"locale": "_locales",
        u"language": "_languages",
        u"keyboard": "_keyboards",
        u"inputmethod": "_inputmethods",
        u"consolefont": "_consolefonts",
        u"timezone": "_timezones",
    }

    # all dictionaries which are collected per territory
    _DICT_ATTRS = ("_names", "_scripts", "_locales", "_languages",
                   "_keyboards", "_inputmethods", "_consolefonts",
                   "_timezones")

    def __init__(self):
        super(TerritoriesContentHandler, self).__init__()

        # ID of the territory currently being read
        self._territoryId = None

        # parts of the dictionary entry currently being read
        self._clear_item()

        # the dictionaries only exist while inside a “territory” element
        for attr in self._DICT_ATTRS:
            setattr(self, attr, None)

    def startElement(self, name, attrs):
        if name == u"territory":
            # a new territory begins: start with empty dictionaries
            for attr in self._DICT_ATTRS:
                setattr(self, attr, dict())
            return

        # elements not listed in the table leave “_save_to” untouched
        target = self._TEXT_TARGETS.get(name)
        if target is not None:
            self._save_to = target

    def endElement(self, name):
        # we don't allow text to appear on the same level as elements so outside
        # of an element no text should appear
        self._save_to = None

        if name == u"territory":
            self._store_territory()
        elif name == u"name":
            self._names[str(self._item_id)] = self._item_name
            self._clear_item()
        elif name in self._RANKED_TARGETS:
            ranked = getattr(self, self._RANKED_TARGETS[name])
            ranked[str(self._item_id)] = int(self._item_rank)
            self._clear_item()

    def _store_territory(self):
        """Put the collected territory into the database and reset."""
        _territories_db[str(self._territoryId)] = territory_db_item(
            names = self._names,
            scripts = self._scripts,
            locales = self._locales,
            languages = self._languages,
            keyboards = self._keyboards,
            inputmethods = self._inputmethods,
            consolefonts = self._consolefonts,
            timezones = self._timezones)

        # clean after ourselves
        self._territoryId = None
        for attr in self._DICT_ATTRS:
            setattr(self, attr, None)

    def _clear_item(self):
        """Forget the parts of the entry which has just been handled."""
        self._item_id = self._item_name = self._item_rank = None

class KeyboardsContentHandler(LangtableContentHandler):
    """
    Handler for SAX events produced when parsing the keyboards.xml file.

    Every completed “keyboard” element is stored as a keyboard_db_item in
    _keyboards_db under its keyboard ID.
    """

    # element whose text is collected -> attribute receiving that text
    _TEXT_TARGETS = {
        u"keyboardId": "_keyboardId",
        u"description": "_description",
        u"ascii": "_ascii",
        u"comment": "_comment",
        u"languageId": "_item_id",
        u"territoryId": "_item_id",
        u"rank": "_item_rank",
    }

    # element closing a ranked entry -> dictionary attribute it goes into
    _RANKED_TARGETS = {
        u"language": "_languages",
        u"territory": "_territories",
    }

    # plain text values which are collected per keyboard
    _SIMPLE_ATTRS = ("_keyboardId", "_description", "_ascii", "_comment")

    def __init__(self):
        super(KeyboardsContentHandler, self).__init__()

        # simple values of the keyboard currently being read
        for attr in self._SIMPLE_ATTRS:
            setattr(self, attr, None)

        # parts of the dictionary entry currently being read
        self._clear_item()

        # the dictionaries only exist while inside a “keyboard” element
        self._languages = None
        self._territories = None

    def startElement(self, name, attrs):
        if name == u"keyboard":
            # a new keyboard begins: start with empty dictionaries
            self._languages = dict()
            self._territories = dict()
            return

        # elements not listed in the table leave “_save_to” untouched
        target = self._TEXT_TARGETS.get(name)
        if target is not None:
            self._save_to = target

    def endElement(self, name):
        # we don't allow text to appear on the same level as elements so outside
        # of an element no text should appear
        self._save_to = None

        if name == u"keyboard":
            self._store_keyboard()
        elif name in self._RANKED_TARGETS:
            ranked = getattr(self, self._RANKED_TARGETS[name])
            ranked[str(self._item_id)] = int(self._item_rank)
            self._clear_item()

    def _store_keyboard(self):
        """Put the collected keyboard into the database and reset."""
        _keyboards_db[str(self._keyboardId)] = keyboard_db_item(
            description = self._description,
            # only the exact text “True” switches the flag on
            ascii = self._ascii == u"True",
            comment = self._comment,
            languages = self._languages,
            territories = self._territories)

        # clean after ourselves
        for attr in self._SIMPLE_ATTRS:
            setattr(self, attr, None)
        self._languages = None
        self._territories = None

    def _clear_item(self):
        """Forget the parts of the entry which has just been handled."""
        self._item_id = self._item_rank = None

class LanguagesContentHandler(LangtableContentHandler):
    """
    Handler for SAX events produced when parsing the languages.xml file.

    Every completed “language” element is stored as a language_db_item in
    _languages_db under its language ID.  A “languageId” element has two
    meanings in this file: inside “names” it is the language of a
    translation, elsewhere it is the ID of the language being described.
    """

    # element whose text is collected -> attribute receiving that text
    # (“languageId” is missing on purpose, see startElement())
    _TEXT_TARGETS = {
        u"iso639-1": "_iso639_1",
        u"iso639-2-t": "_iso639_2_t",
        u"iso639-2-b": "_iso639_2_b",
        u"scriptId": "_item_id",
        u"localeId": "_item_id",
        u"territoryId": "_item_id",
        u"keyboardId": "_item_id",
        u"inputmethodId": "_item_id",
        u"consolefontId": "_item_id",
        u"timezoneId": "_item_id",
        u"trName": "_item_name",
        u"rank": "_item_rank",
    }

    # element closing a ranked entry -> dictionary attribute it goes into
    _RANKED_TARGETS = {
        u"script": "_scripts",
        u"locale": "_locales",
        u"territory": "_territories",
        u"keyboard": "_keyboards",
        u"inputmethod": "_inputmethods",
        u"consolefont": "_consolefonts",
        u"timezone": "_timezones",
    }

    # plain text values which are collected per language
    _SIMPLE_ATTRS = ("_languageId", "_iso639_1", "_iso639_2_t",
                     "_iso639_2_b")

    # all dictionaries which are collected per language
    _DICT_ATTRS = ("_names", "_scripts", "_locales", "_territories",
                   "_keyboards", "_inputmethods", "_consolefonts",
                   "_timezones")

    def __init__(self):
        super(LanguagesContentHandler, self).__init__()

        # simple values of the language currently being read
        for attr in self._SIMPLE_ATTRS:
            setattr(self, attr, None)

        # parts of the dictionary entry currently being read
        self._clear_item()

        # flag to distinguish 'languageId' elements inside and outside of the
        # 'names' element
        self._in_names = False

        # the dictionaries only exist while inside a “language” element
        for attr in self._DICT_ATTRS:
            setattr(self, attr, None)

    def startElement(self, name, attrs):
        if name == u"language":
            # a new language begins: start with empty dictionaries
            for attr in self._DICT_ATTRS:
                setattr(self, attr, dict())
        elif name == u"names":
            self._in_names = True
        elif name == u"languageId":
            if self._in_names:
                # ID of the translated name's language
                self._save_to = "_item_id"
            else:
                # ID of the language
                self._save_to = "_languageId"
        else:
            # elements not listed in the table leave “_save_to” untouched
            target = self._TEXT_TARGETS.get(name)
            if target is not None:
                self._save_to = target

    def endElement(self, name):
        # we don't allow text to appear on the same level as elements so outside
        # of an element no text should appear
        self._save_to = None

        if name == u"language":
            self._store_language()
        elif name == u"names":
            # leaving the "names" element
            self._in_names = False
        elif name == u"name":
            self._names[str(self._item_id)] = self._item_name
            self._clear_item()
        elif name in self._RANKED_TARGETS:
            ranked = getattr(self, self._RANKED_TARGETS[name])
            ranked[str(self._item_id)] = int(self._item_rank)
            self._clear_item()

    def _store_language(self):
        """Put the collected language into the database and reset."""
        _languages_db[str(self._languageId)] = language_db_item(
            iso639_1 = self._iso639_1,
            iso639_2_t = self._iso639_2_t,
            iso639_2_b = self._iso639_2_b,
            names = self._names,
            scripts = self._scripts,
            locales = self._locales,
            territories = self._territories,
            keyboards = self._keyboards,
            inputmethods = self._inputmethods,
            consolefonts = self._consolefonts,
            timezones = self._timezones)

        # clean after ourselves
        for attr in self._SIMPLE_ATTRS + self._DICT_ATTRS:
            setattr(self, attr, None)

    def _clear_item(self):
        """Forget the parts of the entry which has just been handled."""
        self._item_id = self._item_name = self._item_rank = None

class TimezonesContentHandler(LangtableContentHandler):
    """
    Handler for SAX events produced when parsing the timezones.xml file.

    Every completed “timezone” element is stored as a timezone_db_item in
    _timezones_db under its time zone ID.
    """

    # element whose text is collected -> attribute receiving that text
    _TEXT_TARGETS = {
        # ID of the timezone
        u"timezoneId": "_timezoneId",
        # ID of the translated timezone's language
        u"languageId": "_item_id",
        u"trName": "_item_name",
    }

    def __init__(self):
        super(TimezonesContentHandler, self).__init__()

        # ID of the time zone currently being read
        self._timezoneId = None

        # parts of the translation currently being read
        self._clear_item()

        # translations; only exists while inside a “timezone” element
        self._names = None

    def startElement(self, name, attrs):
        if name == u"timezone":
            # a new time zone begins: start with no translations
            self._names = dict()
            return

        # elements not listed in the table leave “_save_to” untouched
        target = self._TEXT_TARGETS.get(name)
        if target is not None:
            self._save_to = target

    def endElement(self, name):
        # we don't allow text to appear on the same level as elements so outside
        # of an element no text should appear
        self._save_to = None

        if name == u"name":
            # one translation is complete
            self._names[str(self._item_id)] = self._item_name
            self._clear_item()
        elif name == u"timezone":
            _timezones_db[str(self._timezoneId)] = timezone_db_item(
                names = self._names)

            # clean after ourselves
            self._timezoneId = self._names = None

    def _clear_item(self):
        """Forget the parts of the translation which has just been handled."""
        self._item_id = self._item_name = None

class TimezoneIdPartsContentHandler(LangtableContentHandler):
    """
    Handler for SAX events produced when parsing the timezoneidparts.xml file.

    Every completed “timezoneIdPart” element is stored as a
    timezoneIdPart_db_item in _timezoneIdParts_db under its partial time
    zone ID.
    """

    # element whose text is collected -> attribute receiving that text
    _TEXT_TARGETS = {
        # partial timezone ID
        u"timezoneIdPartId": "_timezoneIdPartId",
        # ID of the translated partial timezone ID's language
        u"languageId": "_item_id",
        u"trName": "_item_name",
    }

    def __init__(self):
        super(TimezoneIdPartsContentHandler, self).__init__()

        # partial time zone ID currently being read
        self._timezoneIdPartId = None

        # parts of the translation currently being read
        self._clear_item()

        # translations; only exists while inside a “timezoneIdPart” element
        self._names = None

    def startElement(self, name, attrs):
        if name == u"timezoneIdPart":
            # a new part begins: start with no translations
            self._names = dict()
            return

        # elements not listed in the table leave “_save_to” untouched
        target = self._TEXT_TARGETS.get(name)
        if target is not None:
            self._save_to = target

    def endElement(self, name):
        # we don't allow text to appear on the same level as elements so outside
        # of an element no text should appear
        self._save_to = None

        if name == u"name":
            # one translation is complete
            self._names[str(self._item_id)] = self._item_name
            self._clear_item()
        elif name == u"timezoneIdPart":
            _timezoneIdParts_db[str(self._timezoneIdPartId)] = timezoneIdPart_db_item(
                names = self._names)

            # clean after ourselves
            self._timezoneIdPartId = self._names = None

    def _clear_item(self):
        """Forget the parts of the translation which has just been handled."""
        self._item_id = self._item_name = None

def _write_territories_file(file):
    '''
    Only for internal use

    Writes the complete contents of _territories_db as XML to "file",
    which only needs to offer a write() method.  Territories and
    translated names are written sorted by ID; the entries of the ranked
    sections are written with the highest rank first, entries of equal
    rank sorted by ID.

    All text taken from the database is XML-escaped, so a value
    containing "&", "<" or ">" still yields a well-formed file which can
    be parsed again.
    '''
    # imported here because the module header does not import saxutils
    from xml.sax.saxutils import escape

    def write_ranked(plural, singular, ranked):
        # one "<plural>" section holding a "<singular>" entry per item
        file.write('    <'+plural+'>\n')
        for itemId, rank in sorted(ranked.items(), key=lambda x: (-1*x[1],x[0])):
            file.write(
                '      <'+singular+'>'
                +'<'+singular+'Id>'+escape(itemId)+'</'+singular+'Id>'
                +'<rank>'+str(rank)+'</rank>'
                +'</'+singular+'>\n')
        file.write('    </'+plural+'>\n')

    file.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    file.write('<territories>\n')
    for territoryId in sorted(_territories_db):
        territory = _territories_db[territoryId]
        file.write('  <territory>\n')
        file.write('    <territoryId>'+escape(territoryId)+'</territoryId>\n')
        names = territory.names
        file.write('    <names>\n')
        for name in sorted(names):
            file.write(
                '      <name>'
                +'<languageId>'+escape(name)+'</languageId>'
                +'<trName>'+escape(names[name])+'</trName>'
                +'</name>\n')
        file.write('    </names>\n')
        write_ranked('scripts', 'script', territory.scripts)
        write_ranked('locales', 'locale', territory.locales)
        write_ranked('languages', 'language', territory.languages)
        write_ranked('keyboards', 'keyboard', territory.keyboards)
        write_ranked('inputmethods', 'inputmethod', territory.inputmethods)
        write_ranked('consolefonts', 'consolefont', territory.consolefonts)
        write_ranked('timezones', 'timezone', territory.timezones)
        file.write('  </territory>\n')
    file.write('</territories>\n')
    return

def _write_languages_file(file):
    '''
    Only for internal use

    Writes the complete contents of _languages_db as XML to "file",
    which only needs to offer a write() method.  Languages and
    translated names are written sorted by ID; the entries of the ranked
    sections are written with the highest rank first, entries of equal
    rank sorted by ID.  The ISO 639 codes are written via str(), so a
    code which is None appears as the text "None".

    All text taken from the database is XML-escaped, so a value
    containing "&", "<" or ">" still yields a well-formed file which can
    be parsed again.
    '''
    # imported here because the module header does not import saxutils
    from xml.sax.saxutils import escape

    def write_ranked(plural, singular, ranked):
        # one "<plural>" section holding a "<singular>" entry per item
        file.write('    <'+plural+'>\n')
        for itemId, rank in sorted(ranked.items(), key=lambda x: (-1*x[1],x[0])):
            file.write(
                '      <'+singular+'>'
                +'<'+singular+'Id>'+escape(itemId)+'</'+singular+'Id>'
                +'<rank>'+str(rank)+'</rank>'
                +'</'+singular+'>\n')
        file.write('    </'+plural+'>\n')

    file.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    file.write('<languages>\n')
    for languageId in sorted(_languages_db):
        language = _languages_db[languageId]
        file.write('  <language>\n')
        file.write('    <languageId>'+escape(languageId)+'</languageId>\n')
        file.write('    <iso639-1>'+escape(str(language.iso639_1))+'</iso639-1>\n')
        file.write('    <iso639-2-t>'+escape(str(language.iso639_2_t))+'</iso639-2-t>\n')
        file.write('    <iso639-2-b>'+escape(str(language.iso639_2_b))+'</iso639-2-b>\n')
        names = language.names
        file.write('    <names>\n')
        for name in sorted(names):
            file.write(
                '      <name>'
                +'<languageId>'+escape(name)+'</languageId>'
                +'<trName>'+escape(names[name])+'</trName>'
                +'</name>\n')
        file.write('    </names>\n')
        write_ranked('scripts', 'script', language.scripts)
        write_ranked('locales', 'locale', language.locales)
        write_ranked('territories', 'territory', language.territories)
        write_ranked('keyboards', 'keyboard', language.keyboards)
        write_ranked('inputmethods', 'inputmethod', language.inputmethods)
        write_ranked('consolefonts', 'consolefont', language.consolefonts)
        write_ranked('timezones', 'timezone', language.timezones)
        file.write('  </language>\n')
    file.write('</languages>\n')
    return

def _write_keyboards_file(file):
    '''
    Only for internal use

    Writes the complete contents of _keyboards_db as XML to "file",
    which only needs to offer a write() method.  Keyboards are written
    sorted by ID; the entries of the ranked sections are written with
    the highest rank first, entries of equal rank sorted by ID.  The
    "comment" element is left out when a keyboard has no comment.

    All text taken from the database is XML-escaped, so a value
    containing "&", "<" or ">" still yields a well-formed file which can
    be parsed again.
    '''
    # imported here because the module header does not import saxutils
    from xml.sax.saxutils import escape

    def write_ranked(plural, singular, ranked):
        # one "<plural>" section holding a "<singular>" entry per item
        file.write('    <'+plural+'>\n')
        for itemId, rank in sorted(ranked.items(), key=lambda x: (-1*x[1],x[0])):
            file.write(
                '      <'+singular+'>'
                +'<'+singular+'Id>'+escape(itemId)+'</'+singular+'Id>'
                +'<rank>'+str(rank)+'</rank>'
                +'</'+singular+'>\n')
        file.write('    </'+plural+'>\n')

    file.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    file.write('<keyboards>\n')
    for keyboardId in sorted(_keyboards_db):
        keyboard = _keyboards_db[keyboardId]
        file.write('  <keyboard>\n')
        file.write('    <keyboardId>'+escape(keyboardId)+'</keyboardId>\n')
        file.write('    <description>'+escape(keyboard.description)+'</description>\n')
        # written as "True" or "False", the text the reader compares with
        file.write('    <ascii>'+str(keyboard.ascii)+'</ascii>\n')
        if keyboard.comment is not None:
            file.write('    <comment>'+escape(keyboard.comment)+'</comment>\n')
        write_ranked('languages', 'language', keyboard.languages)
        write_ranked('territories', 'territory', keyboard.territories)
        file.write('  </keyboard>\n')
    file.write('</keyboards>\n')
    return

def _write_timezones_file(file):
    '''
    Only for internal use

    Writes the complete contents of _timezones_db as XML to "file",
    which only needs to offer a write() method.  Time zones and their
    translated names are written sorted by ID.

    All text taken from the database is XML-escaped, so a value
    containing "&", "<" or ">" still yields a well-formed file which can
    be parsed again.
    '''
    # imported here because the module header does not import saxutils
    from xml.sax.saxutils import escape

    file.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    file.write('<timezones>\n')
    for timezoneId in sorted(_timezones_db):
        names = _timezones_db[timezoneId].names
        file.write('  <timezone>\n')
        file.write('    <timezoneId>'+escape(timezoneId)+'</timezoneId>\n')
        file.write('    <names>\n')
        for languageId in sorted(names):
            file.write(
                '      <name>'
                +'<languageId>'+escape(languageId)+'</languageId>'
                +'<trName>'+escape(names[languageId])+'</trName>'
                +'</name>\n')
        file.write('    </names>\n')
        file.write('  </timezone>\n')
    file.write('</timezones>\n')
    return

def _write_timezoneIdParts_file(file):
    '''
    Only for internal use

    Writes the complete contents of _timezoneIdParts_db as XML to
    "file", which only needs to offer a write() method.  The parts and
    their translated names are written sorted by ID.

    All text taken from the database is XML-escaped, so a value
    containing "&", "<" or ">" still yields a well-formed file which can
    be parsed again.
    '''
    # imported here because the module header does not import saxutils
    from xml.sax.saxutils import escape

    file.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    file.write('<timezoneIdParts>\n')
    for timezoneIdPartId in sorted(_timezoneIdParts_db):
        names = _timezoneIdParts_db[timezoneIdPartId].names
        file.write('  <timezoneIdPart>\n')
        file.write('    <timezoneIdPartId>'+escape(timezoneIdPartId)+'</timezoneIdPartId>\n')
        file.write('    <names>\n')
        for languageId in sorted(names):
            file.write(
                '      <name>'
                +'<languageId>'+escape(languageId)+'</languageId>'
                +'<trName>'+escape(names[languageId])+'</trName>'
                +'</name>\n')
        file.write('    </names>\n')
        file.write('  </timezoneIdPart>\n')
    file.write('</timezoneIdParts>\n')
    return

def _expat_parse(file, sax_handler):
    """
    Only for internal use. Feeds the contents of an open file object to a
    freshly created expat parser whose callbacks are the startElement(),
    endElement() and characters() methods of the given SAX handler.
    """

    callbacks = (
        ("StartElementHandler", sax_handler.startElement),
        ("EndElementHandler", sax_handler.endElement),
        ("CharacterDataHandler", sax_handler.characters),
    )

    expat_parser = xml.parsers.expat.ParserCreate()
    for hook, callback in callbacks:
        setattr(expat_parser, hook, callback)
    expat_parser.ParseFile(file)

def _read_file(datadir, filename, sax_handler):
    '''
    Only for internal use

    Looks for "filename" first in "datadir" and then in the current
    directory; in each directory the plain file is preferred over a
    gzip-compressed one named "filename.gz".  The first file found is
    parsed with "sax_handler" and the search stops.  When nothing is
    found this is merely logged.
    '''

    # name suffix and matching open function, in order of preference
    variants = (('', open), ('.gz', gzip.open))

    for directory in (datadir, '.'):
        for suffix, opener in variants:
            candidate = os.path.join(directory, filename+suffix)
            if not os.path.isfile(candidate):
                continue
            with opener(candidate, mode='rb') as file:
                logging.info('reading file=%s' %file)
                _expat_parse(file, sax_handler)
            return
    logging.info('no readable file found.')

def _write_files(territoriesfilename, languagesfilename, keyboardsfilename, timezonesfilename, timezoneidpartsfilename):
    '''
    Only for internal use

    Writes each of the in-memory databases to the file name given for
    it, using the matching ``_write_*_file`` function. Every output
    file is opened in text mode for writing (an existing file is
    truncated) and is written exactly once.
    '''
    with open(territoriesfilename, 'w') as territoriesfile:
        logging.info("writing territories file=%s" %territoriesfile)
        _write_territories_file(territoriesfile)
    with open(languagesfilename, 'w') as languagesfile:
        logging.info("writing languages file=%s" %languagesfile)
        _write_languages_file(languagesfile)
    # The keyboards file used to be written twice in a row; the second
    # pass only truncated and regenerated identical content.
    with open(keyboardsfilename, 'w') as keyboardsfile:
        logging.info("writing keyboards file=%s" %keyboardsfile)
        _write_keyboards_file(keyboardsfile)
    with open(timezonesfilename, 'w') as timezonesfile:
        logging.info("writing timezones file=%s" %timezonesfile)
        _write_timezones_file(timezonesfile)
    with open(timezoneidpartsfilename, 'w') as timezoneidpartsfile:
        logging.info("writing timezoneidparts file=%s" %timezoneidpartsfile)
        _write_timezoneIdParts_file(timezoneidpartsfile)
    return

def _dictionary_to_ranked_list(dict, reverse=True):
    '''
    Only for internal use

    Converts a dictionary mapping items to weights into a list of
    ``[item, weight]`` pairs sorted by (weight, item), by default in
    descending order. Items with a weight of 0 are left out.
    '''
    ordered_keys = sorted(
        dict, key=lambda key: (dict.get(key), key), reverse=reverse)
    return [[key, dict[key]] for key in ordered_keys if dict[key] != 0]

def _ranked_list_to_list(ranked_list):
    '''
    Only for internal use

    Strips the weights from a ranked list, i.e. returns a list
    containing only the first element of each entry, in the same order.
    '''
    return [entry[0] for entry in ranked_list]

def _make_ranked_list_concise(ranked_list, cut_off_factor=1000):
    '''
    Only for internal use

    Cuts a ranked list of ``[item, weight]`` entries after the first
    entry whose weight divided by the weight of the following entry
    is greater than ``cut_off_factor``. Lists with less than two
    entries and lists without such a drop are returned unchanged.
    '''
    if len(ranked_list) <= 1:
        return ranked_list
    for position in range(len(ranked_list) - 1):
        weight = ranked_list[position][1]
        following_weight = ranked_list[position + 1][1]
        if weight / following_weight > cut_off_factor:
            # everything after this entry is ranked far lower, drop it
            return ranked_list[:position + 1]
    return ranked_list

def _parse_and_split_languageId(languageId=None, scriptId=None, territoryId=None):
    '''
    Parses languageId and if it contains a valid ICU locale id,
    returns the values for language, script, and territory found
    in languageId instead of the original values given.

    Before parsing, it replaces glibc names for scripts like “latin”
    with the iso-15924 script names like “Latn”, both in the
    languageId and the scriptId parameter. I.e.  language id like
    “sr_latin_RS” is accepted as well and treated the same as
    “sr_Latn_RS”.

    An encoding part introduced by “.” is removed from languageId
    before parsing. If an “@” follows the “.”, the part starting at
    the “@” is kept.

    :param languageId: language id or locale id to parse
    :type languageId: string
    :param scriptId: script id used if languageId contains no script
    :type scriptId: string
    :param territoryId: territory id used if languageId contains no territory
    :type territoryId: string
    :rtype: tuple (languageId, scriptId, territoryId)
    '''
    if languageId:
        # Cut out the encoding, i.e. everything from the first “.” up
        # to (but not including) an “@” following it, or up to the end
        # if there is no “@” behind the “.”:
        dot_index = languageId.find('.')
        at_index = languageId.find('@')
        if dot_index >= 0 and at_index > dot_index:
            languageId = languageId[:dot_index] + languageId[at_index:]
        elif dot_index >= 0:
            languageId = languageId[:dot_index]
    # Map the glibc script names to the names stored as values in
    # _glibc_script_ids.
    # NOTE(review): str.replace() substitutes the key wherever it occurs
    # in the string, not only at the position of the script.
    for key in _glibc_script_ids:
        if scriptId:
            scriptId = scriptId.replace(key, _glibc_script_ids[key])
        if languageId:
            # a trailing “@<glibc script name>” modifier also sets the script:
            if languageId.endswith('@'+key):
                scriptId = _glibc_script_ids[key]
            languageId = languageId.replace(key, _glibc_script_ids[key])
    if (languageId):
        match = _cldr_locale_pattern.match(languageId)
        if match:
            languageId = match.group('language')
            # script and territory found in languageId take precedence
            # over the values passed as parameters; if they are not
            # found, the parameters are kept:
            if match.group('script'):
                scriptId = match.group('script')
            if match.group('territory'):
                territoryId = match.group('territory')
        else:
            # no match: languageId is returned as it is (after the
            # replacements done above), only a log message is written
            logging.info("languageId contains invalid locale id=%s" %languageId)
    # if the language is Chinese and only the territory is given
    # but not the script, add the default script for the territory:
    if languageId == 'zh' and territoryId and not scriptId:
        if territoryId in ['CN', 'SG']:
            scriptId = 'Hans'
        elif territoryId in ['HK', 'MO', 'TW']:
            scriptId = 'Hant'
    return (languageId, scriptId, territoryId)

def territory_name(territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None):
    u'''Look up the name of a territory in a given language

    :param territoryId: identifier for the territory
    :type territoryId: string
    :param languageIdQuery: identifier for the language used in the result
    :type languageIdQuery: string
    :param scriptIdQuery: identifier for the script used in the result
    :type scriptIdQuery: string
    :param territoryIdQuery: identifier for the territory used in the result
    :type territoryIdQuery: string
    :rtype: string

    The most specific translation available is returned, trying
    language+script+territory, language+script, language+territory,
    and finally the language alone. If the territory is unknown or
    none of these translations exists, the result is the empty string.

    **Examples:**

    Switzerland is called “Schweiz” in German:

    >>> print(territory_name(territoryId="CH", languageIdQuery="de"))
    Schweiz

    And it is called “Svizzera” in Italian:

    >>> print(territory_name(territoryId="CH", languageIdQuery="it"))
    Svizzera

    And it is called “スイス” in Japanese:

    >>> print(territory_name(territoryId="CH", languageIdQuery="ja"))
    スイス
    '''
    languageIdQuery, scriptIdQuery, territoryIdQuery = _parse_and_split_languageId(
        languageId=languageIdQuery,
        scriptId=scriptIdQuery,
        territoryId=territoryIdQuery)
    if territoryId in _territories_db and languageIdQuery:
        translations = _territories_db[territoryId].names
        # parts of the query locale ids, most specific first:
        part_combinations = (
            (languageIdQuery, scriptIdQuery, territoryIdQuery),
            (languageIdQuery, scriptIdQuery),
            (languageIdQuery, territoryIdQuery),
            (languageIdQuery,),
        )
        for parts in part_combinations:
            if not all(parts):
                continue
            query_key = '_'.join(parts)
            if query_key in translations:
                return translations[query_key]
    return ''

def language_name(languageId = None, scriptId = None, territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None):
    u'''Look up the name of a language, optionally translated

    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :param languageIdQuery: identifier for the language used in the result
    :type languageIdQuery: string
    :param scriptIdQuery: identifier for the script used in the result
    :type scriptIdQuery: string
    :param territoryIdQuery: identifier for the territory used in the result
    :type territoryIdQuery: string
    :rtype: string

    If no query language is given, the language is named in itself,
    i.e. the endonym is returned. If nothing suitable is found, the
    result is the empty string.

    **Examples:**

    >>> print(language_name(languageId="sr"))
    српски

    I.e. the endonym for “Serbian” in the default Cyrillic script is
    “српски”.

    If the script “Cyrl” is supplied as well, the name of the
    script is added for clarity:

    >>> print(language_name(languageId="sr", scriptId="Cyrl"))
    српски (Ћирилица)

    And in Latin script the endonym is:

    >>> print(language_name(languageId="sr", scriptId="Latn"))
    Srpski (Latinica)

    And “Serbian” translated to English is:

    >>> print(language_name(languageId="sr", languageIdQuery="en"))
    Serbian

    And with adding the script information:

    >>> print(language_name(languageId="sr", scriptId="Cyrl", languageIdQuery="en"))
    Serbian (Cyrillic)

    >>> print(language_name(languageId="sr", scriptId="Latn", languageIdQuery="en"))
    Serbian (Latin)

    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    languageIdQuery, scriptIdQuery, territoryIdQuery = _parse_and_split_languageId(
        languageId=languageIdQuery,
        scriptId=scriptIdQuery,
        territoryId=territoryIdQuery)
    if not languageIdQuery:
        # no query language given: name the language in itself (endonym)
        languageIdQuery, scriptIdQuery, territoryIdQuery = (
            languageId, scriptId, territoryId)

    def matching_query_key(translations):
        # Returns the most specific query locale id which has an entry
        # in “translations”, or None if there is no such entry.
        part_combinations = (
            (languageIdQuery, scriptIdQuery, territoryIdQuery),
            (languageIdQuery, scriptIdQuery),
            (languageIdQuery, territoryIdQuery),
            (languageIdQuery,),
        )
        for parts in part_combinations:
            if not all(parts):
                continue
            query_key = '_'.join(parts)
            if query_key in translations:
                return query_key
        return None

    def translated_territory():
        # Name of the territory in the query language, '' if unknown.
        return territory_name(territoryId=territoryId,
                              languageIdQuery=languageIdQuery,
                              scriptIdQuery=scriptIdQuery,
                              territoryIdQuery=territoryIdQuery)

    # 1) entry for language + script + territory
    if languageId and scriptId and territoryId:
        icuLocaleId = languageId+'_'+scriptId+'_'+territoryId
        if icuLocaleId in _languages_db:
            translations = _languages_db[icuLocaleId].names
            query_key = matching_query_key(translations)
            if query_key is not None:
                return translations[query_key]
    # 2) entry for language + script; the name of the territory is
    #    appended in parentheses if it is available
    if languageId and scriptId:
        icuLocaleId = languageId+'_'+scriptId
        if icuLocaleId in _languages_db:
            cname = translated_territory()
            translations = _languages_db[icuLocaleId].names
            query_key = matching_query_key(translations)
            if query_key is not None:
                lname = translations[query_key]
                if cname:
                    return lname + ' ('+cname+')'
                return lname
    # 3) entry for language + territory; if there is none, combine the
    #    name of the language alone with the name of the territory
    if languageId and territoryId:
        icuLocaleId = languageId+'_'+territoryId
        if icuLocaleId in _languages_db:
            translations = _languages_db[icuLocaleId].names
            query_key = matching_query_key(translations)
            if query_key is not None:
                return translations[query_key]
        lname = language_name(languageId=languageId,
                              languageIdQuery=languageIdQuery,
                              scriptIdQuery=scriptIdQuery,
                              territoryIdQuery=territoryIdQuery)
        cname = translated_territory()
        if lname and cname:
            return lname + ' ('+cname+')'
    # 4) entry for the language alone
    if languageId:
        icuLocaleId = languageId
        if icuLocaleId in _languages_db:
            translations = _languages_db[icuLocaleId].names
            query_key = matching_query_key(translations)
            if query_key is not None:
                return translations[query_key]
    return ''

def _timezone_name_from_id_parts(timezoneId = None, icuLocaleIdQuery = None):
    '''Query translation of timezone IDs by querying translations
    for each part of the ID separately and putting the results together

    Parts which are not in the database at all are kept untranslated.
    For the query locale “en”, a part without an English name is used
    with “_” replaced by “ ”. If any other part cannot be translated,
    the result is the empty string.
    '''
    if not timezoneId or not icuLocaleIdQuery:
        return ''
    translated_parts = []
    for id_part in timezoneId.split('/'):
        if id_part not in _timezoneIdParts_db:
            translated_parts.append(id_part)
            continue
        translations = _timezoneIdParts_db[id_part].names
        if icuLocaleIdQuery in translations:
            translated = translations[icuLocaleIdQuery]
            if not translated:
                # an empty translation counts as “not translatable”
                return ''
        elif icuLocaleIdQuery == 'en':
            translated = id_part.replace('_', ' ')
        else:
            return ''
        translated_parts.append(translated)
    return u'/'.join(translated_parts)

def _timezone_name(timezoneId = None, icuLocaleIdQuery = None):
    '''
    Internal helper function to translate timezone IDs

    A translation stored for the complete timezone ID is preferred.
    Without one, the name is put together from the translations of the
    parts of the ID. Returns the empty string if neither works.
    '''
    if not timezoneId or not icuLocaleIdQuery:
        return ''
    if timezoneId in _timezones_db:
        translations = _timezones_db[timezoneId].names
        if icuLocaleIdQuery in translations:
            return translations[icuLocaleIdQuery]
    return _timezone_name_from_id_parts(
        timezoneId=timezoneId, icuLocaleIdQuery=icuLocaleIdQuery) or ''

def timezone_name(timezoneId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None):
    u'''Look up the translation of a timezone ID

    :param timezoneId: identifier for the time zone
    :type timezoneId: string
    :param languageIdQuery: identifier for the language used in the result
    :type languageIdQuery: string
    :param scriptIdQuery: identifier for the script used in the result
    :type scriptIdQuery: string
    :param territoryIdQuery: identifier for the territory used in the result
    :type territoryIdQuery: string
    :rtype: string

    The most specific translation available is returned, trying
    language+script+territory, language+script, language+territory,
    and finally the language alone.

    **Examples:**

    >>> print(timezone_name(timezoneId='US/Pacific', languageIdQuery='ja'))
    アメリカ合衆国/太平洋時間

    If no translation can be found, the timezone ID is returned
    unchanged:

    >>> print(timezone_name(timezoneId='Pacific/Pago_Pago', languageIdQuery='xxx'))
    Pacific/Pago_Pago
    '''
    languageIdQuery, scriptIdQuery, territoryIdQuery = _parse_and_split_languageId(
        languageId=languageIdQuery,
        scriptId=scriptIdQuery,
        territoryId=territoryIdQuery)
    # parts of the query locale ids, most specific first:
    part_combinations = (
        (languageIdQuery, scriptIdQuery, territoryIdQuery),
        (languageIdQuery, scriptIdQuery),
        (languageIdQuery, territoryIdQuery),
        (languageIdQuery,),
    )
    for parts in part_combinations:
        if not all(parts):
            continue
        translated = _timezone_name(
            timezoneId=timezoneId,
            icuLocaleIdQuery='_'.join(parts))
        if translated:
            return translated
    return timezoneId

def territoryId(territoryName = u''):
    '''Find the territoryId belonging to a translated territory name.

    :param territoryName: the translated name of a territory
    :type territoryName: string
    :rtype: string

    The translated name given should be a Python Unicode string or an
    UTF-8 encoded string.

    The name may be given in any language, but it has to match a
    stored translation exactly. If nothing matches, the empty string
    is returned.

    **Examples:**

    >>> territoryId("India")
    'IN'

    >>> territoryId("भारत")
    'IN'

    >>> territoryId("インド")
    'IN'

    >>> territoryId("Latin America")
    '419'

    >>> territoryId("Latinoamérica")
    '419'

    '''
    if not territoryName:
        return ''
    unicode_type = type(u'')
    if type(territoryName) != unicode_type:
        # byte strings are expected to be UTF-8 encoded
        territoryName = territoryName.decode('UTF-8')
    for candidate_id in _territories_db:
        translations = _territories_db[candidate_id].names
        for locale_key in translations:
            if territoryName == translations[locale_key]:
                return candidate_id
    return ''

def languageId(languageName = u''):
    '''Find the languageId belonging to a translated language name.

    :param languageName: the translated name of a language
    :type languageName: string
    :rtype: string

    The translated name given should be a Python Unicode string or an
    UTF-8 encoded string.

    The name may be given in any language, but it has to match a
    stored translation exactly, except for differences in case.

    A name of the form “language name (territory name)” which has no
    translation of its own is split, both names are looked up
    separately and the result has the form languageId_territoryId.

    If nothing matches, the empty string is returned.

    **Examples:**

    >>> languageId("Marathi")
    'mr'

    >>> languageId("मराठी")
    'mr'

    >>> languageId("マラーティー語")
    'mr'

    '''
    if not languageName:
        return ''
    unicode_type = type(u'')
    if type(languageName) != unicode_type:
        # byte strings are expected to be UTF-8 encoded
        languageName = languageName.decode('UTF-8')
    wanted = languageName.lower()
    for language_key in _languages_db:
        translations = _languages_db[language_key].names
        for locale_key in translations:
            if wanted == translations[locale_key].lower():
                return language_key
    # No direct hit, try to interpret the name as
    # “language name (territory name)”:
    language_territory_pattern = re.compile(
        r'^(?P<language_name>[^()]+)[\s]+[(](?P<territory_name>[^()]+)[)]',
        re.MULTILINE|re.UNICODE)
    match = language_territory_pattern.search(languageName)
    if not match:
        return ''
    wanted_language = match.group('language_name').lower()
    wanted_territory = match.group('territory_name').lower()
    for language_key in _languages_db:
        translations = _languages_db[language_key].names
        for locale_key in translations:
            if wanted_language != translations[locale_key].lower():
                continue
            for territory_key in _territories_db:
                territory_translations = _territories_db[territory_key].names
                for territory_locale_key in territory_translations:
                    if wanted_territory == territory_translations[territory_locale_key].lower():
                        return language_key+'_'+territory_key
    return ''

# Extra factor used by the list_*() functions: when an entry which is
# already in the ranking is listed by a further source as well (e.g. a
# locale listed both for the language and for the territory requested),
# its weight is multiplied by this factor in addition to the weight
# given by that source.
extra_bonus = 1000000

def list_locales(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    '''List suitable glibc locales

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings

    **Examples:**

    List the suitable locales for the language “German”:

    >>> list_locales(languageId="de")
    ['de_DE.UTF-8', 'de_AT.UTF-8', 'de_CH.UTF-8', 'de_BE.UTF-8', 'de_LU.UTF-8']

    So this returns a list of locales for German. These lists are
    sorted in order of decreasing likelihood, i.e. the most common
    value comes first.

    One can also list the possible locales for the territory “Switzerland”:

    >>> list_locales(territoryId="CH")
    ['de_CH.UTF-8', 'fr_CH.UTF-8', 'it_CH.UTF-8', 'wae_CH.UTF-8']


    If one knows both, the language “German” and the territory
    “Switzerland”, the result is unique:

    >>> list_locales(languageId="de", territoryId="CH")
    ['de_CH.UTF-8']

    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    # Use the most specific entry of the languages database. If the key
    # of that entry contains the territory, the territories database is
    # not consulted anymore.
    language_key = languageId
    skip_territory = False
    for parts, contains_territory in (
            ((languageId, scriptId, territoryId), True),
            ((languageId, scriptId), False),
            ((languageId, territoryId), True)):
        if all(parts) and '_'.join(parts) in _languages_db:
            language_key = '_'.join(parts)
            skip_territory = contains_territory
            break

    ranked_locales = {}

    def add_weights(weights, bonus):
        # merge the non-zero weights of one source into the ranking
        for locale in weights:
            weight = weights[locale]
            if weight == 0:
                continue
            if locale in ranked_locales:
                # listed by more than one source: reward heavily
                ranked_locales[locale] *= weight
                ranked_locales[locale] *= extra_bonus
            else:
                ranked_locales[locale] = weight
            ranked_locales[locale] *= bonus

    if language_key in _languages_db:
        add_weights(_languages_db[language_key].locales, 100)
    if territoryId in _territories_db and not skip_territory:
        add_weights(_territories_db[territoryId].locales, 1)

    ranked_list = _dictionary_to_ranked_list(ranked_locales)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if show_weights:
        return ranked_list
    return _ranked_list_to_list(ranked_list)

def list_scripts(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    '''List scripts used for a language and/or in a territory

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings

    Returns a list of ISO-15924 script ids:

    https://en.wikipedia.org/wiki/ISO_15924

    If a script is already contained in the input, a list containing
    only that script is returned.

    **Examples:**

    List the suitable scripts for the language “Serbian”:

    >>> list_scripts(languageId="sr")
    ['Cyrl', 'Latn']

    So this returns a list of scripts which are in use for
    Serbian. These lists are sorted in order of decreasing likelihood,
    i.e. the most common value comes first.

    List the suitable scripts for the language “Punjabi”:

    >>> list_scripts(languageId="pa")
    ['Guru', 'Arab']

    One can also list the possible scripts for a territory like
    “Pakistan”:

    >>> list_scripts(territoryId="PK")
    ['Arab']

    If one knows both, the language “Punjabi” and the territory
    “Pakistan” or “India”, one can find out which script is the
    preferred one:

    >>> list_scripts(languageId="pa", territoryId="PK")
    ['Arab']

    So the preferred script for Punjabi in Pakistan is “Arab”

    >>> list_scripts(languageId="pa", territoryId="IN")
    ['Guru', 'Arab']

    and the preferred script for Punjabi in India is “Guru”.

    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    if scriptId:
        # scriptId is already given in the input, just return it:
        return [scriptId]
    # If the languages database has an entry for language + territory,
    # use it and do not consult the territories database anymore.
    language_key = languageId
    skip_territory = False
    if languageId and territoryId and languageId+'_'+territoryId in _languages_db:
        language_key = languageId+'_'+territoryId
        skip_territory = True

    ranked_scripts = {}

    def add_weights(weights, bonus):
        # merge the non-zero weights of one source into the ranking
        for script in weights:
            weight = weights[script]
            if weight == 0:
                continue
            if script in ranked_scripts:
                # listed by more than one source: reward heavily
                ranked_scripts[script] *= weight
                ranked_scripts[script] *= extra_bonus
            else:
                ranked_scripts[script] = weight
            ranked_scripts[script] *= bonus

    if language_key in _languages_db:
        add_weights(_languages_db[language_key].scripts, 100)
    if territoryId in _territories_db and not skip_territory:
        add_weights(_territories_db[territoryId].scripts, 1)

    ranked_list = _dictionary_to_ranked_list(ranked_scripts)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if show_weights:
        return ranked_list
    return _ranked_list_to_list(ranked_list)

def list_inputmethods(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    '''List suitable input methods

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings

    **Examples:**

    List the suitable input methods for the language “Japanese”:

    >>> list_inputmethods(languageId="ja")
    ['ibus/kkc', 'ibus/anthy']

    So this returns a list of input methods for Japanese. These lists are
    sorted in order of decreasing likelihood, i.e. the most common
    value comes first.

    One can also list the possible input methods for the territory “Japan”:

    >>> list_inputmethods(territoryId="JP")
    ['ibus/kkc', 'ibus/anthy']
    '''
    ranked_inputmethods = {}
    skipTerritory = False
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    # Use the most specific entry of the languages database. The
    # territories database is skipped only if the key of that entry
    # already contains the territory.
    if languageId and scriptId and territoryId and languageId+'_'+scriptId+'_'+territoryId in _languages_db:
        languageId = languageId+'_'+scriptId+'_'+territoryId
        skipTerritory = True
    elif languageId and scriptId and languageId+'_'+scriptId in _languages_db:
        # The key language_script does not contain the territory, so
        # the territory still has to contribute to the ranking below
        # (same behaviour as list_locales() and list_keyboards()).
        languageId = languageId+'_'+scriptId
    elif languageId and territoryId and languageId+'_'+territoryId in _languages_db:
        languageId = languageId+'_'+territoryId
        skipTerritory = True
    language_bonus = 100
    if languageId in _languages_db:
        language_weights = _languages_db[languageId].inputmethods
        for inputmethod in language_weights:
            weight = language_weights[inputmethod]
            if weight != 0:
                if inputmethod not in ranked_inputmethods:
                    ranked_inputmethods[inputmethod] = weight
                else:
                    ranked_inputmethods[inputmethod] *= weight
                    ranked_inputmethods[inputmethod] *= extra_bonus
                ranked_inputmethods[inputmethod] *= language_bonus
    territory_bonus = 1
    if territoryId in _territories_db and not skipTerritory:
        territory_weights = _territories_db[territoryId].inputmethods
        for inputmethod in territory_weights:
            weight = territory_weights[inputmethod]
            if weight != 0:
                if inputmethod not in ranked_inputmethods:
                    ranked_inputmethods[inputmethod] = weight
                else:
                    # listed for the language as well: reward heavily
                    ranked_inputmethods[inputmethod] *= weight
                    ranked_inputmethods[inputmethod] *= extra_bonus
                ranked_inputmethods[inputmethod] *= territory_bonus
    ranked_list = _dictionary_to_ranked_list(ranked_inputmethods)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if show_weights:
        return ranked_list
    else:
        return _ranked_list_to_list(ranked_list)

def list_keyboards(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    '''Rank the X11 keyboard layouts for a language and/or a territory

    The weights stored for the language and for the territory are
    merged: a layout listed by only one of them keeps its weight, a
    layout listed by both gets the product of both weights multiplied
    by ``extra_bonus``.

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings; if show_weights is True, the ranked list
            produced by the internal ranking helpers is returned as is

    **Examples:**

    Listing likely X11 keyboard layouts for "German":

    >>> list_keyboards(languageId="de")
    ['de(nodeadkeys)', 'de(deadacute)', 'at(nodeadkeys)', 'ch', 'be(oss)']

    Listing likely X11 keyboard layouts for "Switzerland":

    >>> list_keyboards(territoryId="CH")
    ['ch', 'ch(fr)', 'it']

    When specifying both "German" *and* "Switzerland", the
    returned X11 keyboard layout is unique:

    >>> list_keyboards(languageId="de", territoryId="CH")
    ['ch']
    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    # Switch to the most specific language entry available in the
    # database.  The order matters: language_script_territory first,
    # then language_script, then language_territory.
    if languageId:
        candidates = []
        if scriptId and territoryId:
            candidates.append(languageId+'_'+scriptId+'_'+territoryId)
        if scriptId:
            candidates.append(languageId+'_'+scriptId)
        if territoryId:
            candidates.append(languageId+'_'+territoryId)
        for candidate in candidates:
            if candidate in _languages_db:
                languageId = candidate
                break
    # NOTE(review): unlike list_inputmethods(), the territory data is
    # merged here even when a combined language id matched above (a
    # skipTerritory flag used to be set here but was never read).
    # Confirm that this is intended.
    language_bonus = 1
    territory_bonus = 1
    # The language weights have to be merged before the territory weights.
    weighted_sources = []
    if languageId in _languages_db:
        weighted_sources.append(
            (_languages_db[languageId].keyboards, language_bonus))
    if territoryId in _territories_db:
        weighted_sources.append(
            (_territories_db[territoryId].keyboards, territory_bonus))
    ranked_keyboards = {}
    for weights, bonus in weighted_sources:
        for keyboard in weights:
            weight = weights[keyboard]
            if weight == 0:
                # a weight of 0 means "do not offer this layout"
                continue
            if keyboard in ranked_keyboards:
                # already contributed by the other source: boost it
                score = ranked_keyboards[keyboard] * weight * extra_bonus
            else:
                score = weight
            ranked_keyboards[keyboard] = score * bonus
    ranked_list = _dictionary_to_ranked_list(ranked_keyboards)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if not show_weights:
        return _ranked_list_to_list(ranked_list)
    return ranked_list

def list_consolefonts(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    u'''Rank the Linux console fonts for a language and/or a territory

    The weights stored for the language and for the territory are
    merged: a font listed by only one of them keeps its weight, a
    font listed by both gets the product of both weights multiplied
    by ``extra_bonus``.  Weights coming from the language are
    multiplied by 100, so the language dominates the ranking.

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings; if show_weights is True, the ranked list
            produced by the internal ranking helpers is returned as is

    **Examples:**

    Listing likely console fonts for English:

    >>> list_consolefonts(languageId="en")
    ['latarcyrheb-sun16']

    Listing likely console fonts for Greek:

    >>> list_consolefonts(languageId="el")
    ['iso07u-16', 'LatGrkCyr-8x16']

    Listing likely console fonts for Greece:

    >>> list_consolefonts(territoryId="GR")
    ['iso07u-16', 'LatGrkCyr-8x16']

    Listing likely console fonts for Greek in Greece (this example
    has no prompt and is therefore not executed by doctest):

    list_consolefonts(languageId="el", territoryId="GR")
    ['iso07u-16']

    Listing likely console fonts for Greek in a non-Greek country like
    the UK (the language has higher weight):

    >>> list_consolefonts(languageId="el", territoryId="GB")
    ['iso07u-16', 'LatGrkCyr-8x16', 'latarcyrheb-sun16']

    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    # Switch to the most specific language entry available in the
    # database.  The order matters: language_script_territory first,
    # then language_script, then language_territory.
    if languageId:
        candidates = []
        if scriptId and territoryId:
            candidates.append(languageId+'_'+scriptId+'_'+territoryId)
        if scriptId:
            candidates.append(languageId+'_'+scriptId)
        if territoryId:
            candidates.append(languageId+'_'+territoryId)
        for candidate in candidates:
            if candidate in _languages_db:
                languageId = candidate
                break
    # NOTE(review): unlike list_inputmethods(), the territory data is
    # merged here even when a combined language id matched above (a
    # skipTerritory flag used to be set here but was never read).
    # Confirm that this is intended.
    language_bonus = 100
    territory_bonus = 1
    # The language weights have to be merged before the territory weights.
    weighted_sources = []
    if languageId in _languages_db:
        weighted_sources.append(
            (_languages_db[languageId].consolefonts, language_bonus))
    if territoryId in _territories_db:
        weighted_sources.append(
            (_territories_db[territoryId].consolefonts, territory_bonus))
    ranked_consolefonts = {}
    for weights, bonus in weighted_sources:
        for consolefont in weights:
            weight = weights[consolefont]
            if weight == 0:
                # a weight of 0 means "do not offer this font"
                continue
            if consolefont in ranked_consolefonts:
                # already contributed by the other source: boost it
                score = ranked_consolefonts[consolefont] * weight * extra_bonus
            else:
                score = weight
            ranked_consolefonts[consolefont] = score * bonus
    ranked_list = _dictionary_to_ranked_list(ranked_consolefonts)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if not show_weights:
        return _ranked_list_to_list(ranked_list)
    return ranked_list

def list_timezones(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
    '''Rank the timezones for a language and/or a territory

    The weights stored for the language and for the territory are
    merged: a timezone listed by only one of them keeps its weight, a
    timezone listed by both gets the product of both weights
    multiplied by ``extra_bonus``.  Weights coming from the territory
    are multiplied by 100, so the territory dominates the ranking.

    :param concise: if True, return only the highly ranked results
    :type concise: boolean
    :param show_weights: Also return the weights used in the ranking
    :type show_weights: boolean
    :param languageId: identifier for the language
    :type languageId: string
    :param scriptId: identifier for the script
    :type scriptId: string
    :param territoryId: identifier for the territory
    :type territoryId: string
    :rtype: a list of strings; if show_weights is True, the ranked list
            produced by the internal ranking helpers is returned as is

    **Examples:**

    >>> list_timezones(territoryId="DE")
    ['Europe/Berlin']

    >>> list_timezones(languageId="de")
    ['Europe/Berlin', 'Europe/Vienna', 'Europe/Zurich', 'Europe/Brussels', 'Europe/Luxembourg']

    >>> list_timezones(territoryId="CH")
    ['Europe/Zurich']

    >>> list_timezones(languageId="fr", territoryId="CH")
    ['Europe/Zurich']

    >>> list_timezones(languageId="fr")
    ['Europe/Paris', 'America/Montreal', 'Europe/Brussels', 'Europe/Zurich', 'Europe/Luxembourg']

    The territory gets more weight than the language:

    >>> list_timezones(languageId="ja", territoryId="CH")
    ['Europe/Zurich', 'Asia/Tokyo']
    '''
    languageId, scriptId, territoryId = _parse_and_split_languageId(
        languageId=languageId,
        scriptId=scriptId,
        territoryId=territoryId)
    # Switch to the most specific language entry available in the
    # database.  The order matters: language_script_territory first,
    # then language_script, then language_territory.
    if languageId:
        candidates = []
        if scriptId and territoryId:
            candidates.append(languageId+'_'+scriptId+'_'+territoryId)
        if scriptId:
            candidates.append(languageId+'_'+scriptId)
        if territoryId:
            candidates.append(languageId+'_'+territoryId)
        for candidate in candidates:
            if candidate in _languages_db:
                languageId = candidate
                break
    # NOTE(review): unlike list_inputmethods(), the territory data is
    # merged here even when a combined language id matched above (a
    # skipTerritory flag used to be set here but was never read).
    # Confirm that this is intended.
    language_bonus = 1
    territory_bonus = 100
    # The language weights have to be merged before the territory weights.
    weighted_sources = []
    if languageId in _languages_db:
        weighted_sources.append(
            (_languages_db[languageId].timezones, language_bonus))
    if territoryId in _territories_db:
        weighted_sources.append(
            (_territories_db[territoryId].timezones, territory_bonus))
    ranked_timezones = {}
    for weights, bonus in weighted_sources:
        for timezone in weights:
            weight = weights[timezone]
            if weight == 0:
                # a weight of 0 means "do not offer this timezone"
                continue
            if timezone in ranked_timezones:
                # already contributed by the other source: boost it
                score = ranked_timezones[timezone] * weight * extra_bonus
            else:
                score = weight
            ranked_timezones[timezone] = score * bonus
    ranked_list = _dictionary_to_ranked_list(ranked_timezones)
    if concise:
        ranked_list = _make_ranked_list_concise(ranked_list)
    if not show_weights:
        return _ranked_list_to_list(ranked_list)
    return ranked_list

def supports_ascii(keyboardId=None):
    '''Tell whether ASCII can be typed with a keyboard layout

    :param keyboardId: identifier for the keyboard
    :type keyboardId: string
    :rtype: boolean

    Returns the ``ascii`` value stored for the keyboard layout in the
    keyboard database: True if the layout can be used to type ASCII,
    False if it can not be used to type ASCII or if typing ASCII with
    that layout is difficult.  For a keyboard layout which is not in
    the database, True is returned.

    **Examples:**

    >>> supports_ascii("jp")
    True
    >>> supports_ascii("ru")
    False
    '''
    if keyboardId not in _keyboards_db:
        # nothing is known about this layout: report it as ASCII capable
        return True
    return _keyboards_db[keyboardId].ascii

def _test_cldr_locale_pattern(localeId):
    '''
    Internal test function, do not use this.

    Matches localeId against _cldr_locale_pattern and returns the
    named groups as a list of (name, value) tuples in the order
    language, script, territory.  Returns an empty list if localeId
    does not match.
    '''
    match = _cldr_locale_pattern.match(localeId)
    if not match:
        return []
    return [(name, match.group(name))
            for name in ('language', 'script', 'territory')]

def _test_language_territory(show_weights=False, languageId=None, scriptId=None, territoryId=None):
    '''
    Internal test function, do not use this.

    Prints six lines: the results of list_locales() for the language
    alone, for the territory alone and for language, script and
    territory combined (prefixed with " +: "), followed by the same
    three results of list_keyboards().
    '''
    lines = []
    for lister in (list_locales, list_keyboards):
        lines.append(
            str(languageId)+": "
            +repr(lister(show_weights=show_weights,
                         languageId=languageId)))
        lines.append(
            str(territoryId)+": "
            +repr(lister(show_weights=show_weights,
                         territoryId=territoryId)))
        lines.append(
            " +: "
            +repr(lister(show_weights=show_weights,
                         languageId=languageId,
                         scriptId=scriptId,
                         territoryId=territoryId)))
    print('\n'.join(lines))
    return

def _init(debug = False,
         logfilename = '/dev/null',
         datadir = _datadir):
    '''
    Internal function, do not use this.

    Configures the logging module and reads the XML data files.

    :param debug: if True, log at DEBUG level, otherwise at INFO level
    :type debug: boolean
    :param logfilename: file the log is written to (opened with mode "w")
    :type logfilename: string
    :param datadir: directory handed to _read_file() for each data file
    :type datadir: string
    '''
    logging.basicConfig(filename=logfilename,
                        filemode="w",
                        format="%(levelname)s: %(message)s",
                        level=logging.DEBUG if debug else logging.INFO)

    # The files are read in exactly this order; each one gets a fresh
    # content handler instance.
    data_files = (
        ('territories.xml', TerritoriesContentHandler),
        ('languages.xml', LanguagesContentHandler),
        ('keyboards.xml', KeyboardsContentHandler),
        ('timezones.xml', TimezonesContentHandler),
        ('timezoneidparts.xml', TimezoneIdPartsContentHandler),
    )
    for filename, handler_class in data_files:
        _read_file(datadir, filename, handler_class())

class __ModuleInitializer:
    '''
    Internal helper, do not use this.

    Creating an instance calls _init() with its default arguments.
    '''
    def __init__(self):
        _init()

    def __del__(self):
        # nothing to clean up
        pass

# Instantiating __ModuleInitializer calls _init() with its default
# arguments, so the XML data files are read as soon as this module is
# imported.
__module_init = __ModuleInitializer()

if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in the docstrings.
    import doctest
    # NOTE(review): _init() has already been called once through
    # __module_init above, so the data files are read a second time
    # here. Confirm that a repeated load is harmless.
    _init()
    doctest.testmod()