Index: README =================================================================== RCS file: /cvsroot/hunspell/hunspell/README,v retrieving revision 1.1.1.1 diff -u -r1.1.1.1 README --- README 23 Feb 2010 09:08:50 -0000 1.1.1.1 +++ README 20 May 2010 15:41:48 -0000 @@ -146,12 +146,12 @@ run the Hunspell spell checker (compiled with user interface) with a Hunspell or Myspell dictionary: -hunspell -d en_US text.txt +hunspell -d en-US text.txt or without interface: hunspell -hunspell -d en_UK -l dot_str) { + free(posloc->dot_str); + posloc->dot_str = NULL; + } + char *finalfallback = posixlocale_to_string(posloc); + if ((strcmp(finalfallback, "C") == 0) || (strcmp(finalfallback, "POSIX") == 0)) { + free(finalfallback); + finalfallback=mystrdup("en_US"); + } + if (dictexists(finalfallback)) { + ret = finalfallback; + } + else { + free(finalfallback); + ret = bestname; + } + } + free(langtag); + + if (ret != bestname) + free(bestname); + } + posixlocale_delete(posloc); + + if (!ret) { + ret=mystrdup(DEFAULTDICNAME); + } + + return ret; +} + int main(int argc, char** argv) { char buf[MAXLNLEN]; @@ -1499,7 +1606,7 @@ fprintf(stderr,gettext(" -v, --version\tprint version number\n")); fprintf(stderr,gettext(" -vv\t\tprint Ispell compatible version number\n")); fprintf(stderr,gettext(" -w\t\tprint misspelled words (= lines) from one word/line input.\n\n")); - fprintf(stderr,gettext("Example: hunspell -d en_US file.txt # interactive spelling\n" + fprintf(stderr,gettext("Example: hunspell -d en-US file.txt # interactive spelling\n" " hunspell -l file.txt # print misspelled words\n" " hunspell -i utf-8 file.txt # check UTF-8 encoded file\n\n")); fprintf(stderr,gettext("Bug reports: http://hunspell.sourceforge.net\n")); @@ -1614,36 +1721,6 @@ if (printgood && (filter_mode == NORMAL)) filter_mode = BADWORD; - if (! dicname) { - if (! 
(dicname=getenv("DICTIONARY"))) { - /* - * Search in order of LC_ALL, LC_MESSAGES & - * LANG - */ - const char *tests[] = { "LC_ALL", "LC_MESSAGES", "LANG" }; - for (size_t i = 0; i < sizeof(tests) / sizeof(const char*); ++i) { - if ((dicname=getenv(tests[i])) && strcmp(dicname, "") != 0) { - dicname = mystrdup(dicname); - char * dot = strchr(dicname, '.'); - if (dot) *dot = '\0'; - char * at = strchr(dicname, '@'); - if (at) *at = '\0'; - break; - } - } - - if (dicname && ((strcmp(dicname, "C") == 0) || (strcmp(dicname, "POSIX") == 0))) { - free(dicname); - dicname=mystrdup("en_US"); - } - - if (! dicname) { - dicname=mystrdup(DEFAULTDICNAME); - } - } else { - dicname = mystrdup(dicname); - } - } path = add(mystrdup("."), PATHSEP); // <- check path in local directory path = add(path, PATHSEP); // <- check path in root directory if (getenv("DICPATH")) path = add(add(path, getenv("DICPATH")), PATHSEP); @@ -1659,6 +1736,13 @@ if (!privdicname) privdicname = mystrdup(getenv("WORDLIST")); + if (! dicname) { + dicname = autodetect_dict(); + } + else { + dicname = mystrdup(dicname); + } + char * dicplus = strchr(dicname, ','); if (dicplus) *dicplus = '\0'; char * aff = search(path, dicname, ".aff"); --- /dev/null 2010-05-01 11:49:38.510178306 +0100 +++ src/tools/localehelper.c 2010-05-20 15:29:10.000000000 +0100 @@ -0,0 +1,1956 @@ +/* localehelper - Library of useful routines for BCP 47 + * Copyright (C) 2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#if defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include + +#include "localehelper.h" + +#include "subtag_properties.h" + +#define MAX_ALLOWED_EXTLANGS 1 + +#if (_POSIX_C_SOURCE - 0) >= 200809L +# define x_strdup strdup +#elif defined(_MSC_VER) +# define x_strdup _strdup +#else +char* x_strdup(const char *s) +{ + size_t len = strlen(s)+1; + void *d = malloc(len); + + if (d == NULL) + return NULL; + + return (char *)memcpy(d, s, len); +} +#endif +#if (_POSIX_C_SOURCE - 0) >= 200809L +# define x_strndup strndup +#else +char * x_strndup(const char *s, size_t n) +{ + char *d = malloc(n+1); + + if (d == NULL) + return NULL; + + d[n] = '\0'; + return strncpy(d, s, n); +} +#endif + +BCP47_langtag* BCP47_langtag_new() +{ + return (BCP47_langtag*)calloc(1, sizeof(BCP47_langtag)); +} + +static size_t len_tag_array(char **tags) +{ + size_t len = 0; + if (tags) + { + char **ps; + for (ps = tags; *ps; ++ps) + len += strlen(*ps) + 1; + } + return len; +} + +static size_t write_tag_array(char *p, char **tags) +{ + char *s = p; + if (tags) + { + char **ps; + for (ps = tags; *ps; ++ps) + { + *p++ = '-'; + strcpy(p, *ps); + p+=strlen(*ps); + } + } + return p - s; +} + +static void free_tag_array(char **tags) +{ + if (tags) + { + char **ps; + for (ps = tags; *ps; ++ps) + free(*ps); + free(tags); + } +} + +static char** copy_tag_array(char **tags) +{ + char **ret = NULL; + if (tags) + { + char **ps = NULL; + size_t i, len = 0; + for (ps = tags; *ps; ++ps) + ++len; + ret = (char**)malloc(sizeof(char*) * (len+1)); + for (i = 0; i < len; ++i) + ret[i] = x_strdup(tags[i]); + ret[i] = 0; + } + return ret; +} + +static size_t count_tag_array_elems(char **tags) +{ + size_t len = 0; + if (tags) + { + char **ps; + for (ps = 
tags; *ps; ++ps, ++len) { }
+    }
+    return len;
+}
+
+/*
+ * Free a langtag together with every string, tag array and extension it
+ * holds. A NULL langtag is ignored.
+ */
+void BCP47_langtag_delete(BCP47_langtag *langtag)
+{
+    if (!langtag)
+        return;
+
+    free(langtag->language);
+    free_tag_array(langtag->extlangs);
+    free(langtag->script);
+    free(langtag->region);
+    free_tag_array(langtag->variants);
+
+    if (langtag->extensions)
+    {
+        BCP47_extension **pe;
+        for (pe = langtag->extensions; *pe; ++pe)
+            BCP47_extension_delete(*pe);
+        free(langtag->extensions);
+    }
+
+    BCP47_extension_delete(langtag->privateuse);
+    free(langtag);
+}
+
+/*
+ * Deep-copy a langtag: language, extlangs, script, region, variants,
+ * extensions and privateuse are all duplicated.
+ *
+ * Returns NULL when langtag is NULL or when the langtag itself or its
+ * extension array cannot be allocated. The caller releases the result
+ * with BCP47_langtag_delete().
+ */
+BCP47_langtag* BCP47_langtag_copy(const BCP47_langtag *langtag)
+{
+    BCP47_langtag *ret;
+
+    if (!langtag)
+        return NULL;
+
+    ret = BCP47_langtag_new();
+    if (!ret)
+        return NULL;
+
+    if (langtag->language)
+        ret->language = x_strdup(langtag->language);
+    if (langtag->extlangs)
+        ret->extlangs = copy_tag_array(langtag->extlangs);
+    if (langtag->script)
+        ret->script = x_strdup(langtag->script);
+    if (langtag->region)
+        ret->region = x_strdup(langtag->region);
+
+    if (langtag->variants)
+        ret->variants = copy_tag_array(langtag->variants);
+
+    if (langtag->extensions)
+    {
+        BCP47_extension **pe = NULL;
+        size_t i, len = 0;
+        for (pe = langtag->extensions; *pe; ++pe)
+            ++len;
+        /*
+         * Room for len pointers plus the NULL terminator. The sum must be
+         * parenthesised: "sizeof(ptr) * len+1" only adds a single byte.
+         */
+        ret->extensions = (BCP47_extension**)malloc(sizeof(BCP47_extension*) * (len+1));
+        if (!ret->extensions)
+        {
+            BCP47_langtag_delete(ret);
+            return NULL;
+        }
+        /* copy each extension in turn (index i, not a fixed slot) */
+        for (i = 0; i < len; ++i)
+            ret->extensions[i] = BCP47_extension_copy(langtag->extensions[i]);
+        ret->extensions[i] = NULL;
+    }
+
+    if (langtag->privateuse)
+        ret->privateuse = BCP47_extension_copy(langtag->privateuse);
+
+    return ret;
+}
+
+char *BCP47_langtag_to_string(BCP47_langtag *langtag)
+{
+    size_t len, language_len, script_len, region_len;
+    char *ret, *p;
+
+    if (!langtag)
+        return NULL;
+
+    language_len = strlen(langtag->language);
+    script_len = langtag->script ? strlen(langtag->script) + 1 : 0;
+    region_len = langtag->region ? 
strlen(langtag->region) + 1 : 0; + + len = language_len + script_len + region_len + 1; + + len += len_tag_array(langtag->extlangs); + + len += len_tag_array(langtag->variants); + + if (langtag->extensions) + { + BCP47_extension **pe; + for (pe = langtag->extensions; *pe; ++pe) + len+=len_tag_array((*pe)->alphanums) + 2; + } + + if (langtag->privateuse) + len+=len_tag_array(langtag->privateuse->alphanums) + 2; + + p = ret = (char*)malloc(len); + + strcpy(ret, langtag->language); + p+=language_len; + p+=write_tag_array(p, langtag->extlangs); + if (script_len) + { + *p++ = '-'; + strcpy(p, langtag->script); + p+=script_len-1; + } + if (region_len) + { + *p++ = '-'; + strcpy(p, langtag->region); + p+=region_len-1; + } + if (langtag->variants) + p+=write_tag_array(p, langtag->variants); + if (langtag->extensions) + { + BCP47_extension **pe; + for (pe = langtag->extensions; *pe; ++pe) + { + *p++ = '-'; + *p++ = (*pe)->singleton; + p+=write_tag_array(p, (*pe)->alphanums); + } + } + if (langtag->privateuse) + { + *p++ = '-'; + *p++ = langtag->privateuse->singleton; + p+=write_tag_array(p, langtag->privateuse->alphanums); + } + return ret; +} + +/*is_ family is affected by setlocale*/ +static int ischarALPHA(char c) +{ + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +static int isALPHA(const char *str) +{ + for (;*str;++str) + { + if (!ischarALPHA(*str)) + return 0; + } + return 1; +} + +static int ischarDIGIT(char c) +{ + return (c >= '0' && c <= '9'); +} + +static int isDIGIT(const char *str) +{ + for (;*str;++str) + { + if (!ischarDIGIT(*str)) + return 0; + } + return 1; +} + +static int isALPHANUM(const char *str) +{ + for (;*str;++str) + { + if (!ischarDIGIT(*str) && !ischarALPHA(*str)) + return 0; + } + return 1; +} + +static char toasciilower(char c) +{ + if(('A' <= c) && (c <= 'Z')) + c = 'a' + (c - 'A'); + return c; +} + +static char toasciiupper(char c) +{ + if(('a' <= c) && (c <= 'z')) + c = 'A' + (c - 'a'); + return c; +} + +static int 
asciicasecmp(const char *s1, const char *s2) +{ + char c1, c2; + do + { + c1 = toasciilower(*s1++); + c2 = toasciilower(*s2++); + } while(c1 == c2 && c1 != '\0'); + return c2 > c1 ? -1 : c1 > c2; +} + +/* + * glibc typically uses these modifiers to indicate particular + * scripts that the language is written in + * See ISO-15924 http://unicode.org/iso15924/iso15924-codes.html + */ +static int map_modifier_to_script_code(const char *at_str, const char **script_code) +{ + if (at_str) + { + static const char* const tag_map[][2] = + { + { "Arabic", "Arab" }, + { "Imperial_Aramaic", "Armi" }, + { "Armenian", "Armn" }, + { "Avestan", "Avst" }, + { "Balinese", "Bali" }, + { "Bamum", "Bamu" }, + { "Bengali", "Beng" }, + { "Bopomofo", "Bopo" }, + { "Braille", "Brai" }, + { "Buginese", "Bugi" }, + { "Buhid", "Buhd" }, + { "Canadian_Aboriginal", "Cans" }, + { "Carian", "Cari" }, + { "Cham", "Cham" }, + { "Cherokee", "Cher" }, + { "Coptic", "Copt" }, + { "Cypriot", "Cprt" }, + { "Cyrillic", "Cyrl" }, + { "Devanagari", "Deva" }, + { "Deseret", "Dsrt" }, + { "Egyptian_Hierogyphs", "Egyp" }, + { "Ethiopic", "Ethi" }, + { "Georgian", "Geor" }, + { "Glagolitic", "Glag" }, + { "Gothic", "Goth" }, + { "Greek", "Grek" }, + { "Gujarati", "Gujr" }, + { "Gurmukhi", "Guru" }, + { "Hangul", "Hang" }, + { "Han", "Hani" }, + { "Hanunoo", "Hano" }, + { "Hebrew", "Hebr" }, + { "Hiragana", "Hira" }, + { "Katakana_Or_Hiragana", "Hrkt" }, + { "Old_Italic", "Ital" }, + { "Javanese", "Java" }, + { "Kayah_Li", "Kali" }, + { "Katakana", "Kana" }, + { "Kharoshthi", "Khar" }, + { "Khmer", "Khmr" }, + { "Kannada", "Knda" }, + { "Kaithi", "Kthi" }, + { "Tai_Tham", "Lana" }, + { "Lao", "Laoo" }, + { "Latin", "Latn" }, + { "Lepcha", "Lepc" }, + { "Limbu", "Limb" }, + { "Linear_B", "Linb" }, + { "Lisu", "Lisu" }, + { "Lycian", "Lyci" }, + { "Lydian", "Lydi" }, + { "Malayalam", "Mlym" }, + { "Mongolian", "Mong" }, + { "Meetei_Mayek", "Mtei" }, + { "Myanmar", "Mymr" }, + { "Nko", "Nkoo" }, + { "Ogham", "Ogam" 
}, + { "Ol_Chiki", "Olck" }, + { "Old_Turkic", "Orkh" }, + { "Oriya", "Orya" }, + { "Osmanya", "Osma" }, + { "Phags_Pa", "Phag" }, + { "Inscriptional_Pahlavi", "Phli" }, + { "Phoenician", "Phnx" }, + { "Inscriptional_Parthian", "Prti" }, + { "Rejang", "Rjng" }, + { "Runic", "Runr" }, + { "Samaritan", "Samr" }, + { "Old_South_Arabian", "Sarb" }, + { "Saurashtra", "Saur" }, + { "Shavian", "Shaw" }, + { "Sinhala", "Sinh" }, + { "Sundanese", "Sund" }, + { "Syloti_Nagri", "Sylo" }, + { "Syriac", "Syrc" }, + { "Tagbanwa", "Tagb" }, + { "Tai_Le", "Tale" }, + { "New_Tai_Lue", "Talu" }, + { "Tamil", "Taml" }, + { "Tai_Viet", "Tavt" }, + { "Telugu", "Telu" }, + { "Tifinagh", "Tfng" }, + { "Tagalog", "Tglg" }, + { "Thaana", "Thaa" }, + { "Thai", "Thai" }, + { "Tibetan", "Tibt" }, + { "Ugaritic", "Ugar" }, + { "Vai", "Vaii" }, + { "Old_Persian", "Xpeo" }, + { "Cuneiform", "Xsux" }, + { "Yi", "Yiii" }, + { "Inherited", "Zinh" }, + { "Common", "Zyyy" }, + { "Unknown", "Zzzz" }, + }; + size_t i; + for (i = 0; i < sizeof(tag_map)/sizeof(char*[2]); ++i) + { + if (asciicasecmp(at_str, tag_map[i][0]) == 0) + { + *script_code = tag_map[i][1]; + return 1; + } + } + /* + * Special case this one. The script is definitely Latin + * and not Cyrillic. 
But lets bubble the transliteration scheme + * through another layer with return 0 + */ + if (asciicasecmp(at_str, "iqtelif") == 0) + { + *script_code = "Latn"; + return 0; + } + } + return 0; +} + +/* + * Occasionally (ca_ES@valencia) some modifiers indicate a language variant + * See http://www.iana.org/assignments/language-subtag-registry + * for IANA language subtag assignments output codes + */ +static int map_modifier_to_variant_code(const char *at_str, const char **variant_code) +{ + if (at_str) + { + static const char* const tag_map[][2] = + { + { "valencia", "valencia" } + }; + size_t i; + for (i = 0; i < sizeof(tag_map)/sizeof(char*[2]); ++i) + if (asciicasecmp(at_str, tag_map[i][0]) == 0) + { + *variant_code = tag_map[i][1]; + return 1; + } + } + return 0; +} + + +/* + * These languages are modified in glibc to indicate a specific dialect of that + * language, but those dialacts has now been redefined as standalone languages + * in their own right + */ +static int map_incorrectly_modified_lang_to_lang_tags(const char *tag, const char *at_str, const char **lang_tag) +{ + if (tag && at_str) + { + static const char* const tag_map[][3] = + { + /* + * /usr/share/i18n/locales/aa_ER@saaho Afar language locale for + * Eritrea (Saaho Dialect). + * + * http://www.ethnologue.com/show_language.asp?code=ssy Saho, A + * language of Eritrea. Very similar to Afar. 
+             */
+            { "aa", "saaho", "ssy" }
+        };
+        size_t i;
+        /*
+         * Rows hold three strings (language, modifier, replacement), so the
+         * row count is derived from the row type itself, not from a
+         * hard-coded two-pointer size.
+         */
+        for (i = 0; i < sizeof(tag_map)/sizeof(tag_map[0]); ++i)
+        {
+            if ((asciicasecmp(tag, tag_map[i][0]) == 0) && (asciicasecmp(at_str, tag_map[i][1]) == 0))
+            {
+                *lang_tag = tag_map[i][2];
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
+/*
+ * Replace broken modifier strings with what they should have been
+ *
+ * https://bugzilla.redhat.com/show_bug.cgi?id=589138
+ *
+ * Returns the corrected modifier on a case-insensitive match, otherwise
+ * at_str unchanged (which may be NULL).
+ */
+static const char * fix_broken_modifier(const char *at_str)
+{
+    if (at_str)
+    {
+        static const char* const tag_map[][2] =
+        {
+            { "iqtelif.UTF-8", "iqtelif" }
+        };
+        size_t i;
+        for (i = 0; i < sizeof(tag_map)/sizeof(char*[2]); ++i)
+            if (asciicasecmp(at_str, tag_map[i][0]) == 0)
+                return tag_map[i][1];
+    }
+    return at_str;
+}
+
+/* Ugly */
+static const char * map_modifier_to_private_tag(const char *at_str)
+{
+    if (at_str)
+    {
+        static const char* const tag_map[][2] =
+        {
+            /*
+             * Old mechanism to denote that the euro currency is in use,
+             * ignore it.
+             */
+            { "euro", NULL },
+            /*
+             * http://www.mail-archive.com/cygwin@cygwin.com/msg97848.html
+             *
+             * A modifier that indicates what width to assign to an
+             * ambiguous width char, ignore it.
+             *
+             * http://unicode.org/reports/tr11/
+             */
+            { "cjknarrow", NULL },
+            /*
+             * http://www.geez.org/Collation/
+             *
+             * Abegede Collation for Ge'ez (as opposed to Halehame, I believe)
+             *
+             * http://www.iana.org/assignments/language-subtag-registry has
+             * nothing to describe it, so using a private code
+             *
+             * http://tools.ietf.org/html/draft-davis-u-langtag-ext-01
+             * http://www.unicode.org/reports/tr35/ maybe u-co-something some day
+             */
+            { "abegede", "abegede" },
+
+            /*
+             * http://www.alvestrand.no/pipermail/ietf-languages/2006-September/005017.html
+             *
+             * "iqtelif" Latin orthography
+             *
+             * Bit of a mess really. Unsure if tt-Latn is sufficient, i.e. 
if this is + * the default latin orghography in practice but a private code + * doesn't hurt I guess + */ + { "iqtelif", "iqtel" } + }; + size_t i; + for (i = 0; i < sizeof(tag_map)/sizeof(char*[2]); ++i) + if (asciicasecmp(at_str, tag_map[i][0]) == 0) + return tag_map[i][1]; + + fprintf(stderr, "%s unknown. Please add\n", at_str); + + return at_str; + } + return NULL; +} + +static void map_modifiers(const posixlocale *posloc, const char **lang_tag, const char **script_tag, + const char **variant_tag, const char **private_tag) +{ + const char *at_str = fix_broken_modifier(posloc->at_str); + if (!map_incorrectly_modified_lang_to_lang_tags(posloc->main_str, at_str, lang_tag)) + { + *lang_tag = posloc->main_str; + if (!map_modifier_to_script_code(at_str, script_tag)) + if (!map_modifier_to_variant_code(at_str, variant_tag)) + *private_tag = map_modifier_to_private_tag(at_str); + } +} + +/*These languages are obsolete, replace them with modern codes*/ +static const char * map_obsolete_languages(const char *tag) +{ + if (tag) + { + static const char* const tag_map[][2] = + { + { "iw", "he" }, { "ji", "yi" }, + { "in", "id" } + }; + size_t i; + for (i = 0; i < sizeof(tag_map)/sizeof(char*[2]); ++i) + if (asciicasecmp(tag, tag_map[i][0]) == 0) + return tag_map[i][1]; + } + return tag; +} + +/* + * Resolve locale aliases + */ +static const char * resolve_gettext_locale_aliases(const char *locale) +{ + if (locale) + { + static const char* const alias_map[][2] = + { + { "bokmal", "nb_NO.ISO-8859-1" }, { "bokm""\xE5""l", "nb_NO.ISO-8859-1" }, + { "bokmål", "nb_NO.ISO-8859-1" }, { "catalan", "ca_ES.ISO-8859-1" }, + { "croatian", "hr_HR.ISO-8859-2" }, { "czech", "cs_CZ.ISO-8859-2" }, + { "danish", "da_DK.ISO-8859-1" }, { "dansk", "da_DK.ISO-8859-1" }, + { "deutsch", "de_DE.ISO-8859-1" }, { "dutch", "nl_NL.ISO-8859-1" }, + { "eesti", "et_EE.ISO-8859-1" }, { "estonian", "et_EE.ISO-8859-1" }, + { "finnish", "fi_FI.ISO-8859-1" }, { "fran""\xE7""ais", "fr_FR.ISO-8859-1" }, + { 
"français", "fr_FR.ISO-8859-1" }, { "french", "fr_FR.ISO-8859-1" }, + { "galego", "gl_ES.ISO-8859-1" }, { "galician", "gl_ES.ISO-8859-1" }, + { "german", "de_DE.ISO-8859-1" }, { "greek", "el_GR.ISO-8859-7" }, + { "hebrew", "he_IL.ISO-8859-8" }, { "hrvatski", "hr_HR.ISO-8859-2" }, + { "hungarian", "hu_HU.ISO-8859-2" }, { "icelandic", "is_IS.ISO-8859-1" }, + { "italian", "it_IT.ISO-8859-1" }, { "japanese", "ja_JP.eucJP" }, + { "japanese.euc", "ja_JP.eucJP" }, { "ja_JP", "ja_JP.eucJP" }, + { "ja_JP.ujis", "ja_JP.eucJP" }, { "japanese.sjis", "ja_JP.SJIS" }, + { "korean", "ko_KR.eucKR" }, { "korean.euc", "ko_KR.eucKR" }, + { "ko_KR", "ko_KR.eucKR" }, { "lithuanian", "lt_LT.ISO-8859-13" }, + { "no_NO", "nb_NO.ISO-8859-1" }, { "no_NO.ISO-8859-1", "nb_NO.ISO-8859-1" }, + { "norwegian", "nb_NO.ISO-8859-1" }, { "nynorsk", "nn_NO.ISO-8859-1" }, + { "polish", "pl_PL.ISO-8859-2" }, { "portuguese", "pt_PT.ISO-8859-1" }, + { "romanian", "ro_RO.ISO-8859-2" }, { "russian", "ru_RU.ISO-8859-5" }, + { "slovak", "sk_SK.ISO-8859-2" }, { "slovene", "sl_SI.ISO-8859-2" }, + { "slovenian", "sl_SI.ISO-8859-2" }, { "spanish", "es_ES.ISO-8859-1" }, + { "swedish", "sv_SE.ISO-8859-1" }, { "thai", "th_TH.TIS-620" }, + { "turkish", "tr_TR.ISO-8859-9" } + }; + size_t i; + for (i = 0; i < sizeof(alias_map)/sizeof(char*[2]); ++i) + { + if (asciicasecmp(locale, alias_map[i][0]) == 0) + return alias_map[i][1]; + } + } + return locale; +} + +BCP47_LanguageTag *BCP47_LanguageTag_new_from_posixlocale(const posixlocale *posloc) +{ + BCP47_LanguageTag *LanguageTag = NULL; + const char *language = NULL; + const char *script_tag = NULL; + const char *territory_tag = NULL; + const char *variant_tag = NULL; + const char *private_tag = NULL; + int len; + + if (!posloc) + return NULL; + + len = strlen(posloc->main_str); + + if (len != 3 && len != 2) + { + /* + * There's no way this will be convertible, unless its C or POSIX, + * where en-US is a realistic conversion. 
Or this is an old-school + * locale name that getttext has an alias for, + */ + posixlocale *tmpposloc = NULL; + if ( + !posloc->underscore_str && !posloc->at_str && + ((strcmp(posloc->main_str, "C") == 0) || (strcmp(posloc->main_str, "POSIX") == 0)) + ) + { + tmpposloc = posixlocale_new_from_string("en_US"); + } + else + { + char *locale = posixlocale_to_string(posloc); + const char *alias = resolve_gettext_locale_aliases(locale); + if (alias != locale) + tmpposloc = posixlocale_new_from_string(alias); + free(locale); + } + + if (tmpposloc) + { + LanguageTag = BCP47_LanguageTag_new_from_posixlocale(tmpposloc); + posixlocale_delete(tmpposloc); + } + + return LanguageTag; + } + + map_modifiers(posloc, &language, &script_tag, &variant_tag, &private_tag); + + language = map_obsolete_languages(language); + territory_tag = posloc->underscore_str; + + LanguageTag = BCP47_LanguageTag_new(); + LanguageTag->langtag = BCP47_langtag_new(); + LanguageTag->langtag->language = x_strdup(language); + if (script_tag) + LanguageTag->langtag->script = x_strdup(script_tag); + if (territory_tag) + LanguageTag->langtag->region = x_strdup(territory_tag); + if (variant_tag) + { + LanguageTag->langtag->variants = (char**)malloc(sizeof(char*) * 2); + LanguageTag->langtag->variants[0] = x_strdup(variant_tag); + LanguageTag->langtag->variants[1] = NULL; + } + if (private_tag) + { + LanguageTag->langtag->privateuse = BCP47_extension_new(); + LanguageTag->langtag->privateuse->singleton = 'x'; + LanguageTag->langtag->privateuse->alphanums = (char**)malloc(sizeof(char*) * 2); + LanguageTag->langtag->privateuse->alphanums[0] = x_strdup(private_tag); + LanguageTag->langtag->privateuse->alphanums[1] = NULL; + } + + return LanguageTag; +} + +BCP47_LanguageTag *BCP47_LanguageTag_copy(const BCP47_LanguageTag* LanguageTag) +{ + BCP47_LanguageTag *ret = NULL; + if (LanguageTag) + { + ret = BCP47_LanguageTag_new(); + if (LanguageTag->privateuse) + ret->privateuse = 
BCP47_extension_copy(LanguageTag->privateuse); + if (LanguageTag->grandfathered) + ret->grandfathered = x_strdup(LanguageTag->grandfathered); + if (LanguageTag->langtag) + ret->langtag = BCP47_langtag_copy(LanguageTag->langtag); + } + return ret; +} + +BCP47_LanguageTag* BCP47_LanguageTag_new() +{ + return (BCP47_LanguageTag*)calloc(1, sizeof(BCP47_LanguageTag)); +} + +static void tolower_tag_array(char **tags) +{ + if (tags) + { + char **ps; + for (ps = tags; *ps; ++ps) + { + char *p; + for (p = *ps; *p; ++p) + *p = toasciilower(*p); + } + } +} + +static void tolower_bcpextension(BCP47_extension *extension) +{ + if (extension) + { + extension->singleton = toasciilower(extension->singleton); + tolower_tag_array(extension->alphanums); + } +} + +static char *firstchunk(const char **startchunk, size_t *len) +{ + const char *endchunk; + char *chunk; + + endchunk = *startchunk; + + while (*endchunk && *endchunk != '-') + ++endchunk; + + *len = endchunk - *startchunk; + + if (!*len) + return NULL; + + chunk = (char*)malloc(*len+1); + strncpy(chunk, *startchunk, *len); + chunk[*len] = 0; + *startchunk = endchunk; + return chunk; +} + +static char *nextchunk(const char **startchunk, size_t *len) +{ + if ((*startchunk)[0] != '-' || (*startchunk)[1] == '\0') + { + *len = 0; + return NULL; + } + (*startchunk)++; + return firstchunk(startchunk, len); +} + +static int is_extlang(size_t len, const char *chunk) +{ + return (len == 3 && isALPHA(chunk)); +} + +static int is_variant(size_t len, const char *chunk) +{ + if (((len >= 5 && len <= 8)) && isALPHANUM(chunk)) + return 1; + else if (len == 4 && ischarDIGIT(*chunk) && isALPHANUM(chunk+1)) + return 1; + return 0; +} + +static int is_script(size_t len, const char *chunk) +{ + return (len == 4) && isALPHA(chunk); +} + +static int is_region(size_t len, const char *chunk) +{ + if ((len == 2) && isALPHA(chunk)) + return 1; + else if (len == 3 && isDIGIT(chunk)) + return 1; + return 0; +} + +static int is_singleton(size_t len, const 
char *chunk) +{ + if (len != 1) + return 0; + if (ischarDIGIT(chunk[0])) + return 1; + if (chunk[0] < 0x41 || chunk[0] > 0x7A) + return 0; + if (chunk[0] == 0x58 || chunk[0] == 0x78) + return 0; + return 1; +} + +static int is_privateuse(size_t len, const char *chunk) +{ + return (len == 1 && (chunk[0] == 'x' || chunk[0] == 'X')); +} + +static int is_28_alphanum(size_t len, const char *chunk) +{ + return ((len >= 2 && len <= 8)) && isALPHANUM(chunk); +} + +static int is_18_alphanum(size_t len, const char *chunk) +{ + return ((len >= 1 && len <= 8)) && isALPHANUM(chunk); +} + +static const char* canonical_grandfather(const char *grandfather) +{ + if (grandfather) + { + size_t i; + for (i = 0; i < sizeof(lsr_grandfather_tag_properties)/sizeof(grandfathered_tag_properties); ++i) + { + if (asciicasecmp(grandfather, lsr_grandfather_tag_properties[i].tag) == 0) + { + if (lsr_grandfather_tag_properties[i].preferred_value) + return lsr_grandfather_tag_properties[i].preferred_value; + return lsr_grandfather_tag_properties[i].tag; + } + } + } + return NULL; +} + +static int is_grandfather(const char *tag) +{ + return canonical_grandfather(tag) != NULL ? 
1 : 0; +} + +static char **BCP47_privateuse_from_string(const char *startchunk) +{ + size_t len = 0, i; + char **ret = NULL; + char *chunk = nextchunk(&startchunk, &len); + + for (i = 0; is_18_alphanum(len, chunk); ++i) + { + ret = (char**)realloc(ret, sizeof(char*) * (i+2)); + ret[i] = chunk; + ret[i+1] = NULL; + chunk = nextchunk(&startchunk, &len); + } + + if (len != 0) + { + free(chunk); + free_tag_array(ret); + return NULL; + } + + return ret; +} + +BCP47_LanguageTag* BCP47_LanguageTag_new_from_string(const char *str) +{ + BCP47_LanguageTag *LanguageTag; + const char *startchunk, *endstr; + char *chunk; + size_t len,i,j; + int extlangallowed; + + if (!str) + return NULL; + + LanguageTag = BCP47_LanguageTag_new(); + + if (is_grandfather(str)) + { + LanguageTag->grandfathered = x_strdup(str); + return LanguageTag; + } + + startchunk = str; + endstr = str + strlen(str); + chunk = firstchunk(&startchunk, &len); + + /*private use*/ + if (is_privateuse(len, chunk)) + { + LanguageTag->privateuse = BCP47_extension_new(); + LanguageTag->privateuse->singleton = chunk[0]; + free(chunk); + len = 0; + LanguageTag->privateuse->alphanums = BCP47_privateuse_from_string(startchunk); + if (!LanguageTag->privateuse->alphanums) + { + BCP47_LanguageTag_delete(LanguageTag); + return NULL; + } + return LanguageTag; + } + + if (len < 2 || len > 8 || !isALPHA(chunk)) + { + free(chunk); + BCP47_LanguageTag_delete(LanguageTag); + return NULL; + } + + extlangallowed = (len == 2 || len == 3); + + LanguageTag->langtag = BCP47_langtag_new(); + LanguageTag->langtag->language = chunk; + + chunk = nextchunk(&startchunk, &len); + + /*extlang*/ + if (extlangallowed) + { + /* + * Although the ABNF production 'extlang' permits up to three extended + * language tags in the language tag, extended language subtags MUST + * NOT include another extended language subtag in their 'Prefix'. 
+ * That is, the second and third extended language subtag positions in + * a language tag are permanently reserved and tags that include those + * subtags in that position are, and will always remain, invalid. + * + * So, why not just ditch it in the grammar. Because it was earlier + * "extlang = *3("-" 3ALPHA) ; reserved for future use", and it doesn't + * want to break parsers which implemented that ? + */ + for (i = 0; i < MAX_ALLOWED_EXTLANGS && is_extlang(len, chunk); ++i) + { + LanguageTag->langtag->extlangs = (char**)realloc(LanguageTag->langtag->extlangs, sizeof(char*) * (i+2)); + LanguageTag->langtag->extlangs[i] = chunk; + LanguageTag->langtag->extlangs[i+1] = NULL; + chunk = nextchunk(&startchunk, &len); + } + } + + /*script*/ + if (is_script(len, chunk)) + { + LanguageTag->langtag->script = chunk; + chunk = nextchunk(&startchunk, &len); + } + + /*region*/ + if (is_region(len, chunk)) + { + LanguageTag->langtag->region = chunk; + chunk = nextchunk(&startchunk, &len); + } + + /*variants*/ + for (j = 0; is_variant(len, chunk); ++j) + { + for (i = 0; i < j && asciicasecmp(LanguageTag->langtag->variants[i], chunk) != 0; ++i) { } + /*already used this variant, invalid*/ + if (i != j) + break; + + LanguageTag->langtag->variants = (char**)realloc(LanguageTag->langtag->variants, sizeof(char*) * (j+2)); + LanguageTag->langtag->variants[j] = chunk; + LanguageTag->langtag->variants[j+1] = NULL; + chunk = nextchunk(&startchunk, &len); + } + + /*extension*/ + for (j = 0; is_singleton(len, chunk); ++j) + { + for (i = 0; i < j && toasciilower(LanguageTag->langtag->extensions[i]->singleton) != toasciilower(chunk[0]); ++i) { } + /*already used this singleton, invalid*/ + if (i != j) + break; + + LanguageTag->langtag->extensions = (BCP47_extension**)realloc(LanguageTag->langtag->extensions, sizeof(BCP47_extension**) * (j+2)); + LanguageTag->langtag->extensions[j] = BCP47_extension_new(); + LanguageTag->langtag->extensions[j]->singleton = chunk[0]; + 
LanguageTag->langtag->extensions[j+1] = NULL;
+        free(chunk);
+
+        chunk = nextchunk(&startchunk, &len);
+        for (i = 0; is_28_alphanum(len, chunk); ++i)
+        {
+            LanguageTag->langtag->extensions[j]->alphanums = (char**)realloc(LanguageTag->langtag->extensions[j]->alphanums, sizeof(char*) * (i+2));
+            LanguageTag->langtag->extensions[j]->alphanums[i] = chunk;
+            LanguageTag->langtag->extensions[j]->alphanums[i+1] = NULL;
+            chunk = nextchunk(&startchunk, &len);
+        }
+
+        if (i == 0)
+        {
+            free(chunk);
+            BCP47_LanguageTag_delete(LanguageTag);
+            return NULL;
+        }
+    }
+
+    /*privateuse*/
+    if (is_privateuse(len, chunk))
+    {
+        LanguageTag->langtag->privateuse = BCP47_extension_new();
+        LanguageTag->langtag->privateuse->singleton = chunk[0];
+        free(chunk);
+        len = 0;
+        LanguageTag->langtag->privateuse->alphanums = BCP47_privateuse_from_string(startchunk);
+        if (!LanguageTag->langtag->privateuse->alphanums)
+        {
+            BCP47_LanguageTag_delete(LanguageTag);
+            return NULL;
+        }
+        startchunk = endstr;
+    }
+
+    if (startchunk != endstr || len != 0)
+    {
+        free(chunk);
+        BCP47_LanguageTag_delete(LanguageTag);
+        return NULL;
+    }
+
+    return LanguageTag;
+}
+
+/*
+ * Look a tag up (ASCII case-insensitively) in lsr_redundant_tag_properties
+ * and return that entry's preferred_value; NULL when the tag is NULL or
+ * not listed.
+ */
+static const char* find_redundant_replacement(const char *redundant)
+{
+    if (redundant)
+    {
+        size_t i;
+        for (i = 0; i < sizeof(lsr_redundant_tag_properties)/sizeof(redundant_tag_properties); ++i)
+            if (asciicasecmp(redundant, lsr_redundant_tag_properties[i].tag) == 0)
+                return lsr_redundant_tag_properties[i].preferred_value;
+    }
+    return NULL;
+}
+
+/*
+ * Return the lsr_lang_subtag_properties entry whose subtag matches lang
+ * (ASCII case-insensitively), or NULL when lang is NULL or not listed.
+ */
+static const lang_subtag_properties* lookup_lsr_lang_subtag_properties(const char *lang)
+{
+    if (lang)
+    {
+        size_t i;
+        for (i = 0; i < sizeof(lsr_lang_subtag_properties)/sizeof(lang_subtag_properties); ++i)
+            if (asciicasecmp(lang, lsr_lang_subtag_properties[i].subtag) == 0)
+                return &lsr_lang_subtag_properties[i];
+    }
+    return NULL;
+}
+
+/*
+ * qsort() comparator for an array of BCP47_extension pointers, ordering
+ * them ascending by singleton.
+ *
+ * qsort() passes the address of each array element, so a and b point at
+ * BCP47_extension* slots and need one dereference. The result follows the
+ * qsort contract: negative, zero or positive.
+ */
+static int compareextension(const void *a, const void *b)
+{
+    const BCP47_extension *ea = *(BCP47_extension * const *)a;
+    const BCP47_extension *eb = *(BCP47_extension * const *)b;
+
+    return (ea->singleton > eb->singleton) - (ea->singleton < eb->singleton);
+}
+
+BCP47_LanguageTag* BCP47_LanguageTag_new_canonical_from_string(const char *str) +{ + size_t i; + const char *replacement; + char *p; + BCP47_LanguageTag *ret; + const lang_subtag_properties *lang_subtag_props; + + if (!str) + return NULL; + + /* + * Redundant or grandfathered tags are replaced by their 'Preferred- + * Value', if there is one. + */ + replacement = canonical_grandfather(str); + if (replacement) + return BCP47_LanguageTag_new_from_string(replacement); + + replacement = find_redundant_replacement(str); + if (replacement) + return BCP47_LanguageTag_new_from_string(replacement); + + ret = BCP47_LanguageTag_new_from_string(str); + + if (!ret) + return NULL; + + if (ret->grandfathered) + { + for (p = ret->grandfathered; *p; ++p) + *p = toasciilower(*p); + return ret; + } + + if (ret->privateuse) + { + tolower_bcpextension(ret->privateuse); + return ret; + } + + for (p = ret->langtag->language; *p; ++p) + *p = toasciilower(*p); + + if (ret->langtag->extlangs) + { + const char *extlang; + + tolower_tag_array(ret->langtag->extlangs); + + extlang = ret->langtag->extlangs[0]; + + for (i = 0; i < sizeof(lsr_extlang_subtag_properties)/sizeof(extlang_subtag_properties); ++i) + { + if ( + (strcmp(extlang, lsr_extlang_subtag_properties[i].subtag) == 0) && + (strcmp(ret->langtag->language, lsr_extlang_subtag_properties[i].prefix) == 0) + ) + { + free(ret->langtag->language); + ret->langtag->language = x_strdup(lsr_extlang_subtag_properties[i].preferred_value); + free_tag_array(ret->langtag->extlangs); + ret->langtag->extlangs = NULL; + break; + } + } + } + + if (ret->langtag->script) + { + ret->langtag->script[0] = toasciiupper(ret->langtag->script[0]); + for (p = ret->langtag->script+1; *p; ++p) + *p = toasciilower(*p); + } + + lang_subtag_props = lookup_lsr_lang_subtag_properties(ret->langtag->language); + if (lang_subtag_props && lang_subtag_props->preferred_value[0]) + { + free(ret->langtag->language); + ret->langtag->language = 
x_strdup(lang_subtag_props->preferred_value); + lang_subtag_props = lookup_lsr_lang_subtag_properties(ret->langtag->language); + } + + if (ret->langtag->script && lang_subtag_props && lang_subtag_props->suppress_script[0]) + { + if (strcmp(ret->langtag->script, lang_subtag_props->suppress_script) == 0) + { + free(ret->langtag->script); + ret->langtag->script = NULL; + } + } + + if (ret->langtag->region) + { + for (p = ret->langtag->region; *p; ++p) + *p = toasciiupper(*p); + + for (i = 0; i < sizeof(lsr_region_subtag_properties)/sizeof(region_subtag_properties); ++i) + { + if (strcmp(ret->langtag->region, lsr_region_subtag_properties[i].subtag) == 0) + { + free(ret->langtag->region); + ret->langtag->region = x_strdup(lsr_region_subtag_properties[i].preferred_value); + break; + } + } + } + + if (ret->langtag->variants) + { + int removehepburn=0; + char **ps; + + tolower_tag_array (ret->langtag->variants); + + for (ps = ret->langtag->variants; *ps; ++ps) + { + for (i = 0; i < sizeof(lsr_variant_subtag_properties)/sizeof(variant_subtag_properties); ++i) + { + if (strcmp(*ps, lsr_variant_subtag_properties[i].subtag) == 0) + { + free(*ps); + *ps = x_strdup(lsr_variant_subtag_properties[i].preferred_value); + break; + } + } + } + + /* + * special-case in the hepburn-heploc -> hepburn-alalc97 -> alalc97 + * out-of-band Preferred form re its comment + */ + for (ps = ret->langtag->variants; *ps; ++ps) + { + if (strcmp(*ps, "alalc97") == 0) + { + removehepburn = 1; + break; + } + } + + if (removehepburn) + { + for (ps = ret->langtag->variants; *ps; ++ps) + { + if (strcmp(*ps, "hepburn") == 0) + { + do + { + *ps = *(ps+1); + } + while(*ps++); + break; + } + } + } + + + } + + if (ret->langtag->extensions) + { + BCP47_extension **pe; + for (pe = ret->langtag->extensions; *pe; ++pe) + tolower_bcpextension(*pe); + + /*Extension sequences are ordered into case-insensitive ASCII order by singleton subtag.*/ + qsort(ret->langtag->extensions, pe-ret->langtag->extensions, 
sizeof(BCP47_extension*), compareextension); + } + + tolower_bcpextension(ret->langtag->privateuse); + + return(ret); +} + + +BCP47_LanguageTag* BCP47_LanguageTag_new_canonical_extlang_form_from_string(const char *str) +{ + /* + * The language tag is first transformed into canonical form + */ + BCP47_LanguageTag *ret = BCP47_LanguageTag_new_canonical_from_string(str); + size_t i; + + if (ret->grandfathered || ret->privateuse) + return ret; + + /* + * If the language tag starts with a primary language subtag that is + * also an extlang subtag, then the language tag is prepended with the + * extlang's 'Prefix' + */ + for (i = 0; i < sizeof(lsr_extlang_subtag_properties)/sizeof(extlang_subtag_properties); ++i) + { + if (strcmp(ret->langtag->language, lsr_extlang_subtag_properties[i].subtag) == 0) + { + ret->langtag->extlangs = (char**)malloc(sizeof(char*) * (2)); + ret->langtag->extlangs[0] = ret->langtag->language; + ret->langtag->extlangs[1] = NULL; + + ret->langtag->language = x_strdup(lsr_extlang_subtag_properties[i].prefix); + break; + } + } + + return ret; +} + +char * BCP47_LanguageTag_to_string(const BCP47_LanguageTag *LanguageTag) +{ + if (!LanguageTag) + return NULL; + + if (LanguageTag->langtag) + return BCP47_langtag_to_string(LanguageTag->langtag); + else if (LanguageTag->privateuse) + { + size_t len; + char *p, *ret; + + len=len_tag_array(LanguageTag->privateuse->alphanums) + 2; + + ret = p = (char*)malloc(len); + + *p++ = LanguageTag->privateuse->singleton; + p+=write_tag_array(p, LanguageTag->privateuse->alphanums); + + return ret; + } + else if (LanguageTag->grandfathered) + return x_strdup(LanguageTag->grandfathered); + + return NULL; +} + +void BCP47_LanguageTag_print(const BCP47_LanguageTag *LanguageTag) +{ + if (!LanguageTag) + { + printf("\n"); + return; + } + + if (LanguageTag->langtag) + { + printf("language:%s ", LanguageTag->langtag->language); + + if (LanguageTag->langtag->extlangs) + { + char **ps; + for (ps = 
LanguageTag->langtag->extlangs; *ps; ++ps) + printf("extlang:%s ", *ps); + } + + printf("script:%s ", LanguageTag->langtag->script ? LanguageTag->langtag->script : ""); + printf("region:%s ", LanguageTag->langtag->region ? LanguageTag->langtag->region : ""); + + if (LanguageTag->langtag->variants) + { + char **ps; + for (ps = LanguageTag->langtag->variants; *ps; ++ps) + printf("variant:%s ", *ps); + } + + if (LanguageTag->langtag->extensions) + { + BCP47_extension **pe; + for (pe = LanguageTag->langtag->extensions; *pe; ++pe) + { + char **ps; + printf("singleton:%c ", (*pe)->singleton); + for (ps = (*pe)->alphanums; *ps; ++ps) + printf("extension-subtag:%s ", *ps); + } + } + + if (LanguageTag->langtag->privateuse) + { + char **ps; + printf("private:%c ", LanguageTag->langtag->privateuse->singleton); + for (ps = LanguageTag->langtag->privateuse->alphanums; *ps; ++ps) + printf("privateuse-subtag:%s ", *ps); + } + } + else if (LanguageTag->privateuse) + { + char **ps; + printf("private:%c ", LanguageTag->privateuse->singleton); + for (ps = LanguageTag->privateuse->alphanums; *ps; ++ps) + printf("privateuse-subtag:%s ", *ps); + } + else if (LanguageTag->grandfathered) + printf("grandfathered: %s ", LanguageTag->grandfathered); + + printf("\n"); +} + +void BCP47_LanguageTag_delete(BCP47_LanguageTag *LanguageTag) +{ + if (!LanguageTag) + return; + BCP47_extension_delete(LanguageTag->privateuse); + free(LanguageTag->grandfathered); + BCP47_langtag_delete(LanguageTag->langtag); + free(LanguageTag); +} + +/* +http://www.ethnologue.com/show_family.asp?subid=57-16 + +*/ +/* + * This is only "implemented" for the Berber languages seeing as that's the + * only collective language which ended up as a glibc locale. Doesn't seem to + * be any real standard, or convenient format lists to get a list of what + * languages might be considered part of a collective. 
These ones are just + * pulled from http://www.ethnologue.com/show_family.asp?subid=57-16 and + * assigned a weighting according to the first population number listed under + * each. + */ + + +BCP47_LanguageTag **BCP47_LanguageTag_expand_collective(const BCP47_LanguageTag *LanguageTag) +{ + BCP47_LanguageTag **ret = NULL; + + if (LanguageTag && LanguageTag->langtag) + { + static const char* const collective_map[][3] = + { + /* + * http://bugs.freedesktop.org/show_bug.cgi?id=19881 Algeria has + * standardized on Kabyle, writing in Latin. So kab_DZ for ber_DZ + * is a likely strong choice + * + * The situation in Morocco is far more complicated, the top three + * here are part of the education through Tifinagh project + * http://www.adrar.nl/indexEng.html but there's no clear contended. + * Top three MA entries are likely candidates + */ + { "ber", "shi", "MA" }, /* 3000 */ + { "ber", "tzm", "MA" }, /* 3000 */ + { "ber", "kab", "DZ" }, /* 2500 */ + { "ber", "rif", "MA" }, /* 1500 */ + { "ber", "shy", "DZ" }, /* 1400 */ + { "ber", "ttq", "NE" }, /* 450 */ + { "ber", "thz", "NE" }, /* 250 */ + { "ber", "taq", "ML" }, /* 250 */ + { "ber", "jbn", "LY" }, /* 184 */ + { "ber", "cnu", "DZ" }, /* 76 */ + { "ber", "mzb", "DZ" }, /* 70 */ + { "ber", "grr", "DZ" }, /* 40 */ + { "ber", "siz", "EG" }, /* 30 */ + { "ber", "thv", "DZ" }, /* 25 */ + { "ber", "gha", "LY" }, /* 10 */ + { "ber", "tia", "DZ" }, /* 9 */ + { "ber", "tjo", "DZ" }, /* 6 */ + { "ber", "swn", "LY" }, /* 6 */ + { "ber", "oua", "DZ" }, /* 5 */ + { "ber", "auj", "LY" }, /* 3 */ + { "ber", "jbe", "IL" }, /* 2 */ + { "ber", "gho", "MA" }, /* 0 */ + { "ber", "sjs", "MA" }, /* 0 */ + { "ber", "sds", "TN",} /* 0 */ + }; + size_t i, count=0; + for (i = 0; i < sizeof(collective_map)/sizeof(char*[3]); ++i) + { + if ( + (asciicasecmp(LanguageTag->langtag->language, collective_map[i][0]) == 0) && + ((LanguageTag->langtag->region == NULL) || (asciicasecmp(LanguageTag->langtag->region, collective_map[i][2]) == 0)) + ) + { + 
++count; + ret = (BCP47_LanguageTag**)realloc(ret, (count + 1) * sizeof(BCP47_LanguageTag*)); + ret[count-1] = BCP47_LanguageTag_copy(LanguageTag); + free(ret[count-1]->langtag->language); + ret[count-1]->langtag->language = x_strdup(collective_map[i][1]); + } + } + if (count) + ret[count] = NULL; + if (!ret && LanguageTag->langtag->region) + { + BCP47_LanguageTag *regionless = BCP47_LanguageTag_copy(LanguageTag); + free(regionless->langtag->region); + regionless->langtag->region = NULL; + ret = BCP47_LanguageTag_expand_collective(regionless); + BCP47_LanguageTag_delete(regionless); + } + } + + return ret; +} + +char BCP47_LanguageTag_type(const BCP47_LanguageTag *LanguageTag) +{ + const char *lang; + const lang_subtag_properties* props; + + if (LanguageTag->grandfathered) + return 'i'; + else if (LanguageTag->privateuse) + return 'p'; + + lang = LanguageTag->langtag->language; + + /*qaa..qtz, Private use*/ + if (lang && strlen(lang) == 3 && lang[0] == 'q' && lang[1] <= 't' && lang[2] <= 'z') + return 'p'; + props = lookup_lsr_lang_subtag_properties(lang); + return props ? props->scope : 'i'; +} + +void posixlocale_delete(posixlocale *posloc) +{ + if (!posloc) + return; + free(posloc->main_str); + free(posloc->underscore_str); + free(posloc->dot_str); + free(posloc->at_str); + free(posloc); +} + +/* + * http://www.opengroup.org/onlinepubs/007908799/xbd/envvar.html + * + * The Single UNIX ® Specification, Version 2: Environment Variables + * + * If the locale value has the form: + * + * language[_territory][.codeset] + * + * it refers to an implementation-provided locale, where settings of language, + * territory and codeset are implementation-dependent. + * + * ... defined to accept an additional field "@modifier ", which allows the + * user to select a specific instance of localisation data within a single + * category (for example, for selecting the dictionary as opposed to the + * character ordering of data). 
The syntax for these environment variables is + * thus defined as: + * + * [language[_territory][.codeset][@modifier]] + * + * http://www.gnu.org/s/libc/manual/html_node/Using-gettextized-software.html + * + * 8.2.1.6 User influence on gettext: + * + * The functions recognize the format of the value of the environment variable. + * It can split the value is different pieces and by leaving out the only or + * the other part it can construct new values. This happens of course in a + * predictable way. To understand this one must know the format of the + * environment variable value. There is one more or less standardized form, + * originally from the X/Open specification: + * + * language[_territory[.codeset]][@modifier] + * + */ +posixlocale* posixlocale_new_from_string(const char *locale) +{ + const char *startchunk, *endchunk; + posixlocale* posloc; + + posloc = posixlocale_new(); + + startchunk = endchunk = locale; + + while (*endchunk && *endchunk != '_' && *endchunk != '.' && *endchunk != '@') + ++endchunk; + + if (endchunk - startchunk) + posloc->main_str = x_strndup(startchunk, endchunk - startchunk); + + if (*endchunk == '_') + { + startchunk = ++endchunk; + while (*endchunk && *endchunk != '@' && *endchunk != '.') + ++endchunk; + + if (endchunk - startchunk) + posloc->underscore_str = x_strndup(startchunk, endchunk - startchunk); + } + + if (*endchunk == '.') + { + startchunk = ++endchunk; + while (*endchunk && *endchunk != '@') + ++endchunk; + + if (endchunk - startchunk) + posloc->dot_str = x_strndup(startchunk, endchunk - startchunk); + } + + if (*endchunk == '@') + { + startchunk = ++endchunk; + + if (strlen(startchunk)) + posloc->at_str = x_strdup(startchunk); + } + + return posloc; +} + +posixlocale* posixlocale_new_canonical_from_string(const char *locale) +{ + posixlocale *ret = posixlocale_new_from_string(resolve_gettext_locale_aliases(locale)); + if (ret) + { + const char *at_str = fix_broken_modifier(ret->at_str); + if (at_str != ret->at_str) + { 
+ free(ret->at_str); + ret->at_str = x_strdup(at_str); + } + } + return ret; +} + +char * posixlocale_to_string(const posixlocale *posloc) +{ + char *ret, *p; + size_t len, main_len, underscore_len, dot_len, at_len; + + main_len = strlen(posloc->main_str); + underscore_len = posloc->underscore_str ? strlen(posloc->underscore_str) + 1 : 0; + dot_len = posloc->dot_str ? strlen(posloc->dot_str) + 1 : 0; + at_len = posloc->at_str ? strlen(posloc->at_str) + 1 : 0; + len = main_len + underscore_len + dot_len + at_len + 1; + + p = ret = (char*)malloc(len); + + strcpy(ret, posloc->main_str); + p+=main_len; + if (underscore_len) + { + *p++ = '_'; + strcpy(p, posloc->underscore_str); + p+=underscore_len-1; + } + if (dot_len) + { + *p++ = '.'; + strcpy(p, posloc->dot_str); + p+=dot_len-1; + } + if (at_len) + { + *p++ = '@'; + strcpy(p, posloc->at_str); + } + + return ret; +} + +posixlocale* posixlocale_new() +{ + return (posixlocale*)calloc(1, sizeof(posixlocale)); +} + +BCP47_extension *BCP47_extension_new() +{ + return (BCP47_extension*)calloc(1, sizeof(BCP47_extension)); +} + +void BCP47_extension_delete(BCP47_extension *extension) +{ + if (!extension) + return; + free_tag_array(extension->alphanums); + free(extension); +} + +BCP47_extension *BCP47_extension_copy(const BCP47_extension *extension) +{ + BCP47_extension *ret = NULL; + + if (extension) + { + ret = BCP47_extension_new(); + ret->singleton = extension->singleton; + ret->alphanums = copy_tag_array(extension->alphanums); + } + return ret; +} + +char **BCP47_generate_fallback_strings(const char *tag, int tagpin) +{ + size_t size=0; + char **ret, **pp; + char *canonical_str; + char *canonical_ext_str; + BCP47_LanguageTag *canonical_tag=NULL, *canonical_ext_tag=NULL; + + if (!tag) + return NULL; + + canonical_tag = BCP47_LanguageTag_new_canonical_from_string(tag); + + if (!canonical_tag) + return NULL; + + canonical_str = BCP47_LanguageTag_to_string(canonical_tag); + canonical_ext_tag = 
BCP47_LanguageTag_new_canonical_extlang_form_from_string(tag); + canonical_ext_str = BCP47_LanguageTag_to_string(canonical_ext_tag); + + if (strcmp(canonical_str, canonical_ext_str) == 0) + { + free(canonical_ext_str); + BCP47_LanguageTag_delete(canonical_ext_tag); + canonical_ext_tag = NULL; + } + + size = 1; + if (canonical_tag->grandfathered) /*max of two extra options*/ + size += 2; + else if (canonical_tag->privateuse) + { + if (!(tagpin & BCP47_TAG_PIN_PRIVATEUSE)) + size += count_tag_array_elems(canonical_tag->privateuse->alphanums); + } + else + { + if (canonical_tag->langtag->privateuse && !(tagpin & BCP47_TAG_PIN_PRIVATEUSE)) + size += count_tag_array_elems(canonical_tag->langtag->privateuse->alphanums); + + if (canonical_tag->langtag->extensions && !(tagpin & BCP47_TAG_PIN_EXTENSIONS)) + { + BCP47_extension **pe; + for (pe = canonical_tag->langtag->extensions; *pe; ++pe) + size += count_tag_array_elems((*pe)->alphanums); + } + + if (canonical_tag->langtag->variants && !(tagpin & BCP47_TAG_PIN_VARIANTS)) + size += count_tag_array_elems(canonical_tag->langtag->variants); + + if (canonical_tag->langtag->region && !(tagpin & BCP47_TAG_PIN_REGION)) + ++size; + + if (canonical_tag->langtag->script && !(tagpin & BCP47_TAG_PIN_SCRIPT)) + ++size; + + if (canonical_ext_tag) + size = size * 2 + 1; + } + ++size; + + ret = pp = (char**)malloc(size * sizeof(char*)); + *pp++ = canonical_str; + if (canonical_ext_tag) + *pp++ = canonical_ext_str; + + if (canonical_tag->grandfathered) + { + if (strcmp("en-GB-oed", canonical_tag->grandfathered) == 0) + { + *pp++ = x_strdup("en-GB"); + if (!(tagpin & BCP47_TAG_PIN_REGION)) + *pp++ = x_strdup("en"); + } + else if (strcmp("i-mingo", canonical_tag->grandfathered) == 0) + { + /* + * Mingo is a dialect of Seneca, so "see" might be a helpful + * fallback in its absence + */ + *pp++ = x_strdup("see"); + } + else if (strcmp("zh-min", canonical_tag->grandfathered) == 0) + *pp++ = x_strdup("zh"); + + *pp = NULL; + } + else if 
(canonical_tag->privateuse) + { + if (!(tagpin & BCP47_TAG_PIN_PRIVATEUSE)) + { + char **ps; + ps = canonical_tag->privateuse->alphanums; + while (*ps) + ++ps; + while (--ps != canonical_tag->privateuse->alphanums) + { + free(*ps); + *ps=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + } + } + } + else + { + if (canonical_tag->langtag->privateuse && !(tagpin & BCP47_TAG_PIN_PRIVATEUSE)) + { + char **pcs, **pes; + pcs = canonical_tag->langtag->privateuse->alphanums; + pes = canonical_ext_tag ? canonical_ext_tag->langtag->privateuse->alphanums : NULL; + while (*pcs) + { + ++pcs; + ++pes; + } + for(--pcs, --pes; pcs != canonical_tag->langtag->privateuse->alphanums; --pcs, --pes) + { + free(*pcs); + *pcs = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(*pes); + *pes = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + BCP47_extension_delete(canonical_tag->langtag->privateuse); + canonical_tag->langtag->privateuse=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + BCP47_extension_delete(canonical_ext_tag->langtag->privateuse); + canonical_ext_tag->langtag->privateuse=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + + if (canonical_tag->langtag->extensions && !(tagpin & BCP47_TAG_PIN_EXTENSIONS)) + { + BCP47_extension **pce, **pee; + pee = canonical_ext_tag ? canonical_ext_tag->langtag->extensions: NULL; + for (pce = canonical_tag->langtag->extensions; *pce; ++pce, ++pee) + { + char **pcs, **pes; + pcs = (*pce)->alphanums; + pes = canonical_ext_tag ? 
(*pee)->alphanums : NULL; + while (*pcs) + { + ++pcs; + ++pes; + } + for(--pcs, --pes; pcs != (*pce)->alphanums; --pcs, --pes) + { + free(*pcs); + *pcs = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(*pes); + *pes = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + BCP47_extension_delete(*pce); + *pce=NULL; + if (canonical_ext_tag) + { + BCP47_extension_delete(*pee); + *pee=NULL; + } + } + free(canonical_tag->langtag->extensions); + canonical_tag->langtag->extensions = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(canonical_ext_tag->langtag->extensions); + canonical_ext_tag->langtag->extensions = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + + if (canonical_tag->langtag->variants && !(tagpin & BCP47_TAG_PIN_VARIANTS)) + { + char **pcs, **pes; + pcs = canonical_tag->langtag->variants; + pes = canonical_ext_tag ? canonical_ext_tag->langtag->variants : NULL; + while (*pcs) + { + ++pcs; + ++pes; + } + for(--pcs, --pes; pcs != canonical_tag->langtag->variants; --pcs, --pes) + { + free(*pcs); + *pcs = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(*pes); + *pes = NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + free_tag_array(canonical_tag->langtag->variants); + canonical_tag->langtag->variants=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free_tag_array(canonical_ext_tag->langtag->variants); + canonical_ext_tag->langtag->variants=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + + if (canonical_tag->langtag->region && !(tagpin & BCP47_TAG_PIN_REGION)) + { + free(canonical_tag->langtag->region); + canonical_tag->langtag->region=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(canonical_ext_tag->langtag->region); + 
canonical_ext_tag->langtag->region=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + + if (canonical_tag->langtag->script && !(tagpin & BCP47_TAG_PIN_SCRIPT)) + { + free(canonical_tag->langtag->script); + canonical_tag->langtag->script=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_tag); + if (canonical_ext_tag) + { + free(canonical_ext_tag->langtag->script); + canonical_ext_tag->langtag->script=NULL; + *pp++ = BCP47_LanguageTag_to_string(canonical_ext_tag); + } + } + + if (canonical_ext_tag) + *pp++ = x_strdup(canonical_ext_tag->langtag->language); + } + + BCP47_LanguageTag_delete(canonical_tag); + BCP47_LanguageTag_delete(canonical_ext_tag); + + *pp = NULL; + return ret; +} --- /dev/null 2010-05-01 11:49:38.510178306 +0100 +++ src/tools/localehelper.h 2010-05-20 15:29:09.000000000 +0100 @@ -0,0 +1,256 @@ +/* localehelper - Library of useful routines for BCP 47 + * Copyright (C) 2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. 
+ */
+
+#ifndef LOCALE_HELPER_H
+#define LOCALE_HELPER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct _posixlocale
+{
+    char *main_str;
+    char *underscore_str;
+    char *dot_str;
+    char *at_str;
+} posixlocale;
+
+/*
+ * Create an empty posixlocale descriptor
+ *
+ * free return value with posixlocale_delete
+ */
+posixlocale* posixlocale_new();
+/*
+ * Split a locale string according to POSIX:2008
+ * http://www.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html
+ *
+ * i.e. [language[_territory][.codeset][@modifier]]
+ *
+ * round-trip back with posixlocale_to_string
+ *
+ * use posixlocale_new_canonical_from_string to substitute known old school
+ * unix locale strings into more modern equivalents to get more useful
+ * information, though this removes any round-trip capability.
+ *
+ * free return value with posixlocale_delete
+ */
+posixlocale* posixlocale_new_from_string(const char *locale);
+/*
+ * Convert known old-school unix locale names into language_territory.ENCODING
+ * equivalents like gettext does before converting via
+ * posixlocale_new_from_string
+ *
+ * free return value with posixlocale_delete
+ */
+posixlocale* posixlocale_new_canonical_from_string(const char *locale);
+/*
+ * Convert split locale to POSIX:2008 string
+ *
+ * round-trip back with posixlocale_new_from_string
+ */
+char* posixlocale_to_string(const posixlocale *posloc);
+
+void posixlocale_delete(posixlocale *posloc);
+
+typedef struct _BCP47_langtag BCP47_langtag;
+typedef struct _BCP47_extension BCP47_extension;
+
+typedef struct _BCP47_LanguageTag
+{
+    BCP47_langtag *langtag;
+    BCP47_extension *privateuse;
+    char *grandfathered;
+} BCP47_LanguageTag;
+
+/*
+ * Create an empty BCP 47 Language-Tag
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag* BCP47_LanguageTag_new();
+/*
+ * Parse a string that purports to be a BCP 47 Language Tag. Returns
+ * NULL if ill-formed.
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag* BCP47_LanguageTag_new_from_string(const char *str);
+/*
+ * Make the Preferred-Value substitutions of RFC 5646 from IANA language subtag
+ * registry, including extlang sequence removal via
+ * "Canonicalization of Language Tags" section 4.5
+ *
+ * Remove script entry if it matches Suppress-Script for that language
+ *
+ * Format the case of tags according to the recommended format.
+ * [ISO639-1] recommends that language codes be written in lowercase
+ * [ISO15924] recommends that script codes use lowercase with the initial letter capitalized
+ * [ISO3166-1] recommends that country codes be capitalized
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag* BCP47_LanguageTag_new_canonical_from_string(const char *str);
+/*
+ * There is an alternate 'extlang form' that maintains or reinstates extlang
+ * subtags. This form can be useful in environments where the presence of the
+ * 'Prefix' subtag is considered beneficial in matching or selection
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag* BCP47_LanguageTag_new_canonical_extlang_form_from_string(const char *str);
+/*
+ * Copy a BCP 47 Language-Tag
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag *BCP47_LanguageTag_copy(const BCP47_LanguageTag *LanguageTag);
+/*
+ * Convert a BCP47_LanguageTag to a correctly formatted BCP 47 LanguageTag
+ * string
+ *
+ * free return value with free
+ */
+char * BCP47_LanguageTag_to_string(const BCP47_LanguageTag *LanguageTag);
+/*
+ * Convert a posixlocale descriptor to a BCP 47 Language-Tag
+ *
+ * The results should be a valid BCP 47 Language-Tag that correctly identifies
+ * the Language described by the posixlocale descriptor. Unfortunately this may
+ * be a collective language in the case of posixlocale created from e.g. ber_MA
+ * as seen in glibc, depending on application this may be directly useful, if
+ * not see BCP47_LanguageTag_expand_collective
+ *
+ * free return value with BCP47_LanguageTag_delete
+ */
+BCP47_LanguageTag *BCP47_LanguageTag_new_from_posixlocale(const posixlocale *posloc);
+
+/*
+ * Returns a NULL terminated array of BCP 47 format char* fallbacks for the
+ * tag string. Free each string and the array itself with free.
+ *
+ * tagpin is a bitwise OR of BCP47_Tag_Pin values naming the subtags that must
+ * not be dropped. Pinning the script is useful where a different script is
+ * useless, e.g. spell-checking, while BCP47_TAG_PIN_NONE is useful where a
+ * different script may be acceptable, e.g. output language
+ *
+ * e.g. language display fallbacks, nothing pinned BCP47_TAG_PIN_NONE
+ * sr-Latn-RS gives
+ * sr-Latn-RS
+ * sr-Latn
+ * sr
+ *
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple-banana gives
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple-banana
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple
+ * ca-Brai-ES-valencia-i-apple-banana
+ * ca-Brai-ES-valencia-i-apple
+ * ca-Brai-ES-valencia
+ * ca-Brai-ES
+ * ca-Brai
+ * ca
+ *
+ * cmn-Cyrl-TW gives
+ * cmn-Cyrl-TW
+ * zh-cmn-Cyrl-TW
+ * cmn-Cyrl
+ * zh-cmn-Cyrl
+ * cmn
+ * zh-cmn
+ * zh
+ *
+ * e.g. spellchecking. pin script and variants BCP47_TAG_PIN_SCRIPT|BCP47_TAG_PIN_VARIANTS
+ * sr-Latn-RS gives
+ * sr-Latn-RS
+ * sr-Latn
+ *
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple-banana gives
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple-banana
+ * ca-Brai-ES-valencia-i-apple-banana-x-apple
+ * ca-Brai-ES-valencia-i-apple-banana
+ * ca-Brai-ES-valencia-i-apple
+ * ca-Brai-ES-valencia
+ * ca-Brai-valencia
+ *
+ * cmn-Cyrl-TW gives
+ * cmn-Cyrl-TW
+ * zh-cmn-Cyrl-TW
+ * cmn-Cyrl
+ * zh-cmn-Cyrl
+ */
+
+typedef enum {
+    BCP47_TAG_PIN_NONE = 0,
+    BCP47_TAG_PIN_SCRIPT = (1 << 0),
+    BCP47_TAG_PIN_REGION = (1 << 1),
+    BCP47_TAG_PIN_VARIANTS = (1 << 2),
+    BCP47_TAG_PIN_EXTENSIONS = (1 << 3),
+    BCP47_TAG_PIN_PRIVATEUSE = (1 << 4)
+} BCP47_Tag_Pin;
+
+char **BCP47_generate_fallback_strings(const char *tag, int tagpin);
+/*
+ * Return 'c' for a known collective language
+ * Return 'm' for a known macro language
+ * Return 'p' for a known private language
+ * Return 'i' for an individual language
+ */
+char BCP47_LanguageTag_type(const BCP47_LanguageTag *LanguageTag);
+/*
+ * Returns a NULL terminated array of BCP47_LanguageTag* sorted in order of
+ * likelihood the known members of the collective language described by the
+ * LanguageTag. Free each member with BCP47_LanguageTag_delete and the array
+ * itself with free.
+ */
+BCP47_LanguageTag **BCP47_LanguageTag_expand_collective(const BCP47_LanguageTag *LanguageTag);
+
+void BCP47_LanguageTag_delete(BCP47_LanguageTag *LanguageTag);
+
+struct _BCP47_extension
+{
+    char singleton;
+    char **alphanums;
+};
+
+BCP47_extension *BCP47_extension_new();
+void BCP47_extension_delete(BCP47_extension *extension);
+BCP47_extension *BCP47_extension_copy(const BCP47_extension *extension);
+
+struct _BCP47_langtag
+{
+    char *language;
+    char **extlangs;
+    char *script;
+    char *region;
+    char **variants;
+    BCP47_extension **extensions;
+    BCP47_extension *privateuse;
+};
+
+BCP47_langtag* BCP47_langtag_new();
+void BCP47_langtag_delete(BCP47_langtag *langtag);
+BCP47_langtag* BCP47_langtag_copy(const BCP47_langtag *langtag);
+char *BCP47_langtag_to_string(BCP47_langtag *langtag);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null 2010-05-01 11:49:38.510178306 +0100
+++ src/tools/subtag_properties.h 2010-05-20 15:29:05.000000000 +0100
@@ -0,0 +1,664 @@
+/* localehelper - Library of useful routines for BCP 47
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */ +typedef struct _lang_subtag_properties +{ + const char *subtag; + const char *suppress_script; + const char *preferred_value; + const char scope; +}lang_subtag_properties; + +static const lang_subtag_properties lsr_lang_subtag_properties [] = +{ + { "ab", "Cyrl", "", 'i' } , + { "af", "Latn", "", 'i' } , + { "ak", "", "", 'm' } , + { "am", "Ethi", "", 'i' } , + { "ar", "Arab", "", 'm' } , + { "as", "Beng", "", 'i' } , + { "ay", "Latn", "", 'm' } , + { "az", "", "", 'm' } , + { "be", "Cyrl", "", 'i' } , + { "bg", "Cyrl", "", 'i' } , + { "bh", "", "", 'c' } , + { "bn", "Beng", "", 'i' } , + { "bs", "Latn", "", 'i' } , + { "ca", "Latn", "", 'i' } , + { "ch", "Latn", "", 'i' } , + { "cr", "", "", 'm' } , + { "cs", "Latn", "", 'i' } , + { "cy", "Latn", "", 'i' } , + { "da", "Latn", "", 'i' } , + { "de", "Latn", "", 'i' } , + { "dv", "Thaa", "", 'i' } , + { "dz", "Tibt", "", 'i' } , + { "el", "Grek", "", 'i' } , + { "en", "Latn", "", 'i' } , + { "eo", "Latn", "", 'i' } , + { "es", "Latn", "", 'i' } , + { "et", "Latn", "", 'm' } , + { "eu", "Latn", "", 'i' } , + { "fa", "Arab", "", 'm' } , + { "ff", "", "", 'm' } , + { "fi", "Latn", "", 'i' } , + { "fj", "Latn", "", 'i' } , + { "fo", "Latn", "", 'i' } , + { "fr", "Latn", "", 'i' } , + { "fy", "Latn", "", 'i' } , + { "ga", "Latn", "", 'i' } , + { "gl", "Latn", "", 'i' } , + { "gn", "Latn", "", 'm' } , + { "gu", "Gujr", "", 'i' } , + { "gv", "Latn", "", 'i' } , + { "he", "Hebr", "", 'i' } , + { "hi", "Deva", "", 'i' } , + { "hr", "Latn", "", 'i' } , + { "ht", "Latn", "", 'i' } , + { "hu", "Latn", "", 'i' } , + { "hy", "Armn", "", 'i' } , + { "id", "Latn", "", 'i' } , + { "ik", "", "", 'm' } , + { "in", "Latn", "id", 'i' } , + { "is", "Latn", "", 'i' } , + { "it", "Latn", "", 'i' } , + { "iu", "", "", 'm' } , + { "iw", "Hebr", "he", 'i' } , + { "ja", "Jpan", "", 'i' } , + { "ji", "", "yi", 'i' } , + { "jw", "", "jv", 'i' } , + { "ka", "Geor", "", 'i' } , + { "kg", "", "", 'm' } , + { "kk", "Cyrl", "", 'i' } , + { "kl", 
"Latn", "", 'i' } , + { "km", "Khmr", "", 'i' } , + { "kn", "Knda", "", 'i' } , + { "ko", "Kore", "", 'i' } , + { "kr", "", "", 'm' } , + { "ku", "", "", 'm' } , + { "kv", "", "", 'm' } , + { "la", "Latn", "", 'i' } , + { "lb", "Latn", "", 'i' } , + { "ln", "Latn", "", 'i' } , + { "lo", "Laoo", "", 'i' } , + { "lt", "Latn", "", 'i' } , + { "lv", "Latn", "", 'm' } , + { "mg", "Latn", "", 'm' } , + { "mh", "Latn", "", 'i' } , + { "mk", "Cyrl", "", 'i' } , + { "ml", "Mlym", "", 'i' } , + { "mn", "", "", 'm' } , + { "mo", "Latn", "ro", 'i' } , + { "mr", "Deva", "", 'i' } , + { "ms", "Latn", "", 'm' } , + { "mt", "Latn", "", 'i' } , + { "my", "Mymr", "", 'i' } , + { "na", "Latn", "", 'i' } , + { "nb", "Latn", "", 'i' } , + { "nd", "Latn", "", 'i' } , + { "ne", "Deva", "", 'i' } , + { "nl", "Latn", "", 'i' } , + { "nn", "Latn", "", 'i' } , + { "no", "Latn", "", 'm' } , + { "nr", "Latn", "", 'i' } , + { "ny", "Latn", "", 'i' } , + { "oj", "", "", 'm' } , + { "om", "Latn", "", 'm' } , + { "or", "Orya", "", 'i' } , + { "pa", "Guru", "", 'i' } , + { "pl", "Latn", "", 'i' } , + { "ps", "Arab", "", 'm' } , + { "pt", "Latn", "", 'i' } , + { "qu", "Latn", "", 'm' } , + { "rn", "Latn", "", 'i' } , + { "ro", "Latn", "", 'i' } , + { "ru", "Cyrl", "", 'i' } , + { "rw", "Latn", "", 'i' } , + { "sc", "", "", 'm' } , + { "sg", "Latn", "", 'i' } , + { "sh", "", "", 'm' } , + { "si", "Sinh", "", 'i' } , + { "sk", "Latn", "", 'i' } , + { "sl", "Latn", "", 'i' } , + { "sm", "Latn", "", 'i' } , + { "so", "Latn", "", 'i' } , + { "sq", "Latn", "", 'm' } , + { "ss", "Latn", "", 'i' } , + { "st", "Latn", "", 'i' } , + { "sv", "Latn", "", 'i' } , + { "sw", "Latn", "", 'm' } , + { "ta", "Taml", "", 'i' } , + { "te", "Telu", "", 'i' } , + { "th", "Thai", "", 'i' } , + { "ti", "Ethi", "", 'i' } , + { "tl", "Latn", "", 'i' } , + { "tn", "Latn", "", 'i' } , + { "to", "Latn", "", 'i' } , + { "tr", "Latn", "", 'i' } , + { "ts", "Latn", "", 'i' } , + { "uk", "Cyrl", "", 'i' } , + { "ur", "Arab", "", 'i' 
} , + { "uz", "", "", 'm' } , + { "ve", "Latn", "", 'i' } , + { "vi", "Latn", "", 'i' } , + { "wo", "Latn", "", 'i' } , + { "xh", "Latn", "", 'i' } , + { "yi", "Hebr", "", 'm' } , + { "za", "", "", 'm' } , + { "zh", "", "", 'm' } , + { "zu", "Latn", "", 'i' } , + { "aav", "", "", 'c' } , + { "afa", "", "", 'c' } , + { "alg", "", "", 'c' } , + { "alv", "", "", 'c' } , + { "apa", "", "", 'c' } , + { "aqa", "", "", 'c' } , + { "aql", "", "", 'c' } , + { "art", "", "", 'c' } , + { "ath", "", "", 'c' } , + { "auf", "", "", 'c' } , + { "aus", "", "", 'c' } , + { "awd", "", "", 'c' } , + { "azc", "", "", 'c' } , + { "bad", "", "", 'c' } , + { "bai", "", "", 'c' } , + { "bal", "", "", 'm' } , + { "bat", "", "", 'c' } , + { "ber", "", "", 'c' } , + { "bik", "", "", 'm' } , + { "bnc", "", "", 'm' } , + { "bnt", "", "", 'c' } , + { "btk", "", "", 'c' } , + { "bua", "", "", 'm' } , + { "cai", "", "", 'c' } , + { "cau", "", "", 'c' } , + { "cba", "", "", 'c' } , + { "ccn", "", "", 'c' } , + { "ccs", "", "", 'c' } , + { "cdc", "", "", 'c' } , + { "cdd", "", "", 'c' } , + { "cel", "", "", 'c' } , + { "chm", "", "", 'm' } , + { "cjr", "", "mom", 'i' } , + { "cmc", "", "", 'c' } , + { "cmk", "", "xch", 'i' } , + { "cpe", "", "", 'c' } , + { "cpf", "", "", 'c' } , + { "cpp", "", "", 'c' } , + { "crp", "", "", 'c' } , + { "csu", "", "", 'c' } , + { "cus", "", "", 'c' } , + { "day", "", "", 'c' } , + { "del", "", "", 'm' } , + { "den", "", "", 'm' } , + { "din", "", "", 'm' } , + { "dmn", "", "", 'c' } , + { "doi", "", "", 'm' } , + { "dra", "", "", 'c' } , + { "drh", "", "khk", 'i' } , + { "drw", "", "prs", 'i' } , + { "dsb", "Latn", "", 'i' } , + { "egx", "", "", 'c' } , + { "esx", "", "", 'c' } , + { "euq", "", "", 'c' } , + { "fiu", "", "", 'c' } , + { "fox", "", "", 'c' } , + { "frr", "Latn", "", 'i' } , + { "frs", "Latn", "", 'i' } , + { "gav", "", "dev", 'i' } , + { "gba", "", "", 'm' } , + { "gem", "", "", 'c' } , + { "gme", "", "", 'c' } , + { "gmq", "", "", 'c' } , + { 
"gmw", "", "", 'c' } , + { "gon", "", "", 'm' } , + { "grb", "", "", 'm' } , + { "grk", "", "", 'c' } , + { "gsw", "Latn", "", 'i' } , + { "hai", "", "", 'm' } , + { "him", "", "", 'c' } , + { "hmn", "", "", 'm' } , + { "hmx", "", "", 'c' } , + { "hok", "", "", 'c' } , + { "hsb", "Latn", "", 'i' } , + { "hyx", "", "", 'c' } , + { "iir", "", "", 'c' } , + { "ijo", "", "", 'c' } , + { "inc", "", "", 'c' } , + { "ine", "", "", 'c' } , + { "ira", "", "", 'c' } , + { "iro", "", "", 'c' } , + { "itc", "", "", 'c' } , + { "jpx", "", "", 'c' } , + { "jrb", "", "", 'm' } , + { "kar", "", "", 'c' } , + { "kdo", "", "", 'c' } , + { "khi", "", "", 'c' } , + { "kln", "", "", 'm' } , + { "kok", "Deva", "", 'm' } , + { "kpe", "", "", 'm' } , + { "kro", "", "", 'c' } , + { "lah", "", "", 'm' } , + { "luy", "", "", 'm' } , + { "mai", "Deva", "", 'i' } , + { "man", "", "", 'm' } , + { "map", "", "", 'c' } , + { "men", "Latn", "", 'i' } , + { "mis", "", "", 's' } , + { "mkh", "", "", 'c' } , + { "mno", "", "", 'c' } , + { "mst", "", "mry", 'i' } , + { "mul", "", "", 's' } , + { "mun", "", "", 'c' } , + { "mwr", "", "", 'm' } , + { "myn", "", "", 'c' } , + { "myt", "", "mry", 'i' } , + { "nah", "", "", 'c' } , + { "nai", "", "", 'c' } , + { "nds", "Latn", "", 'i' } , + { "ngf", "", "", 'c' } , + { "nic", "", "", 'c' } , + { "niu", "Latn", "", 'i' } , + { "nqo", "Nkoo", "", 'i' } , + { "nso", "Latn", "", 'i' } , + { "nub", "", "", 'c' } , + { "omq", "", "", 'c' } , + { "omv", "", "", 'c' } , + { "oto", "", "", 'c' } , + { "paa", "", "", 'c' } , + { "phi", "", "", 'c' } , + { "plf", "", "", 'c' } , + { "poz", "", "", 'c' } , + { "pqe", "", "", 'c' } , + { "pqw", "", "", 'c' } , + { "pra", "", "", 'c' } , + { "qwe", "", "", 'c' } , + { "raj", "", "", 'm' } , + { "roa", "", "", 'c' } , + { "rom", "", "", 'm' } , + { "sai", "", "", 'c' } , + { "sal", "", "", 'c' } , + { "sdv", "", "", 'c' } , + { "sem", "", "", 'c' } , + { "sgn", "", "", 'c' } , + { "sio", "", "", 'c' } , + { "sit", "", 
"", 'c' } , + { "sla", "", "", 'c' } , + { "smi", "", "", 'c' } , + { "son", "", "", 'c' } , + { "sqj", "", "", 'c' } , + { "ssa", "", "", 'c' } , + { "syd", "", "", 'c' } , + { "syr", "", "", 'm' } , + { "tai", "", "", 'c' } , + { "tbq", "", "", 'c' } , + { "tem", "Latn", "", 'i' } , + { "tkl", "Latn", "", 'i' } , + { "tmh", "Latn", "", 'm' } , + { "tnf", "", "prs", 'i' } , + { "tpi", "Latn", "", 'i' } , + { "trk", "", "", 'c' } , + { "tup", "", "", 'c' } , + { "tut", "", "", 'c' } , + { "tuw", "", "", 'c' } , + { "tvl", "Latn", "", 'i' } , + { "und", "", "", 's' } , + { "urj", "", "", 'c' } , + { "wak", "", "", 'c' } , + { "wen", "", "", 'c' } , + { "xgn", "", "", 'c' } , + { "xnd", "", "", 'c' } , + { "ypk", "", "", 'c' } , + { "zap", "", "", 'm' } , + { "zbl", "Blis", "", 'i' } , + { "zhx", "", "", 'c' } , + { "zle", "", "", 'c' } , + { "zls", "", "", 'c' } , + { "zlw", "", "", 'c' } , + { "znd", "", "", 'c' } , + { "zxx", "", "", 's' } , + { "zza", "", "", 'm' } +}; +typedef struct _grandfathered_tag_properties /* One row of the grandfathered-tag table: a complete tag and the value preferred in its place. */ +{ + const char *tag; /* complete tag as written, e.g. "i-klingon" */ + const char *preferred_value; /* preferred replacement, or NULL when the table lists none (e.g. "i-default") */ +}grandfathered_tag_properties; + +static const grandfathered_tag_properties lsr_grandfather_tag_properties [] = /* NOTE(review): rows are in ascending strcmp order of tag; confirm whether the lookup code relies on that before reordering or inserting */ +{ + { "art-lojban", "jbo" } , + { "cel-gaulish", NULL } , + { "en-GB-oed", NULL } , + { "i-ami", "ami" } , + { "i-bnn", "bnn" } , + { "i-default", NULL } , + { "i-enochian", NULL } , + { "i-hak", "hak" } , + { "i-klingon", "tlh" } , + { "i-lux", "lb" } , + { "i-mingo", NULL } , + { "i-navajo", "nv" } , + { "i-pwn", "pwn" } , + { "i-tao", "tao" } , + { "i-tay", "tay" } , + { "i-tsu", "tsu" } , + { "no-bok", "nb" } , + { "no-nyn", "nn" } , + { "sgn-BE-FR", "sfb" } , + { "sgn-BE-NL", "vgt" } , + { "sgn-CH-DE", "sgg" } , + { "zh-guoyu", "cmn" } , + { "zh-hakka", "hak" } , + { "zh-min", NULL } , + { "zh-min-nan", "nan" } , + { "zh-xiang", "hsn" } +}; +typedef struct _region_subtag_properties /* One row of the region table: a region subtag and the region subtag preferred in its place. */ +{ + const char *subtag; /* region subtag being replaced, e.g. "DD" */ + const char *preferred_value; /* region subtag to use instead, e.g. "DE" */ +}region_subtag_properties; + +static const
region_subtag_properties lsr_region_subtag_properties [] = /* region subtags mapped to their preferred replacements; rows are in ascending order of subtag */ +{ + { "BU", "MM" } , + { "DD", "DE" } , + { "FX", "FR" } , + { "TP", "TL" } , + { "YD", "YE" } , + { "ZR", "CD" } +}; +typedef struct _redundant_tag_properties /* One row of the redundant-tag table: a complete tag and the value preferred in its place. */ +{ + const char *tag; /* complete tag as written, e.g. "sgn-US" */ + const char *preferred_value; /* preferred replacement; may itself have several subtags, e.g. "cmn-Hans" */ +}redundant_tag_properties; + +static const redundant_tag_properties lsr_redundant_tag_properties [] = /* NOTE(review): rows are in ascending strcmp order of tag; confirm whether lookups depend on it */ +{ + { "sgn-BR", "bzs" } , + { "sgn-CO", "csn" } , + { "sgn-DE", "gsg" } , + { "sgn-DK", "dsl" } , + { "sgn-ES", "ssp" } , + { "sgn-FR", "fsl" } , + { "sgn-GB", "bfi" } , + { "sgn-GR", "gss" } , + { "sgn-IE", "isg" } , + { "sgn-IT", "ise" } , + { "sgn-JP", "jsl" } , + { "sgn-MX", "mfs" } , + { "sgn-NI", "ncs" } , + { "sgn-NL", "dse" } , + { "sgn-NO", "nsl" } , + { "sgn-PT", "psr" } , + { "sgn-SE", "swl" } , + { "sgn-US", "ase" } , + { "sgn-ZA", "sfs" } , + { "zh-cmn", "cmn" } , + { "zh-cmn-Hans", "cmn-Hans" } , + { "zh-cmn-Hant", "cmn-Hant" } , + { "zh-gan", "gan" } , + { "zh-wuu", "wuu" } +}; +typedef struct _extlang_subtag_properties /* One row of the extended-language table: prefix, extlang subtag, preferred value. */ +{ + const char *prefix; /* language subtag this extlang is listed under, e.g. "zh" */ + const char *subtag; /* the extended language subtag itself, e.g. "yue" */ + const char *preferred_value; /* preferred replacement; in the rows of this table it repeats subtag */ +}extlang_subtag_properties; + +static const extlang_subtag_properties lsr_extlang_subtag_properties [] = /* NOTE(review): rows are ordered by subtag (second column), not by prefix; confirm whether lookups depend on this order */ +{ + { "ar", "aao", "aao" } , + { "ar", "abh", "abh" } , + { "ar", "abv", "abv" } , + { "ar", "acm", "acm" } , + { "ar", "acq", "acq" } , + { "ar", "acw", "acw" } , + { "ar", "acx", "acx" } , + { "ar", "acy", "acy" } , + { "ar", "adf", "adf" } , + { "sgn", "ads", "ads" } , + { "ar", "aeb", "aeb" } , + { "ar", "aec", "aec" } , + { "sgn", "aed", "aed" } , + { "sgn", "aen", "aen" } , + { "ar", "afb", "afb" } , + { "sgn", "afg", "afg" } , + { "ar", "ajp", "ajp" } , + { "ar", "apc", "apc" } , + { "ar", "apd", "apd" } , + { "ar", "arb", "arb" } , + { "ar", "arq", "arq" } , + { "ar", "ars", "ars" } , + { "ar", "ary", "ary" } , + { "ar", "arz", "arz" } , + { "sgn", "ase", "ase" } , + { "sgn", "asf", "asf" } , + { "sgn", "asp", "asp" } , + { "sgn", "asq", "asq" } , + { "sgn", "asw", "asw" } ,
+ { "ar", "auz", "auz" } , + { "ar", "avl", "avl" } , + { "ar", "ayh", "ayh" } , + { "ar", "ayl", "ayl" } , + { "ar", "ayn", "ayn" } , + { "ar", "ayp", "ayp" } , + { "ar", "bbz", "bbz" } , + { "sgn", "bfi", "bfi" } , + { "sgn", "bfk", "bfk" } , + { "ms", "bjn", "bjn" } , + { "sgn", "bog", "bog" } , + { "sgn", "bqn", "bqn" } , + { "sgn", "bqy", "bqy" } , + { "ms", "btj", "btj" } , + { "ms", "bve", "bve" } , + { "sgn", "bvl", "bvl" } , + { "ms", "bvu", "bvu" } , + { "sgn", "bzs", "bzs" } , + { "zh", "cdo", "cdo" } , + { "sgn", "cds", "cds" } , + { "zh", "cjy", "cjy" } , + { "zh", "cmn", "cmn" } , + { "ms", "coa", "coa" } , + { "zh", "cpx", "cpx" } , + { "sgn", "csc", "csc" } , + { "sgn", "csd", "csd" } , + { "sgn", "cse", "cse" } , + { "sgn", "csf", "csf" } , + { "sgn", "csg", "csg" } , + { "sgn", "csl", "csl" } , + { "sgn", "csn", "csn" } , + { "sgn", "csq", "csq" } , + { "sgn", "csr", "csr" } , + { "zh", "czh", "czh" } , + { "zh", "czo", "czo" } , + { "sgn", "doq", "doq" } , + { "sgn", "dse", "dse" } , + { "sgn", "dsl", "dsl" } , + { "ms", "dup", "dup" } , + { "sgn", "ecs", "ecs" } , + { "sgn", "esl", "esl" } , + { "sgn", "esn", "esn" } , + { "sgn", "eso", "eso" } , + { "sgn", "eth", "eth" } , + { "sgn", "fcs", "fcs" } , + { "sgn", "fse", "fse" } , + { "sgn", "fsl", "fsl" } , + { "sgn", "fss", "fss" } , + { "zh", "gan", "gan" } , + { "kok", "gom", "gom" } , + { "sgn", "gse", "gse" } , + { "sgn", "gsg", "gsg" } , + { "sgn", "gsm", "gsm" } , + { "sgn", "gss", "gss" } , + { "sgn", "gus", "gus" } , + { "sgn", "hab", "hab" } , + { "sgn", "haf", "haf" } , + { "zh", "hak", "hak" } , + { "sgn", "hds", "hds" } , + { "ms", "hji", "hji" } , + { "sgn", "hks", "hks" } , + { "sgn", "hos", "hos" } , + { "sgn", "hps", "hps" } , + { "sgn", "hsh", "hsh" } , + { "sgn", "hsl", "hsl" } , + { "zh", "hsn", "hsn" } , + { "sgn", "icl", "icl" } , + { "sgn", "ils", "ils" } , + { "sgn", "inl", "inl" } , + { "sgn", "ins", "ins" } , + { "sgn", "ise", "ise" } , + { "sgn", "isg", "isg" } , + {
"sgn", "isr", "isr" } , + { "ms", "jak", "jak" } , + { "ms", "jax", "jax" } , + { "sgn", "jcs", "jcs" } , + { "sgn", "jhs", "jhs" } , + { "sgn", "jls", "jls" } , + { "sgn", "jos", "jos" } , + { "sgn", "jsl", "jsl" } , + { "sgn", "jus", "jus" } , + { "sgn", "kgi", "kgi" } , + { "kok", "knn", "knn" } , + { "ms", "kvb", "kvb" } , + { "sgn", "kvk", "kvk" } , + { "ms", "kvr", "kvr" } , + { "ms", "kxd", "kxd" } , + { "sgn", "lbs", "lbs" } , + { "ms", "lce", "lce" } , + { "ms", "lcf", "lcf" } , + { "ms", "liw", "liw" } , + { "sgn", "lls", "lls" } , + { "sgn", "lsg", "lsg" } , + { "sgn", "lsl", "lsl" } , + { "sgn", "lso", "lso" } , + { "sgn", "lsp", "lsp" } , + { "sgn", "lst", "lst" } , + { "sgn", "lsy", "lsy" } , + { "lv", "ltg", "ltg" } , + { "lv", "lvs", "lvs" } , + { "zh", "lzh", "lzh" } , + { "ms", "max", "max" } , + { "sgn", "mdl", "mdl" } , + { "ms", "meo", "meo" } , + { "ms", "mfa", "mfa" } , + { "ms", "mfb", "mfb" } , + { "sgn", "mfs", "mfs" } , + { "ms", "min", "min" } , + { "zh", "mnp", "mnp" } , + { "ms", "mqg", "mqg" } , + { "sgn", "mre", "mre" } , + { "sgn", "msd", "msd" } , + { "ms", "msi", "msi" } , + { "sgn", "msr", "msr" } , + { "ms", "mui", "mui" } , + { "sgn", "mzc", "mzc" } , + { "sgn", "mzg", "mzg" } , + { "sgn", "mzy", "mzy" } , + { "zh", "nan", "nan" } , + { "sgn", "nbs", "nbs" } , + { "sgn", "ncs", "ncs" } , + { "sgn", "nsi", "nsi" } , + { "sgn", "nsl", "nsl" } , + { "sgn", "nsp", "nsp" } , + { "sgn", "nsr", "nsr" } , + { "sgn", "nzs", "nzs" } , + { "sgn", "okl", "okl" } , + { "ms", "orn", "orn" } , + { "ms", "ors", "ors" } , + { "ms", "pel", "pel" } , + { "ar", "pga", "pga" } , + { "sgn", "pks", "pks" } , + { "sgn", "prl", "prl" } , + { "sgn", "prz", "prz" } , + { "sgn", "psc", "psc" } , + { "sgn", "psd", "psd" } , + { "ms", "pse", "pse" } , + { "sgn", "psg", "psg" } , + { "sgn", "psl", "psl" } , + { "sgn", "pso", "pso" } , + { "sgn", "psp", "psp" } , + { "sgn", "psr", "psr" } , + { "sgn", "pys", "pys" } , + { "sgn", "rms", "rms" } , + { "sgn",
"rsi", "rsi" } , + { "sgn", "rsl", "rsl" } , + { "sgn", "sdl", "sdl" } , + { "sgn", "sfb", "sfb" } , + { "sgn", "sfs", "sfs" } , + { "sgn", "sgg", "sgg" } , + { "sgn", "sgx", "sgx" } , + { "ar", "shu", "shu" } , + { "sgn", "slf", "slf" } , + { "sgn", "sls", "sls" } , + { "sgn", "sqs", "sqs" } , + { "ar", "ssh", "ssh" } , + { "sgn", "ssp", "ssp" } , + { "sgn", "ssr", "ssr" } , + { "sgn", "svk", "svk" } , + { "sw", "swc", "swc" } , + { "sw", "swh", "swh" } , + { "sgn", "swl", "swl" } , + { "sgn", "syy", "syy" } , + { "ms", "tmw", "tmw" } , + { "sgn", "tse", "tse" } , + { "sgn", "tsm", "tsm" } , + { "sgn", "tsq", "tsq" } , + { "sgn", "tss", "tss" } , + { "sgn", "tsy", "tsy" } , + { "sgn", "tza", "tza" } , + { "sgn", "ugn", "ugn" } , + { "sgn", "ugy", "ugy" } , + { "sgn", "ukl", "ukl" } , + { "sgn", "uks", "uks" } , + { "ms", "urk", "urk" } , + { "uz", "uzn", "uzn" } , + { "uz", "uzs", "uzs" } , + { "sgn", "vgt", "vgt" } , + { "ms", "vkk", "vkk" } , + { "ms", "vkt", "vkt" } , + { "sgn", "vsi", "vsi" } , + { "sgn", "vsl", "vsl" } , + { "sgn", "vsv", "vsv" } , + { "zh", "wuu", "wuu" } , + { "sgn", "xki", "xki" } , + { "sgn", "xml", "xml" } , + { "ms", "xmm", "xmm" } , + { "sgn", "xms", "xms" } , + { "sgn", "yds", "yds" } , + { "sgn", "ysl", "ysl" } , + { "zh", "yue", "yue" } , + { "sgn", "zib", "zib" } , + { "ms", "zlm", "zlm" } , + { "ms", "zmi", "zmi" } , + { "sgn", "zsl", "zsl" } , + { "ms", "zsm", "zsm" } +}; +typedef struct _variant_subtag_properties /* One row of the variant table: a variant subtag and the variant preferred in its place. */ +{ + const char *subtag; /* variant subtag being replaced */ + const char *preferred_value; /* variant subtag to use instead */ +}variant_subtag_properties; + +static const variant_subtag_properties lsr_variant_subtag_properties [] = /* holds a single row at present */ +{ + { "heploc", "alalc97" } +};