Remove accents / diacritics from strings in JavaScript

How do I remove accents (diacritics) from a string, especially in IE6? I currently have something like this:

/**
 * Reduces a string to a compact, lowercase, accent-free key.
 *
 * Steps, in order: lowercase the input; delete all whitespace; replace the
 * accented Latin letters listed in the table below with ASCII equivalents;
 * delete every remaining character that `\W` matches (anything outside
 * [A-Za-z0-9_]).
 *
 * @param {string} s - Text to normalise.
 * @returns {string} The normalised text.
 */
var accentsTidy = function (s) {
    // [pattern, replacement] pairs, applied top to bottom. The patterns are
    // plain regex literals written with \u escapes, so they do not depend on
    // the character encoding this file is saved or served in.
    var replacements = [
        [/\s/g, ""],
        [/[\u00E0-\u00E5]/g, "a"],        // à á â ã ä å
        [/\u00E6/g, "ae"],                // æ
        [/\u00E7/g, "c"],                 // ç
        [/[\u00E8-\u00EB]/g, "e"],        // è é ê ë
        [/[\u00EC-\u00EF]/g, "i"],        // ì í î ï
        [/\u00F1/g, "n"],                 // ñ
        [/[\u00F2-\u00F6]/g, "o"],        // ò ó ô õ ö
        [/\u0153/g, "oe"],                // œ
        [/[\u00F9-\u00FC]/g, "u"],        // ù ú û ü
        [/[\u00FD\u00FF]/g, "y"],         // ý ÿ
        [/\W/g, ""]                       // must stay last: strips whatever is left
    ];
    var r = s.toLowerCase();
    for (var i = 0; i < replacements.length; i++) {
        r = r.replace(replacements[i][0], replacements[i][1]);
    }
    return r;
};

But IE6 gives me trouble: it doesn't seem to like my regular expressions.

Answer #1

Here is my modified version of the solution from lehelk.com. This version also removes accents from HTML entities:

http://jsfiddle.net/billybraga/UHmnf/

I have not measured its performance yet, but here is the code:

/**
 * Lookup table for diacritics removal.
 *
 * Each entry has:
 *   base    - the plain replacement text (one or more ASCII letters)
 *   letters - a global RegExp matching every character that is replaced
 *             by `base`
 *
 * Every pattern is a single character class written only with \uXXXX
 * escapes. The previous form repeated the same characters as literal
 * alternatives; those were redundant with the escapes and depended on the
 * file's encoding. Some had been turned into multi-character sequences
 * (for example U+02BC followed by "n"), which made the "n" entry delete the
 * U+02BC character.
 *
 * The single-letter classes also contain the ASCII letter itself (e.g.
 * \u0041 for "A"); replacing it with itself is a harmless no-op.
 */
var defaultDiacriticsRemovalMap = [
    {'base': "A",  'letters': /[\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F]/g},
    {'base': "AA", 'letters': /[\uA732]/g},
    {'base': "AE", 'letters': /[\u00C6\u01FC\u01E2]/g},
    {'base': "AO", 'letters': /[\uA734]/g},
    {'base': "AU", 'letters': /[\uA736]/g},
    {'base': "AV", 'letters': /[\uA738\uA73A]/g},
    {'base': "AY", 'letters': /[\uA73C]/g},
    {'base': "B",  'letters': /[\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181]/g},
    {'base': "C",  'letters': /[\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E]/g},
    {'base': "D",  'letters': /[\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779]/g},
    {'base': "DZ", 'letters': /[\u01F1\u01C4]/g},
    {'base': "Dz", 'letters': /[\u01F2\u01C5]/g},
    {'base': "E",  'letters': /[\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E]/g},
    {'base': "F",  'letters': /[\u0046\u24BB\uFF26\u1E1E\u0191\uA77B]/g},
    {'base': "G",  'letters': /[\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E]/g},
    {'base': "H",  'letters': /[\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D]/g},
    {'base': "I",  'letters': /[\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197]/g},
    {'base': "J",  'letters': /[\u004A\u24BF\uFF2A\u0134\u0248]/g},
    {'base': "K",  'letters': /[\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2]/g},
    {'base': "L",  'letters': /[\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780]/g},
    {'base': "LJ", 'letters': /[\u01C7]/g},
    {'base': "Lj", 'letters': /[\u01C8]/g},
    {'base': "M",  'letters': /[\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C]/g},
    {'base': "N",  'letters': /[\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4]/g},
    {'base': "NJ", 'letters': /[\u01CA]/g},
    {'base': "Nj", 'letters': /[\u01CB]/g},
    {'base': "O",  'letters': /[\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C]/g},
    {'base': "OI", 'letters': /[\u01A2]/g},
    {'base': "OO", 'letters': /[\uA74E]/g},
    {'base': "OU", 'letters': /[\u0222]/g},
    {'base': "P",  'letters': /[\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754]/g},
    {'base': "Q",  'letters': /[\u0051\u24C6\uFF31\uA756\uA758\u024A]/g},
    {'base': "R",  'letters': /[\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782]/g},
    {'base': "S",  'letters': /[\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784]/g},
    {'base': "T",  'letters': /[\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786]/g},
    {'base': "TZ", 'letters': /[\uA728]/g},
    {'base': "U",  'letters': /[\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244]/g},
    {'base': "V",  'letters': /[\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245]/g},
    {'base': "VY", 'letters': /[\uA760]/g},
    {'base': "W",  'letters': /[\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72]/g},
    {'base': "X",  'letters': /[\u0058\u24CD\uFF38\u1E8A\u1E8C]/g},
    {'base': "Y",  'letters': /[\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE]/g},
    {'base': "Z",  'letters': /[\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762]/g},
    {'base': "a",  'letters': /[\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250]/g},
    {'base': "aa", 'letters': /[\uA733]/g},
    {'base': "ae", 'letters': /[\u00E6\u01FD\u01E3]/g},
    {'base': "ao", 'letters': /[\uA735]/g},
    {'base': "au", 'letters': /[\uA737]/g},
    {'base': "av", 'letters': /[\uA739\uA73B]/g},
    {'base': "ay", 'letters': /[\uA73D]/g},
    {'base': "b",  'letters': /[\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253]/g},
    {'base': "c",  'letters': /[\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184]/g},
    {'base': "d",  'letters': /[\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A]/g},
    {'base': "dz", 'letters': /[\u01F3\u01C6]/g},
    {'base': "e",  'letters': /[\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD]/g},
    {'base': "f",  'letters': /[\u0066\u24D5\uFF46\u1E1F\u0192\uA77C]/g},
    {'base': "g",  'letters': /[\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F]/g},
    {'base': "h",  'letters': /[\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265]/g},
    {'base': "hv", 'letters': /[\u0195]/g},
    {'base': "i",  'letters': /[\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131]/g},
    {'base': "j",  'letters': /[\u006A\u24D9\uFF4A\u0135\u01F0\u0249]/g},
    {'base': "k",  'letters': /[\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3]/g},
    {'base': "l",  'letters': /[\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747]/g},
    {'base': "lj", 'letters': /[\u01C9]/g},
    {'base': "m",  'letters': /[\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F]/g},
    {'base': "n",  'letters': /[\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5]/g},
    {'base': "nj", 'letters': /[\u01CC]/g},
    {'base': "o",  'letters': /[\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275]/g},
    {'base': "oi", 'letters': /[\u01A3]/g},
    {'base': "ou", 'letters': /[\u0223]/g},
    {'base': "oo", 'letters': /[\uA74F]/g},
    {'base': "p",  'letters': /[\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755]/g},
    {'base': "q",  'letters': /[\u0071\u24E0\uFF51\u024B\uA757\uA759]/g},
    {'base': "r",  'letters': /[\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783]/g},
    {'base': "s",  'letters': /[\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B]/g},
    {'base': "t",  'letters': /[\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787]/g},
    {'base': "tz", 'letters': /[\uA729]/g},
    {'base': "u",  'letters': /[\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289]/g},
    {'base': "v",  'letters': /[\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C]/g},
    {'base': "vy", 'letters': /[\uA761]/g},
    {'base': "w",  'letters': /[\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73]/g},
    {'base': "x",  'letters': /[\u0078\u24E7\uFF58\u1E8B\u1E8D]/g},
    {'base': "y",  'letters': /[\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF]/g},
    {'base': "z",  'letters': /[\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763]/g}
];

/**
 * Returns `str` with diacritics removed.
 *
 * Walks defaultDiacriticsRemovalMap from first to last entry and, for each
 * one, replaces everything its `letters` pattern matches with its `base`
 * text. Each pass works on the output of the previous pass.
 *
 * @param {string} str - Text to process.
 * @returns {string} The text after all replacements have been applied.
 */
function removeDiacritics(str) {
    var result = str;
    var index = 0;
    var entry;
    while (index < defaultDiacriticsRemovalMap.length) {
        entry = defaultDiacriticsRemovalMap[index];
        result = result.replace(entry.letters, entry.base);
        index += 1;
    }
    return result;
}

Answer #2

Here is a shortened version of the code, based on Ian Elliott's excellent solution:

/**
 * Lowercases a string and replaces common accented Latin letters with
 * ASCII equivalents. All other characters (spaces, punctuation, digits)
 * are left untouched.
 *
 * @param {string} s - Text to process.
 * @returns {string} The lowercased, accent-free text.
 */
var accentsTidy = function (s) {
    var r = s.toLowerCase();
    // Replacement text -> character class to replace. Written with \u escapes
    // so the patterns do not depend on the file's character encoding.
    var non_asciis = {
        'a': '[\u00E0-\u00E5]',           // à á â ã ä å
        'ae': '\u00E6',                   // æ
        'c': '\u00E7',                    // ç
        'e': '[\u00E8-\u00EB]',           // è é ê ë
        'i': '[\u00EC-\u00EF]',           // ì í î ï
        'n': '\u00F1',                    // ñ
        'o': '[\u00F2-\u00F6]',           // ò ó ô õ ö
        'oe': '\u0153',                   // œ
        'u': '[\u00F9-\u00FC\u0171]',     // ù ú û ü ű
        'y': '[\u00FD\u00FF]'             // ý ÿ
    };
    // `var i` keeps the loop variable local; the own-property guard skips
    // anything added to Object.prototype by other scripts.
    for (var i in non_asciis) {
        if (Object.prototype.hasOwnProperty.call(non_asciis, i)) {
            r = r.replace(new RegExp(non_asciis[i], 'g'), i);
        }
    }
    return r;
};

Edit: corrected invalid code.

Answer #3

I made some changes to khel's version for one reason: every regular-expression match/replace pass costs O(n) operations, where n is the number of characters in the target text, and that version runs one such pass per map entry. A regexp per entry is not what we really need; a single pass with a character lookup table is enough. So:

/* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ var defaultDiacriticsRemovalMap = [ {'base':'A', 'letters':'\A\Ⓐ\A\À\Á\Â\Ầ\Ấ\Ẫ\Ẩ\Ã\Ā\Ă\Ằ\Ắ\Ẵ\Ẳ\Ȧ\Ǡ\Ä\Ǟ\Ả\Å\Ǻ\Ǎ\Ȁ\Ȃ\Ạ\Ậ\Ặ\Ḁ\Ą\Ⱥ\Ɐ'}, {'base':'AA','letters':'\Ꜳ'}, {'base':'AE','letters':'\Æ\Ǽ\Ǣ'}, {'base':'AO','letters':'\Ꜵ'}, {'base':'AU','letters':'\Ꜷ'}, {'base':'AV','letters':'\Ꜹ\Ꜻ'}, {'base':'AY','letters':'\Ꜽ'}, {'base':'B', 'letters':'\B\Ⓑ\B\Ḃ\Ḅ\Ḇ\Ƀ\Ƃ\Ɓ'}, {'base':'C', 'letters':'\C\Ⓒ\C\Ć\Ĉ\Ċ\Č\Ç\Ḉ\Ƈ\Ȼ\Ꜿ'}, {'base':'D', 'letters':'\D\Ⓓ\D\Ḋ\Ď\Ḍ\Ḑ\Ḓ\Ḏ\Đ\Ƌ\Ɗ\Ɖ\Ꝺ\Ð'}, {'base':'DZ','letters':'\DZ\DŽ'}, {'base':'Dz','letters':'\Dz\Dž'}, {'base':'E', 'letters':'\E\Ⓔ\E\È\É\Ê\Ề\Ế\Ễ\Ể\Ẽ\Ē\Ḕ\Ḗ\Ĕ\Ė\Ë\Ẻ\Ě\Ȅ\Ȇ\Ẹ\Ệ\Ȩ\Ḝ\Ę\Ḙ\Ḛ\Ɛ\Ǝ'}, {'base':'F', 'letters':'\F\Ⓕ\F\Ḟ\Ƒ\Ꝼ'}, {'base':'G', 'letters':'\G\Ⓖ\G\Ǵ\Ĝ\Ḡ\Ğ\Ġ\Ǧ\Ģ\Ǥ\Ɠ\Ꞡ\Ᵹ\Ꝿ'}, {'base':'H', 'letters':'\H\Ⓗ\H\Ĥ\Ḣ\Ḧ\Ȟ\Ḥ\Ḩ\Ḫ\Ħ\Ⱨ\Ⱶ\Ɥ'}, {'base':'I', 'letters':'\I\Ⓘ\I\Ì\Í\Î\Ĩ\Ī\Ĭ\İ\Ï\Ḯ\Ỉ\Ǐ\Ȉ\Ȋ\Ị\Į\Ḭ\Ɨ'}, {'base':'J', 'letters':'\J\Ⓙ\J\Ĵ\Ɉ'}, {'base':'K', 'letters':'\K\Ⓚ\K\Ḱ\Ǩ\Ḳ\Ķ\Ḵ\Ƙ\Ⱪ\Ꝁ\Ꝃ\Ꝅ\Ꞣ'}, {'base':'L', 'letters':'\L\Ⓛ\L\Ŀ\Ĺ\Ľ\Ḷ\Ḹ\Ļ\Ḽ\Ḻ\Ł\Ƚ\Ɫ\Ⱡ\Ꝉ\Ꝇ\Ꞁ'}, {'base':'LJ','letters':'\LJ'}, {'base':'Lj','letters':'\Lj'}, {'base':'M', 'letters':'\M\Ⓜ\M\Ḿ\Ṁ\Ṃ\Ɱ\Ɯ'}, {'base':'N', 'letters':'\N\Ⓝ\N\Ǹ\Ń\Ñ\Ṅ\Ň\Ṇ\Ņ\Ṋ\Ṉ\Ƞ\Ɲ\Ꞑ\Ꞥ'}, {'base':'NJ','letters':'\NJ'}, {'base':'Nj','letters':'\Nj'}, {'base':'O', 'letters':'\O\Ⓞ\O\Ò\Ó\Ô\Ồ\Ố\Ỗ\Ổ\Õ\Ṍ\Ȭ\Ṏ\Ō\Ṑ\Ṓ\Ŏ\Ȯ\Ȱ\Ö\Ȫ\Ỏ\Ő\Ǒ\Ȍ\Ȏ\Ơ\Ờ\Ớ\Ỡ\Ở\Ợ\Ọ\Ộ\Ǫ\Ǭ\Ø\Ǿ\Ɔ\Ɵ\Ꝋ\Ꝍ'}, {'base':'OI','letters':'\Ƣ'}, {'base':'OO','letters':'\Ꝏ'}, {'base':'OU','letters':'\Ȣ'}, {'base':'OE','letters':'\Œ\Œ'}, 
{'base':'oe','letters':'\œ\œ'}, {'base':'P', 'letters':'\P\Ⓟ\P\Ṕ\Ṗ\Ƥ\Ᵽ\Ꝑ\Ꝓ\Ꝕ'}, {'base':'Q', 'letters':'\Q\Ⓠ\Q\Ꝗ\Ꝙ\Ɋ'}, {'base':'R', 'letters':'\R\Ⓡ\R\Ŕ\Ṙ\Ř\Ȑ\Ȓ\Ṛ\Ṝ\Ŗ\Ṟ\Ɍ\Ɽ\Ꝛ\Ꞧ\Ꞃ'}, {'base':'S', 'letters':'\S\Ⓢ\S\ẞ\Ś\Ṥ\Ŝ\Ṡ\Š\Ṧ\Ṣ\Ṩ\Ș\Ş\Ȿ\Ꞩ\Ꞅ'}, {'base':'T', 'letters':'\T\Ⓣ\T\Ṫ\Ť\Ṭ\Ț\Ţ\Ṱ\Ṯ\Ŧ\Ƭ\Ʈ\Ⱦ\Ꞇ'}, {'base':'TZ','letters':'\Ꜩ'}, {'base':'U', 'letters':'\U\Ⓤ\U\Ù\Ú\Û\Ũ\Ṹ\Ū\Ṻ\Ŭ\Ü\Ǜ\Ǘ\Ǖ\Ǚ\Ủ\Ů\Ű\Ǔ\Ȕ\Ȗ\Ư\Ừ\Ứ\Ữ\Ử\Ự\Ụ\Ṳ\Ų\Ṷ\Ṵ\Ʉ'}, {'base':'V', 'letters':'\V\Ⓥ\V\Ṽ\Ṿ\Ʋ\Ꝟ\Ʌ'}, {'base':'VY','letters':'\Ꝡ'}, {'base':'W', 'letters':'\W\Ⓦ\W\Ẁ\Ẃ\Ŵ\Ẇ\Ẅ\Ẉ\Ⱳ'}, {'base':'X', 'letters':'\X\Ⓧ\X\Ẋ\Ẍ'}, {'base':'Y', 'letters':'\Y\Ⓨ\Y\Ỳ\Ý\Ŷ\Ỹ\Ȳ\Ẏ\Ÿ\Ỷ\Ỵ\Ƴ\Ɏ\Ỿ'}, {'base':'Z', 'letters':'\Z\Ⓩ\Z\Ź\Ẑ\Ż\Ž\Ẓ\Ẕ\Ƶ\Ȥ\Ɀ\Ⱬ\Ꝣ'}, {'base':'a', 'letters':'\a\ⓐ\a\ẚ\à\á\â\ầ\ấ\ẫ\ẩ\ã\ā\ă\ằ\ắ\ẵ\ẳ\ȧ\ǡ\ä\ǟ\ả\å\ǻ\ǎ\ȁ\ȃ\ạ\ậ\ặ\ḁ\ą\ⱥ\ɐ'}, {'base':'aa','letters':'\ꜳ'}, {'base':'ae','letters':'\æ\ǽ\ǣ'}, {'base':'ao','letters':'\ꜵ'}, {'base':'au','letters':'\ꜷ'}, {'base':'av','letters':'\ꜹ\ꜻ'}, {'base':'ay','letters':'\ꜽ'}, {'base':'b', 'letters':'\b\ⓑ\b\ḃ\ḅ\ḇ\ƀ\ƃ\ɓ'}, {'base':'c', 'letters':'\c\ⓒ\c\ć\ĉ\ċ\č\ç\ḉ\ƈ\ȼ\ꜿ\ↄ'}, {'base':'d', 'letters':'\d\ⓓ\d\ḋ\ď\ḍ\ḑ\ḓ\ḏ\đ\ƌ\ɖ\ɗ\ꝺ'}, {'base':'dz','letters':'\dz\dž'}, {'base':'e', 'letters':'\e\ⓔ\e\è\é\ê\ề\ế\ễ\ể\ẽ\ē\ḕ\ḗ\ĕ\ė\ë\ẻ\ě\ȅ\ȇ\ẹ\ệ\ȩ\ḝ\ę\ḙ\ḛ\ɇ\ɛ\ǝ'}, {'base':'f', 'letters':'\f\ⓕ\f\ḟ\ƒ\ꝼ'}, {'base':'g', 'letters':'\g\ⓖ\g\ǵ\ĝ\ḡ\ğ\ġ\ǧ\ģ\ǥ\ɠ\ꞡ\ᵹ\ꝿ'}, {'base':'h', 'letters':'\h\ⓗ\h\ĥ\ḣ\ḧ\ȟ\ḥ\ḩ\ḫ\ẖ\ħ\ⱨ\ⱶ\ɥ'}, {'base':'hv','letters':'\ƕ'}, {'base':'i', 'letters':'\i\ⓘ\i\ì\í\î\ĩ\ī\ĭ\ï\ḯ\ỉ\ǐ\ȉ\ȋ\ị\į\ḭ\ɨ\ı'}, {'base':'j', 'letters':'\j\ⓙ\j\ĵ\ǰ\ɉ'}, {'base':'k', 'letters':'\k\ⓚ\k\ḱ\ǩ\ḳ\ķ\ḵ\ƙ\ⱪ\ꝁ\ꝃ\ꝅ\ꞣ'}, {'base':'l', 'letters':'\l\ⓛ\l\ŀ\ĺ\ľ\ḷ\ḹ\ļ\ḽ\ḻ\ſ\ł\ƚ\ɫ\ⱡ\ꝉ\ꞁ\ꝇ'}, {'base':'lj','letters':'\lj'}, {'base':'m', 'letters':'\m\ⓜ\m\ḿ\ṁ\ṃ\ɱ\ɯ'}, {'base':'n', 'letters':'\n\ⓝ\n\ǹ\ń\ñ\ṅ\ň\ṇ\ņ\ṋ\ṉ\ƞ\ɲ\ʼn\ꞑ\ꞥ'}, {'base':'nj','letters':'\nj'}, {'base':'o', 'letters':'\o\ⓞ\o\ò\ó\ô\ồ\ố\ỗ\ổ\õ\ṍ\ȭ\ṏ\ō\ṑ\ṓ\ŏ\ȯ\ȱ\ö\ȫ\ỏ\ő\ǒ\ȍ\ȏ\ơ\ờ\ớ\ỡ\ở\ợ\ọ\ộ\ǫ\ǭ\ø\ǿ\ɔ\ꝋ\ꝍ\ɵ'}, {'base':'oi','letters':'\ƣ'}, 
{'base':'ou','letters':'\ȣ'}, {'base':'oo','letters':'\ꝏ'}, {'base':'p','letters':'\p\ⓟ\p\ṕ\ṗ\ƥ\ᵽ\ꝑ\ꝓ\ꝕ'}, {'base':'q','letters':'\q\ⓠ\q\ɋ\ꝗ\ꝙ'}, {'base':'r','letters':'\r\ⓡ\r\ŕ\ṙ\ř\ȑ\ȓ\ṛ\ṝ\ŗ\ṟ\ɍ\ɽ\ꝛ\ꞧ\ꞃ'}, {'base':'s','letters':'\s\ⓢ\s\ß\ś\ṥ\ŝ\ṡ\š\ṧ\ṣ\ṩ\ș\ş\ȿ\ꞩ\ꞅ\ẛ'}, {'base':'t','letters':'\t\ⓣ\t\ṫ\ẗ\ť\ṭ\ț\ţ\ṱ\ṯ\ŧ\ƭ\ʈ\ⱦ\ꞇ'}, {'base':'tz','letters':'\ꜩ'}, {'base':'u','letters': '\u\ⓤ\u\ù\ú\û\ũ\ṹ\ū\ṻ\ŭ\ü\ǜ\ǘ\ǖ\ǚ\ủ\ů\ű\ǔ\ȕ\ȗ\ư\ừ\ứ\ữ\ử\ự\ụ\ṳ\ų\ṷ\ṵ\ʉ'}, {'base':'v','letters':'\v\ⓥ\v\ṽ\ṿ\ʋ\ꝟ\ʌ'}, {'base':'vy','letters':'\ꝡ'}, {'base':'w','letters':'\w\ⓦ\w\ẁ\ẃ\ŵ\ẇ\ẅ\ẘ\ẉ\ⱳ'}, {'base':'x','letters':'\x\ⓧ\x\ẋ\ẍ'}, {'base':'y','letters':'\y\ⓨ\y\ỳ\ý\ŷ\ỹ\ȳ\ẏ\ÿ\ỷ\ẙ\ỵ\ƴ\ɏ\ỿ'}, {'base':'z','letters':'\z\ⓩ\z\ź\ẑ\ż\ž\ẓ\ẕ\ƶ\ȥ\ɀ\ⱬ\ꝣ'} ]; var diacriticsMap = {}; for (var i=0; i < defaultDiacriticsRemovalMap .length; i++){ var letters = defaultDiacriticsRemovalMap [i].letters; for (var j=0; j < letters.length ; j++){ diacriticsMap[letters[j]] = defaultDiacriticsRemovalMap [i].base; } } // "what?" version ... http://jsperf.com/diacritics/12 function removeDiacritics (str) { return str.replace(/[^\

Tags: Apache

Posted on Fri, 14 Feb 2020 08:01:03 -0800 by mayfair