/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /*************************************************************************** * utf8.cc * * Tue Feb 27 19:18:23 CET 2007 * Copyright 2006 Bent Bisballe Nyeng * deva@aasimon.org ****************************************************************************/ /* * This file is part of DrumGizmo. * * DrumGizmo is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * DrumGizmo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with DrumGizmo; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include "utf8.h" UTF8::UTF8() { // Encode Map map_encode["\x80"] = "�\x80"; map_encode["\x81"] = "�\x81"; map_encode["\x82"] = "�\x82"; map_encode["\x83"] = "�\x83"; map_encode["\x84"] = "�\x84"; map_encode["\x85"] = "�\x85"; map_encode["\x86"] = "�\x86"; map_encode["\x87"] = "�\x87"; map_encode["\x88"] = "�\x88"; map_encode["\x89"] = "�\x89"; map_encode["\x8a"] = "�\x8a"; map_encode["\x8b"] = "�\x8b"; map_encode["\x8c"] = "�\x8c"; map_encode["\x8d"] = "�\x8d"; map_encode["\x8e"] = "�\x8e"; map_encode["\x8f"] = "�\x8f"; map_encode["\x90"] = "�\x90"; map_encode["\x91"] = "�\x91"; map_encode["\x92"] = "�\x92"; map_encode["\x93"] = "�\x93"; map_encode["\x94"] = "�\x94"; map_encode["\x95"] = "�\x95"; map_encode["\x96"] = "�\x96"; map_encode["\x97"] = "�\x97"; map_encode["\x98"] = "�\x98"; map_encode["\x99"] = "�\x99"; map_encode["\x9a"] = "�\x9a"; map_encode["\x9b"] = "�\x9b"; map_encode["\x9c"] = "�\x9c"; map_encode["\x9d"] = "�\x9d"; map_encode["\x9e"] = "�\x9e"; map_encode["\x9f"] = "�\x9f"; map_encode["�"] = " "; map_encode["�"] = "¡"; map_encode["�"] = "¢"; map_encode["�"] = "£"; map_encode["�"] = "¤"; map_encode["�"] = "¥"; map_encode["�"] = "¦"; map_encode["�"] = "§"; map_encode["�"] = "¨"; map_encode["�"] = "©"; map_encode["�"] = "ª"; map_encode["�"] = "«"; map_encode["�"] = "¬"; map_encode["�"] = ""; map_encode["�"] = "®"; map_encode["�"] = "¯"; map_encode["�"] = "°"; map_encode["�"] = "±"; map_encode["�"] = "²"; map_encode["�"] = "³"; map_encode["�"] = "´"; map_encode["�"] = "µ"; map_encode["�"] = "¶"; map_encode["�"] = "·"; map_encode["�"] = "¸"; map_encode["�"] = "¹"; map_encode["�"] = "º"; map_encode["�"] = "»"; map_encode["�"] = "¼"; map_encode["�"] = "½"; map_encode["�"] = "¾"; map_encode["�"] = "¿"; map_encode["�"] = "�\x80"; map_encode["�"] = "�\x81"; map_encode["�"] = "�\x82"; map_encode["�"] = "�\x83"; map_encode["�"] = "�\x84"; map_encode["�"] = "�\x85"; map_encode["�"] = "�\x86"; map_encode["�"] = "�\x87"; map_encode["�"] = "�\x88"; map_encode["�"] = "�\x89"; map_encode["�"] = "�\x8a"; map_encode["�"] = "�\x8b"; map_encode["�"] = "�\x8c"; map_encode["�"] = "�\x8d"; map_encode["�"] = "�\x8e"; map_encode["�"] = "�\x8f"; map_encode["�"] = "�\x90"; map_encode["�"] = "�\x91"; map_encode["�"] = "�\x92"; map_encode["�"] = "�\x93"; map_encode["�"] = "�\x94"; map_encode["�"] = "�\x95"; map_encode["�"] = "�\x96"; map_encode["�"] = "�\x97"; map_encode["�"] = "�\x98"; map_encode["�"] = "�\x99"; map_encode["�"] = "�\x9a"; map_encode["�"] = "�\x9b"; map_encode["�"] = "�\x9c"; map_encode["�"] = "�\x9d"; map_encode["�"] = "�\x9e"; map_encode["�"] = "�\x9f"; map_encode["�"] = "à"; map_encode["�"] = "á"; map_encode["�"] = "â"; map_encode["�"] = "ã"; map_encode["�"] = "ä"; map_encode["�"] = "å"; map_encode["�"] = "æ"; map_encode["�"] = "ç"; map_encode["�"] = "è"; map_encode["�"] = "é"; map_encode["�"] = "ê"; map_encode["�"] = "ë"; map_encode["�"] = "ì"; map_encode["�"] = "í"; map_encode["�"] = "î"; map_encode["�"] = "ï"; map_encode["�"] = "ð"; map_encode["�"] = "ñ"; map_encode["�"] = "ò"; map_encode["�"] = "ó"; map_encode["�"] = "ô"; map_encode["�"] = "õ"; map_encode["�"] = "ö"; map_encode["�"] = "÷"; map_encode["�"] = "ø"; map_encode["�"] = "ù"; map_encode["�"] = "ú"; map_encode["�"] = "û"; map_encode["�"] = "ü"; map_encode["�"] = "ý"; map_encode["�"] = "þ"; map_encode["�"] = "ÿ"; // Decode Map map_decode["�\x80"] = "\x80"; map_decode["�\x81"] = "\x81"; map_decode["�\x82"] = "\x82"; map_decode["�\x83"] = "\x83"; map_decode["�\x84"] = "\x84"; map_decode["�\x85"] = "\x85"; map_decode["�\x86"] = "\x86"; map_decode["�\x87"] = "\x87"; map_decode["�\x88"] = "\x88"; map_decode["�\x89"] = "\x89"; map_decode["�\x8a"] = "\x8a"; map_decode["�\x8b"] = "\x8b"; map_decode["�\x8c"] = "\x8c"; map_decode["�\x8d"] = "\x8d"; map_decode["�\x8e"] = "\x8e"; map_decode["�\x8f"] = "\x8f"; map_decode["�\x90"] = "\x90"; map_decode["�\x91"] = "\x91"; map_decode["�\x92"] = "\x92"; map_decode["�\x93"] = "\x93"; map_decode["�\x94"] = "\x94"; map_decode["�\x95"] = "\x95"; map_decode["�\x96"] = "\x96"; map_decode["�\x97"] = "\x97"; map_decode["�\x98"] = "\x98"; map_decode["�\x99"] = "\x99"; map_decode["�\x9a"] = "\x9a"; map_decode["�\x9b"] = "\x9b"; map_decode["�\x9c"] = "\x9c"; map_decode["�\x9d"] = "\x9d"; map_decode["�\x9e"] = "\x9e"; map_decode["�\x9f"] = "\x9f"; map_decode[" "] = "�"; map_decode["¡"] = "�"; map_decode["¢"] = "�"; map_decode["£"] = "�"; map_decode["¤"] = "�"; map_decode["¥"] = "�"; map_decode["¦"] = "�"; map_decode["§"] = "�"; map_decode["¨"] = "�"; map_decode["©"] = "�"; map_decode["ª"] = "�"; map_decode["«"] = "�"; map_decode["¬"] = "�"; map_decode[""] = "�"; map_decode["®"] = "�"; map_decode["¯"] = "�"; map_decode["°"] = "�"; map_decode["±"] = "�"; map_decode["²"] = "�"; map_decode["³"] = "�"; map_decode["´"] = "�"; map_decode["µ"] = "�"; map_decode["¶"] = "�"; map_decode["·"] = "�"; map_decode["¸"] = "�"; map_decode["¹"] = "�"; map_decode["º"] = "�"; map_decode["»"] = "�"; map_decode["¼"] = "�"; map_decode["½"] = "�"; map_decode["¾"] = "�"; map_decode["¿"] = "�"; map_decode["�\x80"] = "�"; map_decode["�\x81"] = "�"; map_decode["�\x82"] = "�"; map_decode["�\x83"] = "�"; map_decode["�\x84"] = "�"; map_decode["�\x85"] = "�"; map_decode["�\x86"] = "�"; map_decode["�\x87"] = "�"; map_decode["�\x88"] = "�"; map_decode["�\x89"] = "�"; map_decode["�\x8a"] = "�"; map_decode["�\x8b"] = "�"; map_decode["�\x8c"] = "�"; map_decode["�\x8d"] = "�"; map_decode["�\x8e"] = "�"; map_decode["�\x8f"] = "�"; map_decode["�\x90"] = "�"; map_decode["�\x91"] = "�"; map_decode["�\x92"] = "�"; map_decode["�\x93"] = "�"; map_decode["�\x94"] = "�"; map_decode["�\x95"] = "�"; map_decode["�\x96"] = "�"; map_decode["�\x97"] = "�"; map_decode["�\x98"] = "�"; map_decode["�\x99"] = "�"; map_decode["�\x9a"] = "�"; map_decode["�\x9b"] = "�"; map_decode["�\x9c"] = "�"; map_decode["�\x9d"] = "�"; map_decode["�\x9e"] = "�"; map_decode["�\x9f"] = "�"; map_decode["à"] = "�"; map_decode["á"] = "�"; map_decode["â"] = "�"; map_decode["ã"] = "�"; map_decode["ä"] = "�"; map_decode["å"] = "�"; map_decode["æ"] = "�"; map_decode["ç"] = "�"; map_decode["è"] = "�"; map_decode["é"] = "�"; map_decode["ê"] = "�"; map_decode["ë"] = "�"; map_decode["ì"] = "�"; map_decode["í"] = "�"; map_decode["î"] = "�"; map_decode["ï"] = "�"; map_decode["ð"] = "�"; map_decode["ñ"] = "�"; map_decode["ò"] = "�"; map_decode["ó"] = "�"; map_decode["ô"] = "�"; map_decode["õ"] = "�"; map_decode["ö"] = "�"; map_decode["÷"] = "�"; map_decode["ø"] = "�"; map_decode["ù"] = "�"; map_decode["ú"] = "�"; map_decode["û"] = "�"; map_decode["ü"] = "�"; map_decode["ý"] = "�"; map_decode["þ"] = "�"; map_decode["ÿ"] = "�"; // FIXME: This is just a hack to make Goran Mekic's name work. map_decode["�\x87"] = "c"; } std::string UTF8::fromLatin1(std::string const& s) { std::string ret; for(int i = 0; i < (int)s.length(); i++) { std::string c; if((unsigned char)s[i] <= 0x7F) { c = s.substr(i, 1); } else { c = map_encode[s.substr(i, 1)]; } // If c == "", the character wasn't found in the map. // Ignore this case for now and just push an empty string in this case. ret.append(c); } return ret; } std::string UTF8::toLatin1(std::string const& s) { std::string ret; int width = 1; for(int i = 0; i < (int)s.length(); i += width) { if(/*(unsigned char)s[i]>=0x00&&*/ (unsigned char)s[i] <= 0x7F) { width = 1; // 00-7F -> 1 byte } if((unsigned char)s[i] >= 0xC2 && (unsigned char)s[i] <= 0xDF) { width = 2; // C2-DF -> 2 bytes } if((unsigned char)s[i] >= 0xE0 && (unsigned char)s[i] <= 0xEF) { width = 3; // E0-EF -> 3 bytes } if((unsigned char)s[i] >= 0xF0 && (unsigned char)s[i] <= 0xF4) { width = 4; // F0-F4 -> 4 bytes } std::string c; if(width == 1) { c = s.substr(i, 1); } else { c = map_decode[s.substr(i, width)]; } // If c == "", the character wasn't found in the map. // Ignore this case for now and just push an empty string in this case. ret.append(c); } return ret; }