/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/***************************************************************************
 *            utf8.cc
 *
 *  Tue Feb 27 19:18:23 CET 2007
 *  Copyright  2006 Bent Bisballe Nyeng
 *  deva@aasimon.org
 ****************************************************************************/

/*
 *  This file is part of DrumGizmo.
 *
 *  DrumGizmo is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  DrumGizmo is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with DrumGizmo; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
 */
#include "utf8.h"

// Build the two lookup tables used by fromLatin1() and toLatin1():
//   map_encode: one single-byte character (0x80-0xFF) -> its UTF-8 sequence
//   map_decode: the UTF-8 sequence -> the single-byte character
//
// Every byte is written as a hex escape or computed, so the tables do not
// depend on the encoding this source file is saved or compiled in.
UTF8::UTF8()
{
	// UTF-8 sequences for the bytes 0x80-0x9F, indexed by (byte - 0x80).
	// The code points follow the Windows-1252 assignments for this range.
	// NOTE(review): 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no Windows-1252
	// assignment; they are mapped to the C1 control character with the same
	// value (U+0081, ...). This is an assumption - confirm it is intended.
	static const char* const high_range_utf8[32] = {
		"\xE2\x82\xAC", // 0x80 -> U+20AC euro sign
		"\xC2\x81",     // 0x81 -> U+0081 (unassigned, see note)
		"\xE2\x80\x9A", // 0x82 -> U+201A single low-9 quotation mark
		"\xC6\x92",     // 0x83 -> U+0192 latin small f with hook
		"\xE2\x80\x9E", // 0x84 -> U+201E double low-9 quotation mark
		"\xE2\x80\xA6", // 0x85 -> U+2026 horizontal ellipsis
		"\xE2\x80\xA0", // 0x86 -> U+2020 dagger
		"\xE2\x80\xA1", // 0x87 -> U+2021 double dagger
		"\xCB\x86",     // 0x88 -> U+02C6 modifier circumflex accent
		"\xE2\x80\xB0", // 0x89 -> U+2030 per mille sign
		"\xC5\xA0",     // 0x8A -> U+0160 S with caron
		"\xE2\x80\xB9", // 0x8B -> U+2039 single left angle quotation mark
		"\xC5\x92",     // 0x8C -> U+0152 ligature OE
		"\xC2\x8D",     // 0x8D -> U+008D (unassigned, see note)
		"\xC5\xBD",     // 0x8E -> U+017D Z with caron
		"\xC2\x8F",     // 0x8F -> U+008F (unassigned, see note)
		"\xC2\x90",     // 0x90 -> U+0090 (unassigned, see note)
		"\xE2\x80\x98", // 0x91 -> U+2018 left single quotation mark
		"\xE2\x80\x99", // 0x92 -> U+2019 right single quotation mark
		"\xE2\x80\x9C", // 0x93 -> U+201C left double quotation mark
		"\xE2\x80\x9D", // 0x94 -> U+201D right double quotation mark
		"\xE2\x80\xA2", // 0x95 -> U+2022 bullet
		"\xE2\x80\x93", // 0x96 -> U+2013 en dash
		"\xE2\x80\x94", // 0x97 -> U+2014 em dash
		"\xCB\x9C",     // 0x98 -> U+02DC small tilde
		"\xE2\x84\xA2", // 0x99 -> U+2122 trade mark sign
		"\xC5\xA1",     // 0x9A -> U+0161 s with caron
		"\xE2\x80\xBA", // 0x9B -> U+203A single right angle quotation mark
		"\xC5\x93",     // 0x9C -> U+0153 ligature oe
		"\xC2\x9D",     // 0x9D -> U+009D (unassigned, see note)
		"\xC5\xBE",     // 0x9E -> U+017E z with caron
		"\xC5\xB8"      // 0x9F -> U+0178 Y with diaeresis
	};

	for(int index = 0; index < 32; ++index)
	{
		const std::string latin1(
		    1, static_cast<char>(static_cast<unsigned char>(0x80 + index)));
		const std::string utf8(high_range_utf8[index]);

		map_encode[latin1] = utf8;
		map_decode[utf8] = latin1;
	}

	// 0xA0-0xFF correspond to U+00A0-U+00FF, which UTF-8 encodes in two
	// bytes: 110000xx 10xxxxxx (lead byte 0xC2 or 0xC3).
	for(int code = 0xA0; code <= 0xFF; ++code)
	{
		const std::string latin1(
		    1, static_cast<char>(static_cast<unsigned char>(code)));

		std::string utf8;
		utf8 += static_cast<char>(static_cast<unsigned char>(0xC0 | (code >> 6)));
		utf8 += static_cast<char>(static_cast<unsigned char>(0x80 | (code & 0x3F)));

		map_encode[latin1] = utf8;
		map_decode[utf8] = latin1;
	}

	// FIXME: This is just a hack to make Goran Mekic's name work.
	// U+0107 (c with acute) has no single-byte equivalent here, so it is
	// decoded to a plain ASCII 'c'.
	map_decode["\xC4\x87"] = "c";
}

// Convert a single-byte encoded string to UTF-8.
//
// Bytes in the ASCII range (0x00-0x7F) are copied unchanged; every other
// byte is replaced by the sequence stored for it in map_encode.
std::string UTF8::fromLatin1(std::string const& s)
{
	std::string result;

	for(std::string::size_type pos = 0; pos < s.length(); ++pos)
	{
		const std::string single(1, s[pos]);

		if(static_cast<unsigned char>(s[pos]) > 0x7F)
		{
			// A byte without an entry in map_encode contributes an empty
			// string, i.e. it is silently dropped from the output.
			result += map_encode[single];
		}
		else
		{
			result += single;
		}
	}

	return result;
}

// Convert a UTF-8 encoded string to its single-byte representation.
//
// ASCII bytes are copied unchanged. Multi-byte sequences are looked up in
// map_decode; a sequence without an entry (including one truncated by the
// end of the string) is dropped from the output. Bytes that cannot start a
// multi-byte sequence (0x80-0xC1 and 0xF5-0xFF) are copied through as-is.
std::string UTF8::toLatin1(std::string const& s)
{
	std::string ret;

	std::string::size_type i = 0;
	while(i < s.length())
	{
		const unsigned char lead = static_cast<unsigned char>(s[i]);

		// The sequence length is derived from the lead byte and recomputed
		// for every character. Carrying the previous width over to a byte
		// that is not a valid lead would skip the bytes following it.
		std::string::size_type width = 1; // 00-7F and invalid leads -> 1 byte
		if(lead >= 0xC2 && lead <= 0xDF)
		{
			width = 2; // C2-DF -> 2 bytes
		}
		else if(lead >= 0xE0 && lead <= 0xEF)
		{
			width = 3; // E0-EF -> 3 bytes
		}
		else if(lead >= 0xF0 && lead <= 0xF4)
		{
			width = 4; // F0-F4 -> 4 bytes
		}

		if(width == 1)
		{
			ret.append(s, i, 1);
		}
		else
		{
			// substr() clamps at the end of the string, so a truncated
			// sequence simply fails the lookup below.
			const std::string sequence = s.substr(i, width);

			// Check for presence first: operator[] alone would insert an
			// empty entry for every unknown sequence and grow the table.
			if(map_decode.count(sequence) != 0)
			{
				ret.append(map_decode[sequence]);
			}
		}

		i += width;
	}

	return ret;
}