summary refs log tree commit diff
path: root/tests/test_unicode.cpp
diff options
context:
space:
mode:
author arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2009-10-20 21:39:43 +0000
committer arseny.kapoulkine <arseny.kapoulkine@99668b35-9821-0410-8761-19e4c4f06640> 2009-10-20 21:39:43 +0000
commit 0ceaa38aeb258b6cc055fa002f718a921d52c6ab (patch)
tree 868027b15390ee6fea0c395dc522b531d35ea03f /tests/test_unicode.cpp
parent 009f3d50d2736b0b7d2457d9aa67cd967f39e316 (diff)
tests: Added forgotten test files
git-svn-id: http://pugixml.googlecode.com/svn/trunk@168 99668b35-9821-0410-8761-19e4c4f06640
Diffstat (limited to 'tests/test_unicode.cpp')
-rw-r--r-- tests/test_unicode.cpp 37
1 files changed, 37 insertions, 0 deletions
diff --git a/tests/test_unicode.cpp b/tests/test_unicode.cpp
new file mode 100644
index 0000000..b763355
--- /dev/null
+++ b/tests/test_unicode.cpp
@@ -0,0 +1,37 @@
+#include "common.hpp"
+
+// letters taken from http://www.utf8-chartable.de/
+
+TEST(as_utf16) // exercises as_utf16 on well-formed and malformed UTF-8 byte strings
+{
+ // valid 1-byte, 2-byte and 3-byte inputs: '?', D0 80 (U+0400) and E2 80 BD (U+203D)
+ CHECK(as_utf16("?\xd0\x80\xe2\x80\xbd") == L"?\x0400\x203D");
+
+ // invalid 1-byte input: a lone 0xB0 byte is expected to come back as a single space
+ CHECK(as_utf16("\xb0") == L" ");
+
+ // valid 4-byte input: F2 97 98 A4 (U+97624) and F4 80 8F BF (U+1003FF), separated by a space
+ std::wstring b4 = as_utf16("\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
+ CHECK(b4.size() == 3 && b4[0] == (wchar_t)0x97624 && b4[1] == L' ' && b4[2] == (wchar_t)0x1003ff); // expects one wchar_t per code point; NOTE(review): both values exceed 16 bits - confirm behavior where wchar_t is 2 bytes
+
+ // invalid 5-byte input: the 0xF8 lead byte is expected to become a space, the following "\nbcd" is kept as is
+ std::wstring b5 = as_utf16("\xf8\nbcd");
+ CHECK(b5 == L" \nbcd");
+}
+
+TEST(as_utf8) // exercises as_utf8 on wide strings, using the same characters as the as_utf16 test
+{
+ // valid 1-byte, 2-byte and 3-byte outputs: '?', U+0400 -> D0 80, U+203D -> E2 80 BD
+ CHECK(as_utf8(L"?\x0400\x203D") == "?\xd0\x80\xe2\x80\xbd");
+
+ // valid 4-byte output (this case is compiled out by the #if 0 below)
+#if 0
+ // requires 4-byte wchar_t :( - the wide literal holds U+97624 and U+1003FF, both above 16 bits
+ CHECK(as_utf8(L"\x97624 \x1003ff") == "\xf2\x97\x98\xa4 \xf4\x80\x8f\xbf");
+#endif
+}
+
+TEST_XML(parse_bom_utf8, "\xef\xbb\xbf<node/>") // input text starts with the bytes EF BB BF (the UTF-8 byte order mark) ahead of the markup
+{
+ CHECK_NODE(doc, "<node />"); // expected text carries no BOM bytes; presumably compares the document's serialized form - verify against CHECK_NODE in common.hpp
+}