Use std::u16string for conversion between UTF-8 and UTF-16, FS:USER functions

This commit is contained in:
archshift 2014-11-10 14:36:32 -08:00
parent 0ad5964c8b
commit 1f7c4ab7f6
5 changed files with 257 additions and 192 deletions

View file

@ -9,6 +9,7 @@
#ifdef _WIN32
#include <Windows.h>
#include <codecvt>
#else
#include <iconv.h>
#endif
@ -411,7 +412,19 @@ std::string UriEncode(const std::string & sSrc)
#ifdef _WIN32
std::string UTF16ToUTF8(const std::wstring& input)
/// Converts a UTF-16 string to UTF-8 through the standard library's UTF-8 <-> UTF-16 codecvt facet.
std::string UTF16ToUTF8(const std::u16string& input)
{
    using Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;

    Utf16Converter converter;
    std::string utf8 = converter.to_bytes(input);
    return utf8;
}
/// Converts a UTF-8 string to UTF-16 through the standard library's UTF-8 <-> UTF-16 codecvt facet.
std::u16string UTF8ToUTF16(const std::string& input)
{
    using Utf16Converter = std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>;

    Utf16Converter converter;
    std::u16string utf16 = converter.from_bytes(input);
    return utf16;
}
static std::string UTF16ToUTF8(const std::wstring& input)
{
auto const size = WideCharToMultiByte(CP_UTF8, 0, input.data(), input.size(), nullptr, 0, nullptr, nullptr);
@ -424,7 +437,7 @@ std::string UTF16ToUTF8(const std::wstring& input)
return output;
}
std::wstring CPToUTF16(u32 code_page, const std::string& input)
static std::wstring CPToUTF16(u32 code_page, const std::string& input)
{
auto const size = MultiByteToWideChar(code_page, 0, input.data(), input.size(), nullptr, 0);
@ -437,7 +450,7 @@ std::wstring CPToUTF16(u32 code_page, const std::string& input)
return output;
}
std::wstring UTF8ToUTF16(const std::string& input)
/// Converts a UTF-8 string to a wide string by calling CPToUTF16 with the CP_UTF8 code page.
std::wstring UTF8ToUTF16W(const std::string &input)
{
    std::wstring wide = CPToUTF16(CP_UTF8, input);
    return wide;
}
@ -455,19 +468,21 @@ std::string CP1252ToUTF8(const std::string& input)
#else
template <typename T>
std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
static std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
{
std::string result;
iconv_t const conv_desc = iconv_open("UTF-8", fromcode);
if ((iconv_t)-1 == conv_desc)
if ((iconv_t)(-1) == conv_desc)
{
ERROR_LOG(COMMON, "Iconv initialization failure [%s]: %s", fromcode, strerror(errno));
iconv_close(conv_desc);
return {};
}
else
{
size_t const in_bytes = sizeof(T) * input.size();
size_t const out_buffer_size = 4 * in_bytes;
const size_t in_bytes = sizeof(T) * input.size();
// Multiply by 4, which is the max number of bytes to encode a codepoint
const size_t out_buffer_size = 4 * in_bytes;
std::string out_buffer;
out_buffer.resize(out_buffer_size);
@ -477,17 +492,17 @@ std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
auto dst_buffer = &out_buffer[0];
size_t dst_bytes = out_buffer.size();
while (src_bytes != 0)
while (0 != src_bytes)
{
size_t const iconv_result = iconv(conv_desc, (char**)(&src_buffer), &src_bytes,
&dst_buffer, &dst_bytes);
if ((size_t)-1 == iconv_result)
if (static_cast<size_t>(-1) == iconv_result)
{
if (EILSEQ == errno || EINVAL == errno)
{
// Try to skip the bad character
if (src_bytes != 0)
if (0 != src_bytes)
{
--src_bytes;
++src_buffer;
@ -505,11 +520,71 @@ std::string CodeToUTF8(const char* fromcode, const std::basic_string<T>& input)
out_buffer.swap(result);
iconv_close(conv_desc);
}
return result;
}
/**
 * Converts a UTF-8 encoded string to UTF-16 using iconv.
 *
 * Input bytes that iconv rejects (EILSEQ / EINVAL) are skipped one byte at a time; any other
 * iconv error is logged and ends the conversion, returning what was converted so far.
 *
 * @param input UTF-8 encoded text
 * @return The converted text, or an empty string if the conversion descriptor could not be opened
 */
std::u16string UTF8ToUTF16(const std::string& input)
{
    std::u16string result;

    // Name the byte order explicitly: plain "UTF-16" leaves the choice of byte order (and whether
    // a byte-order mark is prepended to the output) to the iconv implementation.
    // NOTE(review): this assumes a little-endian host, since the bytes are written straight into
    // char16_t storage - confirm for any big-endian target.
    iconv_t const conv_desc = iconv_open("UTF-16LE", "UTF-8");
    if ((iconv_t)(-1) == conv_desc)
    {
        ERROR_LOG(COMMON, "Iconv initialization failure [UTF-8]: %s", strerror(errno));
        // Nothing was opened, so there is no descriptor to close here.
        return {};
    }

    const size_t in_bytes = sizeof(char) * input.size();
    // A UTF-8 sequence of 1-3 bytes becomes one UTF-16 code unit and a 4-byte sequence becomes
    // two, so one code unit per input byte is always enough room.
    const size_t out_units = in_bytes;
    // iconv counts the output space in bytes, not in char16_t elements.
    const size_t out_buffer_size = sizeof(char16_t) * out_units;

    std::u16string out_buffer;
    out_buffer.resize(out_units);

    char* src_buffer = const_cast<char*>(&input[0]);
    size_t src_bytes = in_bytes;
    char* dst_buffer = reinterpret_cast<char*>(&out_buffer[0]);
    size_t dst_bytes = out_buffer_size;

    while (0 != src_bytes)
    {
        size_t const iconv_result = iconv(conv_desc, &src_buffer, &src_bytes,
                                          &dst_buffer, &dst_bytes);

        if (static_cast<size_t>(-1) == iconv_result)
        {
            if (EILSEQ == errno || EINVAL == errno)
            {
                // Try to skip the bad character
                if (0 != src_bytes)
                {
                    --src_bytes;
                    ++src_buffer;
                }
            }
            else
            {
                ERROR_LOG(COMMON, "iconv failure [UTF-8]: %s", strerror(errno));
                break;
            }
        }
    }

    // dst_bytes is the number of unused output bytes; convert the used bytes back to code units.
    out_buffer.resize((out_buffer_size - dst_bytes) / sizeof(char16_t));
    out_buffer.swap(result);

    iconv_close(conv_desc);

    return result;
}
/// Converts a UTF-16 string to UTF-8 through the iconv-based CodeToUTF8 helper.
std::string UTF16ToUTF8(const std::u16string& input)
{
    // The input carries no byte-order mark, so name the byte order explicitly instead of letting
    // the iconv implementation guess it from plain "UTF-16".
    // NOTE(review): assumes the char16_t storage is little-endian (little-endian host) - confirm.
    return CodeToUTF8("UTF-16LE", input);
}
std::string CP1252ToUTF8(const std::string& input)
{
//return CodeToUTF8("CP1252//TRANSLIT", input);
@ -523,19 +598,6 @@ std::string SHIFTJISToUTF8(const std::string& input)
return CodeToUTF8("SJIS", input);
}
/// Converts a wide string to UTF-8 via CodeToUTF8 with "UTF-16LE" as the source encoding,
/// then strips every NUL byte from the converted text.
std::string UTF16ToUTF8(const std::wstring& input)
{
    // Source encodings tried before settling on this one: "UCS-2", "UCS-2LE", "UTF-16".
    std::string utf8 = CodeToUTF8("UTF-16LE", input);

    // TODO: why is this needed?
    // NOTE(review): possibly because wchar_t is wider than 16 bits on some non-Windows
    // platforms, which would leave zero code units between characters - confirm.
    const auto first_removed = std::remove(utf8.begin(), utf8.end(), 0x00);
    utf8.erase(first_removed, utf8.end());

    return utf8;
}
#endif
}

View file

@ -89,20 +89,22 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st
std::string UriDecode(const std::string & sSrc);
std::string UriEncode(const std::string & sSrc);
std::string UTF16ToUTF8(const std::u16string& input);
std::u16string UTF8ToUTF16(const std::string& input);
std::string CP1252ToUTF8(const std::string& str);
std::string SHIFTJISToUTF8(const std::string& str);
std::string UTF16ToUTF8(const std::wstring& str);
#ifdef _WIN32
std::wstring UTF8ToUTF16(const std::string& str);
std::wstring UTF8ToUTF16W(const std::string& str);
#ifdef _UNICODE
inline std::string TStrToUTF8(const std::wstring& str)
{ return UTF16ToUTF8(str); }
inline std::wstring UTF8ToTStr(const std::string& str)
{ return UTF8ToUTF16(str); }
{ return UTF8ToUTF16W(str); }
#else
inline std::string TStrToUTF8(const std::string& str)
{ return str; }

View file

@ -7,11 +7,13 @@
#include <memory>
#include "common/common_types.h"
#include "common/string_util.h"
#include "common/bit_field.h"
#include "core/file_sys/file.h"
#include "core/file_sys/directory.h"
#include "core/mem_map.h"
#include "core/hle/kernel/kernel.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -19,6 +21,15 @@
namespace FileSys {
/// Path string type: tells Path how to interpret the data a path argument points to
enum LowPathType : u32 {
    Invalid = 0, ///< Type of a default-constructed Path; no data is read for it
    Empty   = 1, ///< No path data; Path converts it to an empty string / u16string / byte vector
    Binary  = 2, ///< Raw bytes, copied as-is into a byte vector
    Char    = 3, ///< Null-terminated string of char
    Wchar   = 4  ///< Null-terminated string of 16-bit characters
};
union Mode {
u32 hex;
BitField<0, 1, u32> read_flag;
@ -26,6 +37,94 @@ union Mode {
BitField<2, 1, u32> create_flag;
};
/**
 * A path read from emulated memory, stored in the representation given by its LowPathType and
 * convertible to a UTF-8 string, a UTF-16 string or a byte vector on request.
 */
class Path {
public:

    /// Creates a path of type Invalid that holds no data.
    Path():
        type(Invalid)
    {
    }

    /**
     * Reads a path out of emulated memory.
     * @param type How the data at pointer is encoded
     * @param size Size of the data in bytes; for Char and Wchar this includes the null terminator
     * @param pointer Address handed to Memory::GetPointer to locate the data
     */
    Path(LowPathType type, u32 size, u32 pointer):
        type(type)
    {
        // NOTE(review): what Memory::GetPointer returns for an unmapped address is not visible
        // here; the null checks below are purely defensive.
        switch (type) {
        case Binary:
        {
            const u8* data = Memory::GetPointer(pointer);
            if (data != nullptr)
                binary = std::vector<u8>(data, data + size);
            break;
        }
        case Char:
        {
            const char* data = reinterpret_cast<const char*>(Memory::GetPointer(pointer));
            // Data is always null-terminated. size is unsigned, so guard against size == 0,
            // where size - 1 would wrap around to a huge length.
            const u32 length = (size > 0) ? size - 1 : 0;
            if (data != nullptr)
                string = std::string(data, length);
            break;
        }
        case Wchar:
        {
            const char16_t* data = reinterpret_cast<const char16_t*>(Memory::GetPointer(pointer));
            // Data is always null-terminated. Same wrap-around guard as for Char, in code units.
            const u32 length = (size >= 2) ? size / 2 - 1 : 0;
            if (data != nullptr)
                u16str = std::u16string(data, length);
            break;
        }
        default:
            // Invalid and Empty carry no data to read.
            break;
        }
    }

    /// Returns the type this path was constructed with.
    LowPathType GetType() const {
        return type;
    }

    /**
     * Returns the path as a std::string (Wchar paths are converted with Common::UTF16ToUTF8).
     * Empty paths give an empty string; any other type logs an error and gives an empty string.
     */
    const std::string AsString() const {
        switch (GetType()) {
        case Char:
            return string;
        case Wchar:
            return Common::UTF16ToUTF8(u16str);
        case Empty:
            return {};
        default:
            ERROR_LOG(KERNEL, "LowPathType cannot be converted to string!");
            return {};
        }
    }

    /**
     * Returns the path as a std::u16string (Char paths are converted with Common::UTF8ToUTF16).
     * Empty paths give an empty string; any other type logs an error and gives an empty string.
     */
    const std::u16string AsU16Str() const {
        switch (GetType()) {
        case Char:
            return Common::UTF8ToUTF16(string);
        case Wchar:
            return u16str;
        case Empty:
            return {};
        default:
            ERROR_LOG(KERNEL, "LowPathType cannot be converted to u16string!");
            return {};
        }
    }

    /**
     * Returns the path as a byte vector.
     * Empty paths give an empty vector; an Invalid path logs an error and gives an empty vector.
     */
    const std::vector<u8> AsBinary() const {
        switch (GetType()) {
        case Binary:
            return binary;
        case Char:
            return std::vector<u8>(string.begin(), string.end());
        case Wchar:
            // NOTE(review): this converts each char16_t element to a single u8, so (assuming u8
            // is an 8-bit type) the upper byte of every code unit is dropped. Confirm whether
            // two bytes per code unit, and in which byte order, are wanted instead.
            return std::vector<u8>(u16str.begin(), u16str.end());
        case Empty:
            return {};
        default:
            ERROR_LOG(KERNEL, "LowPathType cannot be converted to binary!");
            return {};
        }
    }

private:
    LowPathType type;           ///< Representation the path was supplied in
    std::vector<u8> binary;     ///< Filled only for Binary paths
    std::string string;         ///< Filled only for Char paths
    std::u16string u16str;      ///< Filled only for Wchar paths
};
class Archive : NonCopyable {
public:
/// Supported archive types

View file

@ -14,75 +14,6 @@
namespace FS_User {
/**
 * Reads a path out of emulated memory.
 * @param type How the data at pointer is encoded
 * @param size Size of the data in bytes; for Char and Wchar the last element is not copied
 * @param pointer Address handed to Memory::GetPointer to locate the data
 */
FS_Path::FS_Path(LowPathType type, u32 size, u32 pointer):
    type(type)
{
    switch (type) {
    case Binary:
    {
        auto data = Memory::GetPointer(pointer);
        binary = std::vector<u8>(data, data + size);
        break;
    }
    case Char:
    {
        auto data = reinterpret_cast<const char*>(Memory::GetPointer(pointer));
        // size is unsigned: guard against size == 0, where size - 1 would wrap around.
        string = std::string(data, (size > 0) ? size - 1 : 0);
        // Without this break, Char fell through and also filled u16str from the same bytes.
        break;
    }
    case Wchar:
    {
        auto data = reinterpret_cast<const char16_t*>(Memory::GetPointer(pointer));
        // Same wrap-around guard as for Char, counted in 16-bit code units.
        u16str = std::u16string(data, (size >= 2) ? size / 2 - 1 : 0);
        break;
    }
    default:
        // Invalid and Empty carry no data to read.
        break;
    }
}
/// Returns the type this path was constructed with.
FS_Path::LowPathType FS_Path::GetType() const
{
    return this->type;
}
const std::vector<u8>& FS_Path::GetBinary() const {
return binary;
}
const std::string& FS_Path::GetString() const {
_dbg_assert_msg_(KERNEL, type == Char, "LowPathType is not Char!");
return string;
}
const std::u16string& FS_Path::GetU16Str() const {
_dbg_assert_msg_(KERNEL, type == Wchar, "LowPathType is not Wchar!");
return u16str;
}
std::string FS_Path::AsString() {
switch (GetType()) {
case FS_Path::Char:
return GetString();
case FS_Path::Empty:
return {};
case FS_Path::Wchar:
{
auto str16 = GetU16Str();
return Common::UTF16ToUTF8(std::wstring(str16.cbegin(), str16.cend()));
}
}
}
std::u16string FS_Path::AsU16Str() {
switch (GetType()) {
case FS_Path::Wchar:
return GetU16Str();
case FS_Path::Empty:
return {};
case FS_Path::Char:
{
auto str = GetString();
return std::u16string(str.cbegin(), str.cend());
}
}
}
// We currently return 0 for success and -1 for failure in cmd_buff[1]. -1 was chosen because it
// puts all the sections of the http://3dbrew.org/wiki/Error_codes to something non-zero, to make
// sure we don't mislead the application into thinking something worked.
@ -103,17 +34,17 @@ void OpenFile(Service::Interface* self) {
// TODO(Link Mauve): cmd_buff[2], aka archive handle lower word, isn't used according to
// 3dmoo's or ctrulib's implementations. Triple check if it's really the case.
Handle archive_handle = static_cast<Handle>(cmd_buff[3]);
auto filename_type = static_cast<FS_Path::LowPathType>(cmd_buff[4]);
auto filename_type = static_cast<FileSys::LowPathType>(cmd_buff[4]);
u32 filename_size = cmd_buff[5];
FileSys::Mode mode; mode.hex = cmd_buff[6];
u32 attributes = cmd_buff[7]; // TODO(Link Mauve): do something with those attributes.
u32 filename_ptr = cmd_buff[9];
FS_Path file_path(filename_type, filename_size, filename_ptr);
FileSys::Path file_path(filename_type, filename_size, filename_ptr);
std::string file_string;
switch (file_path.GetType()) {
case FS_Path::Char:
case FS_Path::Wchar:
case FileSys::Char:
case FileSys::Wchar:
file_string = file_path.AsString();
break;
default:
@ -141,9 +72,9 @@ void OpenFileDirectly(Service::Interface* self) {
u32* cmd_buff = Service::GetCommandBuffer();
auto archive_id = static_cast<FileSys::Archive::IdCode>(cmd_buff[2]);
auto archivename_type = static_cast<FS_Path::LowPathType>(cmd_buff[3]);
auto archivename_type = static_cast<FileSys::LowPathType>(cmd_buff[3]);
u32 archivename_size = cmd_buff[4];
auto filename_type = static_cast<FS_Path::LowPathType>(cmd_buff[5]);
auto filename_type = static_cast<FileSys::LowPathType>(cmd_buff[5]);
u32 filename_size = cmd_buff[6];
FileSys::Mode mode; mode.hex = cmd_buff[7];
u32 attributes = cmd_buff[8]; // TODO(Link Mauve): do something with those attributes.
@ -153,7 +84,7 @@ void OpenFileDirectly(Service::Interface* self) {
DEBUG_LOG(KERNEL, "archive_type=%d archive_size=%d file_type=%d file_size=%d file_mode=%d file_attrs=%d",
archivename_type, archivename_size, filename_type, filename_size, mode, attributes);
if (archivename_type != FS_Path::Empty) {
if (archivename_type != FileSys::Empty) {
ERROR_LOG(KERNEL, "archive LowPath type other than empty is currently unsupported");
cmd_buff[1] = -1;
return;
@ -172,11 +103,11 @@ void OpenFileDirectly(Service::Interface* self) {
return;
}
FS_Path file_path(filename_type, filename_size, filename_ptr);
FileSys::Path file_path(filename_type, filename_size, filename_ptr);
std::string file_string;
switch (file_path.GetType()) {
case FS_Path::Char:
case FS_Path::Wchar:
case FileSys::Char:
case FileSys::Wchar:
file_string = file_path.AsString();
break;
default:
@ -214,15 +145,15 @@ void CreateDirectory(Service::Interface* self) {
// TODO: cmd_buff[2], aka archive handle lower word, isn't used according to
// 3dmoo's or ctrulib's implementations. Triple check if it's really the case.
Handle archive_handle = static_cast<Handle>(cmd_buff[3]);
auto dirname_type = static_cast<FS_Path::LowPathType>(cmd_buff[4]);
auto dirname_type = static_cast<FileSys::LowPathType>(cmd_buff[4]);
u32 dirname_size = cmd_buff[5];
u32 dirname_ptr = cmd_buff[8];
FS_Path dir_path(dirname_type, dirname_size, dirname_ptr);
FileSys::Path dir_path(dirname_type, dirname_size, dirname_ptr);
std::string dir_string;
switch (dir_path.GetType()) {
case FS_Path::Char:
case FS_Path::Wchar:
case FileSys::Char:
case FileSys::Wchar:
dir_string = dir_path.AsString();
break;
default:
@ -243,15 +174,15 @@ void OpenDirectory(Service::Interface* self) {
// TODO(Link Mauve): cmd_buff[2], aka archive handle lower word, isn't used according to
// 3dmoo's or ctrulib's implementations. Triple check if it's really the case.
Handle archive_handle = static_cast<Handle>(cmd_buff[2]);
auto dirname_type = static_cast<FS_Path::LowPathType>(cmd_buff[3]);
auto dirname_type = static_cast<FileSys::LowPathType>(cmd_buff[3]);
u32 dirname_size = cmd_buff[4];
u32 dirname_ptr = cmd_buff[6];
FS_Path dir_path(dirname_type, dirname_size, dirname_ptr);
FileSys::Path dir_path(dirname_type, dirname_size, dirname_ptr);
std::string dir_string;
switch (dir_path.GetType()) {
case FS_Path::Char:
case FS_Path::Wchar:
case FileSys::Char:
case FileSys::Wchar:
dir_string = dir_path.AsString();
break;
default:
@ -278,13 +209,13 @@ void OpenArchive(Service::Interface* self) {
u32* cmd_buff = Service::GetCommandBuffer();
auto archive_id = static_cast<FileSys::Archive::IdCode>(cmd_buff[1]);
auto archivename_type = static_cast<FS_Path::LowPathType>(cmd_buff[2]);
auto archivename_type = static_cast<FileSys::LowPathType>(cmd_buff[2]);
u32 archivename_size = cmd_buff[3];
u32 archivename_ptr = cmd_buff[5];
DEBUG_LOG(KERNEL, "type=%d size=%d", archivename_type, archivename_size);
if (archivename_type != FS_Path::Empty) {
if (archivename_type != FileSys::Empty) {
ERROR_LOG(KERNEL, "archive LowPath type other than empty is currently unsupported");
cmd_buff[1] = -1;
return;

View file

@ -11,35 +11,6 @@
namespace FS_User {
/// A path argument of an fs:USER command, read from emulated memory according to its type
class FS_Path {
public:
    // Command to access archive file
    /// How the data of a path argument is encoded
    enum LowPathType : u32 {
        Invalid = 0,
        Empty = 1,
        Binary = 2,  ///< Stored in the byte vector
        Char = 3,    ///< Stored in the char string
        Wchar = 4    ///< Stored in the 16-bit string
    };

    /// Builds a path of the given type from size bytes located through the pointer argument
    FS_Path(LowPathType type, u32 size, u32 pointer);

    /// Returns the type passed at construction
    LowPathType GetType() const;

    /// Returns the byte vector member
    const std::vector<u8>& GetBinary() const;

    /// Returns the char string member (intended for Char paths)
    const std::string& GetString() const;

    /// Returns the 16-bit string member (intended for Wchar paths)
    const std::u16string& GetU16Str() const;

    /// Returns the path converted to a std::string
    std::string AsString();

    /// Returns the path converted to a std::u16string
    std::u16string AsU16Str();

private:
    LowPathType type;
    std::vector<u8> binary;
    std::string string;
    std::u16string u16str;
};
/// Interface to "fs:USER" service
class Interface : public Service::Interface {
public: