Add better support for UTF character conversions. Fixes Issue 78.
git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@91 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
parent
8cc32d3bb8
commit
a2fa3dda54
2 changed files with 127 additions and 93 deletions
|
@ -38,9 +38,12 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "client/minidump_file_writer-inl.h"
|
#include "client/minidump_file_writer-inl.h"
|
||||||
|
#include "common/string_conversion.h"
|
||||||
|
|
||||||
namespace google_airbag {
|
namespace google_airbag {
|
||||||
|
|
||||||
|
const MDRVA MinidumpFileWriter::kInvalidMDRVA = static_cast<MDRVA>(-1);
|
||||||
|
|
||||||
MinidumpFileWriter::MinidumpFileWriter() : file_(-1), position_(0), size_(0) {
|
MinidumpFileWriter::MinidumpFileWriter() : file_(-1), position_(0), size_(0) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,9 +51,9 @@ MinidumpFileWriter::~MinidumpFileWriter() {
|
||||||
Close();
|
Close();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MinidumpFileWriter::Open(const std::string &path) {
|
bool MinidumpFileWriter::Open(const char *path) {
|
||||||
assert(file_ == -1);
|
assert(file_ == -1);
|
||||||
file_ = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
file_ = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
||||||
|
|
||||||
return file_ != -1;
|
return file_ != -1;
|
||||||
}
|
}
|
||||||
|
@ -60,58 +63,98 @@ bool MinidumpFileWriter::Close() {
|
||||||
|
|
||||||
if (file_ != -1) {
|
if (file_ != -1) {
|
||||||
ftruncate(file_, position_);
|
ftruncate(file_, position_);
|
||||||
result = close(file_) == 0;
|
result = (close(file_) == 0);
|
||||||
file_ = -1;
|
file_ = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MinidumpFileWriter::WriteString(const wchar_t *str,
|
bool MinidumpFileWriter::CopyStringToMDString(const wchar_t *str,
|
||||||
|
unsigned int length,
|
||||||
|
TypedMDRVA<MDString> *mdstring) {
|
||||||
|
bool result = true;
|
||||||
|
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
|
||||||
|
// Shortcut if wchar_t is the same size as MDString's buffer
|
||||||
|
result = mdstring->Copy(str, mdstring->get()->length);
|
||||||
|
} else {
|
||||||
|
u_int16_t out[2];
|
||||||
|
int out_idx = 0;
|
||||||
|
|
||||||
|
// Copy the string character by character
|
||||||
|
while (length && result) {
|
||||||
|
UTF32ToUTF16Char(*str, out);
|
||||||
|
if (!out[0])
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Process one character at a time
|
||||||
|
--length;
|
||||||
|
++str;
|
||||||
|
|
||||||
|
// Append the one or two UTF-16 characters. The first one will be non-
|
||||||
|
// zero, but the second one may be zero, depending on the conversion from
|
||||||
|
// UTF-32.
|
||||||
|
int out_count = out[1] ? 2 : 1;
|
||||||
|
int out_size = sizeof(u_int16_t) * out_count;
|
||||||
|
result = mdstring->CopyIndexAfterObject(out_idx, out, out_size);
|
||||||
|
out_idx += out_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MinidumpFileWriter::CopyStringToMDString(const char *str,
|
||||||
|
unsigned int length,
|
||||||
|
TypedMDRVA<MDString> *mdstring) {
|
||||||
|
bool result = true;
|
||||||
|
u_int16_t out[2];
|
||||||
|
int out_idx = 0;
|
||||||
|
|
||||||
|
// Copy the string character by character
|
||||||
|
while (length && result) {
|
||||||
|
int conversion_count = UTF8ToUTF16Char(str, length, out);
|
||||||
|
if (!conversion_count)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Move the pointer along based on the number of converted characters
|
||||||
|
length -= conversion_count;
|
||||||
|
str += conversion_count;
|
||||||
|
|
||||||
|
// Append the one or two UTF-16 characters
|
||||||
|
int out_count = out[1] ? 2 : 1;
|
||||||
|
int out_size = sizeof(u_int16_t) * out_count;
|
||||||
|
result = mdstring->CopyIndexAfterObject(out_idx, out, out_size);
|
||||||
|
out_idx += out_count;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename CharType>
|
||||||
|
bool MinidumpFileWriter::WriteStringCore(const CharType *str,
|
||||||
unsigned int length,
|
unsigned int length,
|
||||||
MDLocationDescriptor *location) {
|
MDLocationDescriptor *location) {
|
||||||
assert(str);
|
assert(str);
|
||||||
assert(location);
|
assert(location);
|
||||||
// Calculate the mdstring length by either limiting to |length| as passed in
|
// Calculate the mdstring length by either limiting to |length| as passed in
|
||||||
// or by finding the location of the NULL character.
|
// or by finding the location of the NULL character.
|
||||||
|
unsigned int mdstring_length = 0;
|
||||||
if (!length)
|
if (!length)
|
||||||
length = INT_MAX;
|
length = INT_MAX;
|
||||||
|
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length)
|
||||||
unsigned int mdstring_length = 0;
|
;
|
||||||
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate the string buffer
|
// Allocate the string buffer
|
||||||
TypedMDRVA<MDString> mdstring(this);
|
TypedMDRVA<MDString> mdstring(this);
|
||||||
|
|
||||||
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
|
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Set length excluding the NULL
|
// Set length excluding the NULL and copy the string
|
||||||
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
|
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
|
||||||
|
bool result = CopyStringToMDString(str, mdstring_length, &mdstring);
|
||||||
u_int16_t ch;
|
|
||||||
bool result = true;
|
|
||||||
|
|
||||||
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
|
|
||||||
// Shortcut if wchar_t is the same size as MDString's buffer
|
|
||||||
result = mdstring.Copy(str, mdstring.get()->length);
|
|
||||||
} else {
|
|
||||||
// Copy the string character by character
|
|
||||||
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
|
|
||||||
ch = str[c];
|
|
||||||
// TODO: For the UTF-32->UTF-16 conversion, it's possible that there
|
|
||||||
// are characters that will require more than one UTF-16 character to
|
|
||||||
// represent it. Fully supporting this will require a more sophisticated
|
|
||||||
// calculation of the size of the resulting string and for converting the
|
|
||||||
// UTF-32 character into the two UTF-16 characters.
|
|
||||||
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NULL terminate
|
// NULL terminate
|
||||||
if (result) {
|
if (result) {
|
||||||
ch = 0;
|
u_int16_t ch = 0;
|
||||||
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
|
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
|
||||||
|
|
||||||
if (result)
|
if (result)
|
||||||
|
@ -121,52 +164,14 @@ bool MinidumpFileWriter::WriteString(const wchar_t *str,
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MinidumpFileWriter::WriteString(const wchar_t *str, unsigned int length,
|
||||||
|
MDLocationDescriptor *location) {
|
||||||
|
return WriteStringCore(str, length, location);
|
||||||
|
}
|
||||||
|
|
||||||
bool MinidumpFileWriter::WriteString(const char *str, unsigned int length,
|
bool MinidumpFileWriter::WriteString(const char *str, unsigned int length,
|
||||||
MDLocationDescriptor *location) {
|
MDLocationDescriptor *location) {
|
||||||
assert(str);
|
return WriteStringCore(str, length, location);
|
||||||
assert(location);
|
|
||||||
// Calculate the mdstring length by either limiting to |length| as passed in
|
|
||||||
// or by finding the location of the NULL character.
|
|
||||||
if (!length)
|
|
||||||
length = INT_MAX;
|
|
||||||
|
|
||||||
unsigned int mdstring_length = 0;
|
|
||||||
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate the string buffer
|
|
||||||
TypedMDRVA<MDString> mdstring(this);
|
|
||||||
|
|
||||||
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Set length excluding the NULL
|
|
||||||
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
|
|
||||||
|
|
||||||
u_int16_t ch;
|
|
||||||
bool result = true;
|
|
||||||
|
|
||||||
// Copy the string character by character
|
|
||||||
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
|
|
||||||
ch = str[c];
|
|
||||||
// TODO: For the UTF-8->UTF-16 conversion, it's possible that there are
|
|
||||||
// characters that will convert one or more UTF-8 character into a single
|
|
||||||
// UTF-16 character. Fully supporting this will require a more
|
|
||||||
// sophisticated calculation of the size of the resulting string and for
|
|
||||||
// converting the UTF-8 characters into a UTF-16 character.
|
|
||||||
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
|
|
||||||
}
|
|
||||||
|
|
||||||
// NULL terminate
|
|
||||||
if (result) {
|
|
||||||
ch = 0;
|
|
||||||
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
|
|
||||||
|
|
||||||
if (result)
|
|
||||||
*location = mdstring.location();
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
||||||
|
@ -177,7 +182,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
||||||
|
|
||||||
if (!mem.Allocate(size))
|
if (!mem.Allocate(size))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!mem.Copy(src, mem.size()))
|
if (!mem.Copy(src, mem.size()))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -190,7 +194,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
||||||
MDRVA MinidumpFileWriter::Allocate(size_t size) {
|
MDRVA MinidumpFileWriter::Allocate(size_t size) {
|
||||||
assert(size);
|
assert(size);
|
||||||
assert(file_ != -1);
|
assert(file_ != -1);
|
||||||
|
|
||||||
size_t aligned_size = (size + 7) & ~7; // 64-bit alignment
|
size_t aligned_size = (size + 7) & ~7; // 64-bit alignment
|
||||||
|
|
||||||
if (position_ + aligned_size > size_) {
|
if (position_ + aligned_size > size_) {
|
||||||
|
@ -202,7 +205,6 @@ MDRVA MinidumpFileWriter::Allocate(size_t size) {
|
||||||
growth = minimal_growth;
|
growth = minimal_growth;
|
||||||
|
|
||||||
size_t new_size = size_ + growth;
|
size_t new_size = size_ + growth;
|
||||||
|
|
||||||
if (ftruncate(file_, new_size) != 0)
|
if (ftruncate(file_, new_size) != 0)
|
||||||
return kInvalidMDRVA;
|
return kInvalidMDRVA;
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,9 @@
|
||||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
// minidump_file_writer.h: Implements file-based minidump generation
|
// minidump_file_writer.h: Implements file-based minidump generation. It's
|
||||||
|
// intended to be used with the Google Airbag open source crash handling
|
||||||
|
// project.
|
||||||
|
|
||||||
#ifndef CLIENT_MINIDUMP_FILE_WRITER_H__
|
#ifndef CLIENT_MINIDUMP_FILE_WRITER_H__
|
||||||
#define CLIENT_MINIDUMP_FILE_WRITER_H__
|
#define CLIENT_MINIDUMP_FILE_WRITER_H__
|
||||||
|
@ -38,11 +40,26 @@
|
||||||
|
|
||||||
namespace google_airbag {
|
namespace google_airbag {
|
||||||
|
|
||||||
|
class UntypedMDRVA;
|
||||||
|
template<typename MDType> class TypedMDRVA;
|
||||||
|
|
||||||
|
// The user of this class can Open() a file and add minidump streams, data, and
|
||||||
|
// strings using the definitions in minidump_format.h. Since this class is
|
||||||
|
// expected to be used in a situation where the current process may be
|
||||||
|
// damaged, it will not allocate heap memory.
|
||||||
|
// Sample usage:
|
||||||
|
// MinidumpFileWriter writer;
|
||||||
|
// writer.Open("/tmp/minidump.dmp");
|
||||||
|
// TypedMDRVA<MDRawHeader> header(&writer);
|
||||||
|
// header.Allocate();
|
||||||
|
// header.get()->signature = MD_HEADER_SIGNATURE;
|
||||||
|
// :
|
||||||
|
// writer.Close();
|
||||||
class MinidumpFileWriter {
|
class MinidumpFileWriter {
|
||||||
public:
|
public:
|
||||||
// Invalid MDRVA (Minidump Relative Virtual Address)
|
// Invalid MDRVA (Minidump Relative Virtual Address)
|
||||||
// returned on failed allocation
|
// returned on failed allocation
|
||||||
static const MDRVA kInvalidMDRVA = static_cast<MDRVA>(-1);
|
static const MDRVA kInvalidMDRVA;
|
||||||
|
|
||||||
MinidumpFileWriter();
|
MinidumpFileWriter();
|
||||||
~MinidumpFileWriter();
|
~MinidumpFileWriter();
|
||||||
|
@ -50,13 +67,13 @@ class MinidumpFileWriter {
|
||||||
// Open |path| as the destination of the minidump data. Any existing file
|
// Open |path| as the destination of the minidump data. Any existing file
|
||||||
// will be overwritten.
|
// will be overwritten.
|
||||||
// Return true on success, or false on failure
|
// Return true on success, or false on failure
|
||||||
bool Open(const std::string &path);
|
bool Open(const char *path);
|
||||||
|
|
||||||
// Close the current file
|
// Close the current file
|
||||||
// Return true on success, or false on failure
|
// Return true on success, or false on failure
|
||||||
bool Close();
|
bool Close();
|
||||||
|
|
||||||
// Write |str| to a MDString.
|
// Copy the contents of |str| to a MDString and write it to the file.
|
||||||
// |str| is expected to be either UTF-16 or UTF-32 depending on the size
|
// |str| is expected to be either UTF-16 or UTF-32 depending on the size
|
||||||
// of wchar_t.
|
// of wchar_t.
|
||||||
// Maximum |length| of characters to copy from |str|, or specify 0 to use the
|
// Maximum |length| of characters to copy from |str|, or specify 0 to use the
|
||||||
|
@ -66,7 +83,7 @@ class MinidumpFileWriter {
|
||||||
bool WriteString(const wchar_t *str, unsigned int length,
|
bool WriteString(const wchar_t *str, unsigned int length,
|
||||||
MDLocationDescriptor *location);
|
MDLocationDescriptor *location);
|
||||||
|
|
||||||
// Similar to above with |str| as an UTF-8 encoded string
|
// Same as above, except with |str| as a UTF-8 string
|
||||||
bool WriteString(const char *str, unsigned int length,
|
bool WriteString(const char *str, unsigned int length,
|
||||||
MDLocationDescriptor *location);
|
MDLocationDescriptor *location);
|
||||||
|
|
||||||
|
@ -79,7 +96,7 @@ class MinidumpFileWriter {
|
||||||
bool Copy(MDRVA position, const void *src, ssize_t size);
|
bool Copy(MDRVA position, const void *src, ssize_t size);
|
||||||
|
|
||||||
// Return the current position for writing to the minidump
|
// Return the current position for writing to the minidump
|
||||||
MDRVA position() const { return position_; }
|
inline MDRVA position() const { return position_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
friend class UntypedMDRVA;
|
friend class UntypedMDRVA;
|
||||||
|
@ -97,6 +114,21 @@ class MinidumpFileWriter {
|
||||||
|
|
||||||
// Current allocated size
|
// Current allocated size
|
||||||
size_t size_;
|
size_t size_;
|
||||||
|
|
||||||
|
// Copy |length| characters from |str| to |mdstring|. These are distinct
|
||||||
|
// because the underlying MDString is a UTF-16 based string. The wchar_t
|
||||||
|
// variant may need to create a MDString that has more characters than the
|
||||||
|
// source |str|, whereas the UTF-8 variant may coalesce characters to form
|
||||||
|
// a single UTF-16 character.
|
||||||
|
bool CopyStringToMDString(const wchar_t *str, unsigned int length,
|
||||||
|
TypedMDRVA<MDString> *mdstring);
|
||||||
|
bool CopyStringToMDString(const char *str, unsigned int length,
|
||||||
|
TypedMDRVA<MDString> *mdstring);
|
||||||
|
|
||||||
|
// The common templated code for writing a string
|
||||||
|
template <typename CharType>
|
||||||
|
bool WriteStringCore(const CharType *str, unsigned int length,
|
||||||
|
MDLocationDescriptor *location);
|
||||||
};
|
};
|
||||||
|
|
||||||
// Represents an untyped allocated chunk
|
// Represents an untyped allocated chunk
|
||||||
|
@ -112,13 +144,13 @@ class UntypedMDRVA {
|
||||||
bool Allocate(size_t size);
|
bool Allocate(size_t size);
|
||||||
|
|
||||||
// Returns the current position or kInvalidMDRVA if allocation failed
|
// Returns the current position or kInvalidMDRVA if allocation failed
|
||||||
MDRVA position() const { return position_; }
|
inline MDRVA position() const { return position_; }
|
||||||
|
|
||||||
// Number of bytes allocated
|
// Number of bytes allocated
|
||||||
size_t size() const { return size_; }
|
inline size_t size() const { return size_; }
|
||||||
|
|
||||||
// Return size and position
|
// Return size and position
|
||||||
MDLocationDescriptor location() const {
|
inline MDLocationDescriptor location() const {
|
||||||
MDLocationDescriptor location = { size_, position_ };
|
MDLocationDescriptor location = { size_, position_ };
|
||||||
return location;
|
return location;
|
||||||
}
|
}
|
||||||
|
@ -128,7 +160,7 @@ class UntypedMDRVA {
|
||||||
bool Copy(MDRVA position, const void *src, size_t size);
|
bool Copy(MDRVA position, const void *src, size_t size);
|
||||||
|
|
||||||
// Copy |size| bytes from |src| to the current position
|
// Copy |size| bytes from |src| to the current position
|
||||||
bool Copy(const void *src, size_t size) {
|
inline bool Copy(const void *src, size_t size) {
|
||||||
return Copy(position_, src, size);
|
return Copy(position_, src, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,7 +189,7 @@ class TypedMDRVA : public UntypedMDRVA {
|
||||||
data_(),
|
data_(),
|
||||||
allocation_state_(UNALLOCATED) {}
|
allocation_state_(UNALLOCATED) {}
|
||||||
|
|
||||||
~TypedMDRVA() {
|
inline ~TypedMDRVA() {
|
||||||
// Ensure that the data_ object is written out
|
// Ensure that the data_ object is written out
|
||||||
if (allocation_state_ != ARRAY)
|
if (allocation_state_ != ARRAY)
|
||||||
Flush();
|
Flush();
|
||||||
|
|
Loading…
Reference in a new issue