Add better support for UTF character conversions. Fixes Issue 78.

git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@91 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
waylonis 2006-12-16 01:01:19 +00:00
parent 8cc32d3bb8
commit a2fa3dda54
2 changed files with 127 additions and 93 deletions

View file

@ -38,9 +38,12 @@
#include <unistd.h> #include <unistd.h>
#include "client/minidump_file_writer-inl.h" #include "client/minidump_file_writer-inl.h"
#include "common/string_conversion.h"
namespace google_airbag { namespace google_airbag {
const MDRVA MinidumpFileWriter::kInvalidMDRVA = static_cast<MDRVA>(-1);
MinidumpFileWriter::MinidumpFileWriter() : file_(-1), position_(0), size_(0) { MinidumpFileWriter::MinidumpFileWriter() : file_(-1), position_(0), size_(0) {
} }
@ -48,9 +51,9 @@ MinidumpFileWriter::~MinidumpFileWriter() {
Close(); Close();
} }
bool MinidumpFileWriter::Open(const std::string &path) { bool MinidumpFileWriter::Open(const char *path) {
assert(file_ == -1); assert(file_ == -1);
file_ = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666); file_ = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
return file_ != -1; return file_ != -1;
} }
@ -60,58 +63,98 @@ bool MinidumpFileWriter::Close() {
if (file_ != -1) { if (file_ != -1) {
ftruncate(file_, position_); ftruncate(file_, position_);
result = close(file_) == 0; result = (close(file_) == 0);
file_ = -1; file_ = -1;
} }
return result; return result;
} }
// Copies the wide-character string |str| into |mdstring| as UTF-16.
// |length| is the number of wchar_t elements to convert on the
// character-by-character path. Returns false if a character converts to a
// zero first unit or if a copy into |mdstring| fails.
bool MinidumpFileWriter::WriteString(const wchar_t *str, bool MinidumpFileWriter::CopyStringToMDString(const wchar_t *str,
unsigned int length,
TypedMDRVA<MDString> *mdstring) {
bool result = true;
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
// Shortcut if wchar_t is the same size as MDString's buffer
// The byte count comes from the length already stored in the MDString;
// |length| is not consulted on this path.
result = mdstring->Copy(str, mdstring->get()->length);
} else {
u_int16_t out[2];
// Index, in UTF-16 units, of the next slot to write after the MDString object
int out_idx = 0;
// Copy the string character by character
while (length && result) {
UTF32ToUTF16Char(*str, out);
// A zero first unit is treated as a failed conversion
if (!out[0])
return false;
// Process one character at a time
--length;
++str;
// Append the one or two UTF-16 characters. The first one will be non-
// zero, but the second one may be zero, depending on the conversion from
// UTF-32.
int out_count = out[1] ? 2 : 1;
int out_size = sizeof(u_int16_t) * out_count;
result = mdstring->CopyIndexAfterObject(out_idx, out, out_size);
// NOTE(review): out_idx advances by two for a two-unit result, so it can
// exceed the number of source characters - confirm the caller sized the
// array after the MDString object to allow for that.
out_idx += out_count;
}
}
return result;
}
// Converts the UTF-8 string |str| (|length| chars) to UTF-16 and appends the
// resulting units to the array that follows the MDString object in
// |mdstring|. Returns false when UTF8ToUTF16Char reports that nothing was
// converted, or when writing into |mdstring| fails.
bool MinidumpFileWriter::CopyStringToMDString(const char *str,
                                              unsigned int length,
                                              TypedMDRVA<MDString> *mdstring) {
  u_int16_t utf16[2];
  int write_index = 0;
  bool ok = true;

  // Each pass converts one UTF-8 sequence and stores its UTF-16 form
  while (ok && length != 0) {
    const int consumed = UTF8ToUTF16Char(str, length, utf16);
    if (consumed == 0)
      return false;

    // Skip past the source chars that were just converted
    str += consumed;
    length -= consumed;

    // A non-zero second unit means the conversion produced two UTF-16 units
    const int unit_count = (utf16[1] != 0) ? 2 : 1;
    const int byte_count = sizeof(u_int16_t) * unit_count;
    ok = mdstring->CopyIndexAfterObject(write_index, utf16, byte_count);
    write_index += unit_count;
  }

  return ok;
}
template <typename CharType>
bool MinidumpFileWriter::WriteStringCore(const CharType *str,
unsigned int length, unsigned int length,
MDLocationDescriptor *location) { MDLocationDescriptor *location) {
assert(str); assert(str);
assert(location); assert(location);
// Calculate the mdstring length by either limiting to |length| as passed in // Calculate the mdstring length by either limiting to |length| as passed in
// or by finding the location of the NULL character. // or by finding the location of the NULL character.
unsigned int mdstring_length = 0;
if (!length) if (!length)
length = INT_MAX; length = INT_MAX;
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length)
unsigned int mdstring_length = 0; ;
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
}
// Allocate the string buffer // Allocate the string buffer
TypedMDRVA<MDString> mdstring(this); TypedMDRVA<MDString> mdstring(this);
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t))) if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
return false; return false;
// Set length excluding the NULL // Set length excluding the NULL and copy the string
mdstring.get()->length = mdstring_length * sizeof(u_int16_t); mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
bool result = CopyStringToMDString(str, mdstring_length, &mdstring);
u_int16_t ch;
bool result = true;
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
// Shortcut if wchar_t is the same size as MDString's buffer
result = mdstring.Copy(str, mdstring.get()->length);
} else {
// Copy the string character by character
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
ch = str[c];
// TODO: For the UTF-32->UTF-16 conversion, it's possible that there
// are characters that will require more than one UTF-16 character to
// represent it. Fully supporting this will require a more sophisticated
// calculation of the size of the resulting string and for converting the
// UTF-32 character into the two UTF-16 characters.
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
}
}
// NULL terminate // NULL terminate
if (result) { if (result) {
ch = 0; u_int16_t ch = 0;
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch)); result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
if (result) if (result)
@ -121,52 +164,14 @@ bool MinidumpFileWriter::WriteString(const wchar_t *str,
return result; return result;
} }
bool MinidumpFileWriter::WriteString(const wchar_t *str, unsigned int length,
MDLocationDescriptor *location) {
return WriteStringCore(str, length, location);
}
bool MinidumpFileWriter::WriteString(const char *str, unsigned int length, bool MinidumpFileWriter::WriteString(const char *str, unsigned int length,
MDLocationDescriptor *location) { MDLocationDescriptor *location) {
assert(str); return WriteStringCore(str, length, location);
assert(location);
// Calculate the mdstring length by either limiting to |length| as passed in
// or by finding the location of the NULL character.
if (!length)
length = INT_MAX;
unsigned int mdstring_length = 0;
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
}
// Allocate the string buffer
TypedMDRVA<MDString> mdstring(this);
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
return false;
// Set length excluding the NULL
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
u_int16_t ch;
bool result = true;
// Copy the string character by character
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
ch = str[c];
// TODO: For the UTF-8->UTF-16 conversion, it's possible that there are
// characters that will convert one or more UTF-8 character into a single
// UTF-16 character. Fully supporting this will require a more
// sophisticated calculation of the size of the resulting string and for
// converting the UTF-8 characters into a UTF-16 character.
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
}
// NULL terminate
if (result) {
ch = 0;
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
if (result)
*location = mdstring.location();
}
return result;
} }
bool MinidumpFileWriter::WriteMemory(const void *src, size_t size, bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
@ -177,7 +182,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
if (!mem.Allocate(size)) if (!mem.Allocate(size))
return false; return false;
if (!mem.Copy(src, mem.size())) if (!mem.Copy(src, mem.size()))
return false; return false;
@ -190,7 +194,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
MDRVA MinidumpFileWriter::Allocate(size_t size) { MDRVA MinidumpFileWriter::Allocate(size_t size) {
assert(size); assert(size);
assert(file_ != -1); assert(file_ != -1);
size_t aligned_size = (size + 7) & ~7; // 64-bit alignment size_t aligned_size = (size + 7) & ~7; // 64-bit alignment
if (position_ + aligned_size > size_) { if (position_ + aligned_size > size_) {
@ -202,7 +205,6 @@ MDRVA MinidumpFileWriter::Allocate(size_t size) {
growth = minimal_growth; growth = minimal_growth;
size_t new_size = size_ + growth; size_t new_size = size_ + growth;
if (ftruncate(file_, new_size) != 0) if (ftruncate(file_, new_size) != 0)
return kInvalidMDRVA; return kInvalidMDRVA;

View file

@ -27,7 +27,9 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// minidump_file_writer.h: Implements file-based minidump generation // minidump_file_writer.h: Implements file-based minidump generation. It's
// intended to be used with the Google Airbag open source crash handling
// project.
#ifndef CLIENT_MINIDUMP_FILE_WRITER_H__ #ifndef CLIENT_MINIDUMP_FILE_WRITER_H__
#define CLIENT_MINIDUMP_FILE_WRITER_H__ #define CLIENT_MINIDUMP_FILE_WRITER_H__
@ -38,11 +40,26 @@
namespace google_airbag { namespace google_airbag {
class UntypedMDRVA;
template<typename MDType> class TypedMDRVA;
// The user of this class can Open() a file and add minidump streams, data, and
// strings using the definitions in minidump_format.h. Since this class is
// expected to be used in a situation where the current process may be
// damaged, it will not allocate heap memory.
// Sample usage:
// MinidumpFileWriter writer;
// writer.Open("/tmp/minidump.dmp");
// TypedMDRVA<MDRawHeader> header(&writer);
// header.Allocate();
// header.get()->signature = MD_HEADER_SIGNATURE;
// :
// writer.Close();
class MinidumpFileWriter { class MinidumpFileWriter {
public: public:
// Invalid MDRVA (Minidump Relative Virtual Address) // Invalid MDRVA (Minidump Relative Virtual Address)
// returned on failed allocation // returned on failed allocation
static const MDRVA kInvalidMDRVA = static_cast<MDRVA>(-1); static const MDRVA kInvalidMDRVA;
MinidumpFileWriter(); MinidumpFileWriter();
~MinidumpFileWriter(); ~MinidumpFileWriter();
@ -50,13 +67,13 @@ class MinidumpFileWriter {
// Open |path| as the destination of the minidump data. Any existing file // Open |path| as the destination of the minidump data. Any existing file
// will be overwritten. // will be overwritten.
// Return true on success, or false on failure // Return true on success, or false on failure
bool Open(const std::string &path); bool Open(const char *path);
// Close the current file // Close the current file
// Return true on success, or false on failure // Return true on success, or false on failure
bool Close(); bool Close();
// Write |str| to a MDString. // Copy the contents of |str| to a MDString and write it to the file.
// |str| is expected to be either UTF-16 or UTF-32 depending on the size // |str| is expected to be either UTF-16 or UTF-32 depending on the size
// of wchar_t. // of wchar_t.
// Maximum |length| of characters to copy from |str|, or specify 0 to use the // Maximum |length| of characters to copy from |str|, or specify 0 to use the
@ -66,7 +83,7 @@ class MinidumpFileWriter {
bool WriteString(const wchar_t *str, unsigned int length, bool WriteString(const wchar_t *str, unsigned int length,
MDLocationDescriptor *location); MDLocationDescriptor *location);
// Similar to above with |str| as an UTF-8 encoded string // Same as above, except with |str| as a UTF-8 string
bool WriteString(const char *str, unsigned int length, bool WriteString(const char *str, unsigned int length,
MDLocationDescriptor *location); MDLocationDescriptor *location);
@ -79,7 +96,7 @@ class MinidumpFileWriter {
bool Copy(MDRVA position, const void *src, ssize_t size); bool Copy(MDRVA position, const void *src, ssize_t size);
// Return the current position for writing to the minidump // Return the current position for writing to the minidump
MDRVA position() const { return position_; } inline MDRVA position() const { return position_; }
private: private:
friend class UntypedMDRVA; friend class UntypedMDRVA;
@ -97,6 +114,21 @@ class MinidumpFileWriter {
// Current allocated size // Current allocated size
size_t size_; size_t size_;
// Copy |length| characters from |str| to |mdstring|. These are distinct
// because the underlying MDString is a UTF-16 based string. The wchar_t
// variant may need to create a MDString that has more characters than the
// source |str|, whereas the UTF-8 variant may coalesce characters to form
// a single UTF-16 character.
bool CopyStringToMDString(const wchar_t *str, unsigned int length,
TypedMDRVA<MDString> *mdstring);
bool CopyStringToMDString(const char *str, unsigned int length,
TypedMDRVA<MDString> *mdstring);
// The common templated code for writing a string
template <typename CharType>
bool WriteStringCore(const CharType *str, unsigned int length,
MDLocationDescriptor *location);
}; };
// Represents an untyped allocated chunk // Represents an untyped allocated chunk
@ -112,13 +144,13 @@ class UntypedMDRVA {
bool Allocate(size_t size); bool Allocate(size_t size);
// Returns the current position or kInvalidMDRVA if allocation failed // Returns the current position or kInvalidMDRVA if allocation failed
MDRVA position() const { return position_; } inline MDRVA position() const { return position_; }
// Number of bytes allocated // Number of bytes allocated
size_t size() const { return size_; } inline size_t size() const { return size_; }
// Return size and position // Return size and position
MDLocationDescriptor location() const { inline MDLocationDescriptor location() const {
MDLocationDescriptor location = { size_, position_ }; MDLocationDescriptor location = { size_, position_ };
return location; return location;
} }
@ -128,7 +160,7 @@ class UntypedMDRVA {
bool Copy(MDRVA position, const void *src, size_t size); bool Copy(MDRVA position, const void *src, size_t size);
// Copy |size| bytes from |src| to the current position // Copy |size| bytes from |src| to the current position
bool Copy(const void *src, size_t size) { inline bool Copy(const void *src, size_t size) {
return Copy(position_, src, size); return Copy(position_, src, size);
} }
@ -157,7 +189,7 @@ class TypedMDRVA : public UntypedMDRVA {
data_(), data_(),
allocation_state_(UNALLOCATED) {} allocation_state_(UNALLOCATED) {}
~TypedMDRVA() { inline ~TypedMDRVA() {
// Ensure that the data_ object is written out // Ensure that the data_ object is written out
if (allocation_state_ != ARRAY) if (allocation_state_ != ARRAY)
Flush(); Flush();