Add better support for UTF character conversions. Fixes Issue 78.
git-svn-id: http://google-breakpad.googlecode.com/svn/trunk@91 4c0a9323-5329-0410-9bdc-e9ce6186880e
This commit is contained in:
parent
8cc32d3bb8
commit
a2fa3dda54
2 changed files with 127 additions and 93 deletions
|
@ -38,9 +38,12 @@
|
|||
#include <unistd.h>
|
||||
|
||||
#include "client/minidump_file_writer-inl.h"
|
||||
#include "common/string_conversion.h"
|
||||
|
||||
namespace google_airbag {
|
||||
|
||||
const MDRVA MinidumpFileWriter::kInvalidMDRVA = static_cast<MDRVA>(-1);
|
||||
|
||||
// Construct a writer with no file attached: the descriptor is set to -1 (the
// value Open() asserts on and Close() checks for), and both the write position
// and the allocated size start at zero.
MinidumpFileWriter::MinidumpFileWriter()
    : file_(-1),
      position_(0),
      size_(0) {}
|
||||
|
||||
|
@ -48,9 +51,9 @@ MinidumpFileWriter::~MinidumpFileWriter() {
|
|||
Close();
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::Open(const std::string &path) {
|
||||
bool MinidumpFileWriter::Open(const char *path) {
|
||||
assert(file_ == -1);
|
||||
file_ = open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
||||
file_ = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
||||
|
||||
return file_ != -1;
|
||||
}
|
||||
|
@ -60,58 +63,98 @@ bool MinidumpFileWriter::Close() {
|
|||
|
||||
if (file_ != -1) {
|
||||
ftruncate(file_, position_);
|
||||
result = close(file_) == 0;
|
||||
result = (close(file_) == 0);
|
||||
file_ = -1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::WriteString(const wchar_t *str,
|
||||
bool MinidumpFileWriter::CopyStringToMDString(const wchar_t *str,
|
||||
unsigned int length,
|
||||
TypedMDRVA<MDString> *mdstring) {
|
||||
bool result = true;
|
||||
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
|
||||
// Shortcut if wchar_t is the same size as MDString's buffer
|
||||
result = mdstring->Copy(str, mdstring->get()->length);
|
||||
} else {
|
||||
u_int16_t out[2];
|
||||
int out_idx = 0;
|
||||
|
||||
// Copy the string character by character
|
||||
while (length && result) {
|
||||
UTF32ToUTF16Char(*str, out);
|
||||
if (!out[0])
|
||||
return false;
|
||||
|
||||
// Process one character at a time
|
||||
--length;
|
||||
++str;
|
||||
|
||||
// Append the one or two UTF-16 characters. The first one will be non-
|
||||
// zero, but the second one may be zero, depending on the conversion from
|
||||
// UTF-32.
|
||||
int out_count = out[1] ? 2 : 1;
|
||||
int out_size = sizeof(u_int16_t) * out_count;
|
||||
result = mdstring->CopyIndexAfterObject(out_idx, out, out_size);
|
||||
out_idx += out_count;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::CopyStringToMDString(const char *str,
|
||||
unsigned int length,
|
||||
TypedMDRVA<MDString> *mdstring) {
|
||||
bool result = true;
|
||||
u_int16_t out[2];
|
||||
int out_idx = 0;
|
||||
|
||||
// Copy the string character by character
|
||||
while (length && result) {
|
||||
int conversion_count = UTF8ToUTF16Char(str, length, out);
|
||||
if (!conversion_count)
|
||||
return false;
|
||||
|
||||
// Move the pointer along based on the nubmer of converted characters
|
||||
length -= conversion_count;
|
||||
str += conversion_count;
|
||||
|
||||
// Append the one or two UTF-16 characters
|
||||
int out_count = out[1] ? 2 : 1;
|
||||
int out_size = sizeof(u_int16_t) * out_count;
|
||||
result = mdstring->CopyIndexAfterObject(out_idx, out, out_size);
|
||||
out_idx += out_count;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename CharType>
|
||||
bool MinidumpFileWriter::WriteStringCore(const CharType *str,
|
||||
unsigned int length,
|
||||
MDLocationDescriptor *location) {
|
||||
assert(str);
|
||||
assert(location);
|
||||
// Calculate the mdstring length by either limiting to |length| as passed in
|
||||
// or by finding the location of the NULL character.
|
||||
unsigned int mdstring_length = 0;
|
||||
if (!length)
|
||||
length = INT_MAX;
|
||||
|
||||
unsigned int mdstring_length = 0;
|
||||
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
|
||||
}
|
||||
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length)
|
||||
;
|
||||
|
||||
// Allocate the string buffer
|
||||
TypedMDRVA<MDString> mdstring(this);
|
||||
|
||||
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
|
||||
return false;
|
||||
|
||||
// Set length excluding the NULL
|
||||
// Set length excluding the NULL and copy the string
|
||||
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
|
||||
|
||||
u_int16_t ch;
|
||||
bool result = true;
|
||||
|
||||
if (sizeof(wchar_t) == sizeof(u_int16_t)) {
|
||||
// Shortcut if wchar_t is the same size as MDString's buffer
|
||||
result = mdstring.Copy(str, mdstring.get()->length);
|
||||
} else {
|
||||
// Copy the string character by character
|
||||
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
|
||||
ch = str[c];
|
||||
// TODO: For the UTF-32->UTF-16 conversion, it's possible that there
|
||||
// are characters that will require more than one UTF-16 character to
|
||||
// represent it. Fully supporting this will require a more sophisticated
|
||||
// calculation of the size of the resulting string and for converting the
|
||||
// UTF-32 character into the two UTF-16 characters.
|
||||
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
|
||||
}
|
||||
}
|
||||
bool result = CopyStringToMDString(str, mdstring_length, &mdstring);
|
||||
|
||||
// NULL terminate
|
||||
if (result) {
|
||||
ch = 0;
|
||||
u_int16_t ch = 0;
|
||||
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
|
||||
|
||||
if (result)
|
||||
|
@ -121,52 +164,14 @@ bool MinidumpFileWriter::WriteString(const wchar_t *str,
|
|||
return result;
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::WriteString(const wchar_t *str, unsigned int length,
|
||||
MDLocationDescriptor *location) {
|
||||
return WriteStringCore(str, length, location);
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::WriteString(const char *str, unsigned int length,
|
||||
MDLocationDescriptor *location) {
|
||||
assert(str);
|
||||
assert(location);
|
||||
// Calculate the mdstring length by either limiting to |length| as passed in
|
||||
// or by finding the location of the NULL character.
|
||||
if (!length)
|
||||
length = INT_MAX;
|
||||
|
||||
unsigned int mdstring_length = 0;
|
||||
for (; mdstring_length < length && str[mdstring_length]; ++mdstring_length) {
|
||||
}
|
||||
|
||||
// Allocate the string buffer
|
||||
TypedMDRVA<MDString> mdstring(this);
|
||||
|
||||
if (!mdstring.AllocateObjectAndArray(mdstring_length + 1, sizeof(u_int16_t)))
|
||||
return false;
|
||||
|
||||
// Set length excluding the NULL
|
||||
mdstring.get()->length = mdstring_length * sizeof(u_int16_t);
|
||||
|
||||
u_int16_t ch;
|
||||
bool result = true;
|
||||
|
||||
// Copy the string character by character
|
||||
for (unsigned int c = 0; c < mdstring_length && result == true; c++) {
|
||||
ch = str[c];
|
||||
// TODO: For the UTF-8->UTF-16 conversion, it's possible that there are
|
||||
// characters that will convert one or more UTF-8 character into a single
|
||||
// UTF-16 character. Fully supporting this will require a more
|
||||
// sophisticated calculation of the size of the resulting string and for
|
||||
// converting the UTF-8 characters into a UTF-16 character.
|
||||
result = mdstring.CopyIndexAfterObject(c, &ch, sizeof(ch));
|
||||
}
|
||||
|
||||
// NULL terminate
|
||||
if (result) {
|
||||
ch = 0;
|
||||
result = mdstring.CopyIndexAfterObject(mdstring_length, &ch, sizeof(ch));
|
||||
|
||||
if (result)
|
||||
*location = mdstring.location();
|
||||
}
|
||||
|
||||
return result;
|
||||
return WriteStringCore(str, length, location);
|
||||
}
|
||||
|
||||
bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
||||
|
@ -177,7 +182,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
|||
|
||||
if (!mem.Allocate(size))
|
||||
return false;
|
||||
|
||||
if (!mem.Copy(src, mem.size()))
|
||||
return false;
|
||||
|
||||
|
@ -190,7 +194,6 @@ bool MinidumpFileWriter::WriteMemory(const void *src, size_t size,
|
|||
MDRVA MinidumpFileWriter::Allocate(size_t size) {
|
||||
assert(size);
|
||||
assert(file_ != -1);
|
||||
|
||||
size_t aligned_size = (size + 7) & ~7; // 64-bit alignment
|
||||
|
||||
if (position_ + aligned_size > size_) {
|
||||
|
@ -202,7 +205,6 @@ MDRVA MinidumpFileWriter::Allocate(size_t size) {
|
|||
growth = minimal_growth;
|
||||
|
||||
size_t new_size = size_ + growth;
|
||||
|
||||
if (ftruncate(file_, new_size) != 0)
|
||||
return kInvalidMDRVA;
|
||||
|
||||
|
|
|
@ -27,7 +27,9 @@
|
|||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// minidump_file_writer.h: Implements file-based minidump generation
|
||||
// minidump_file_writer.h: Implements file-based minidump generation. It's
|
||||
// intended to be used with the Google Airbag open source crash handling
|
||||
// project.
|
||||
|
||||
#ifndef CLIENT_MINIDUMP_FILE_WRITER_H__
|
||||
#define CLIENT_MINIDUMP_FILE_WRITER_H__
|
||||
|
@ -38,11 +40,26 @@
|
|||
|
||||
namespace google_airbag {
|
||||
|
||||
class UntypedMDRVA;
|
||||
template<typename MDType> class TypedMDRVA;
|
||||
|
||||
// The user of this class can Open() a file and add minidump streams, data, and
|
||||
// strings using the definitions in minidump_format.h. Since this class is
|
||||
// expected to be used in a situation where the current process may be
|
||||
// damaged, it will not allocate heap memory.
|
||||
// Sample usage:
|
||||
// MinidumpFileWriter writer;
|
||||
// writer.Open("/tmp/minidump.dmp");
|
||||
// TypedMDRVA<MDRawHeader> header(&writer);
|
||||
// header.Allocate();
|
||||
// header.get()->signature = MD_HEADER_SIGNATURE;
|
||||
// :
|
||||
// writer.Close();
|
||||
class MinidumpFileWriter {
|
||||
public:
|
||||
// Invalid MDRVA (Minidump Relative Virtual Address)
|
||||
// returned on failed allocation
|
||||
static const MDRVA kInvalidMDRVA = static_cast<MDRVA>(-1);
|
||||
static const MDRVA kInvalidMDRVA;
|
||||
|
||||
MinidumpFileWriter();
|
||||
~MinidumpFileWriter();
|
||||
|
@ -50,13 +67,13 @@ class MinidumpFileWriter {
|
|||
// Open |path| as the destination of the minidump data. Any existing file
|
||||
// will be overwritten.
|
||||
// Return true on success, or false on failure
|
||||
bool Open(const std::string &path);
|
||||
bool Open(const char *path);
|
||||
|
||||
// Close the current file
|
||||
// Return true on success, or false on failure
|
||||
bool Close();
|
||||
|
||||
// Write |str| to a MDString.
|
||||
// Copy the contents of |str| to a MDString and write it to the file.
|
||||
// |str| is expected to be either UTF-16 or UTF-32 depending on the size
|
||||
// of wchar_t.
|
||||
// Maximum |length| of characters to copy from |str|, or specify 0 to use the
|
||||
|
@ -66,7 +83,7 @@ class MinidumpFileWriter {
|
|||
bool WriteString(const wchar_t *str, unsigned int length,
|
||||
MDLocationDescriptor *location);
|
||||
|
||||
// Similar to above with |str| as an UTF-8 encoded string
|
||||
// Same as above, except with |str| as a UTF-8 string
|
||||
bool WriteString(const char *str, unsigned int length,
|
||||
MDLocationDescriptor *location);
|
||||
|
||||
|
@ -79,7 +96,7 @@ class MinidumpFileWriter {
|
|||
bool Copy(MDRVA position, const void *src, ssize_t size);
|
||||
|
||||
// Return the current position for writing to the minidump
|
||||
MDRVA position() const { return position_; }
|
||||
inline MDRVA position() const { return position_; }
|
||||
|
||||
private:
|
||||
friend class UntypedMDRVA;
|
||||
|
@ -97,6 +114,21 @@ class MinidumpFileWriter {
|
|||
|
||||
// Current allocated size
|
||||
size_t size_;
|
||||
|
||||
// Copy |length| characters from |str| to |mdstring|. These are distinct
|
||||
// because the underlying MDString is a UTF-16 based string. The wchar_t
|
||||
// variant may need to create a MDString that has more characters than the
|
||||
// source |str|, whereas the UTF-8 variant may coalesce characters to form
|
||||
// a single UTF-16 character.
|
||||
bool CopyStringToMDString(const wchar_t *str, unsigned int length,
|
||||
TypedMDRVA<MDString> *mdstring);
|
||||
bool CopyStringToMDString(const char *str, unsigned int length,
|
||||
TypedMDRVA<MDString> *mdstring);
|
||||
|
||||
// The common templated code for writing a string
|
||||
template <typename CharType>
|
||||
bool WriteStringCore(const CharType *str, unsigned int length,
|
||||
MDLocationDescriptor *location);
|
||||
};
|
||||
|
||||
// Represents an untyped allocated chunk
|
||||
|
@ -112,13 +144,13 @@ class UntypedMDRVA {
|
|||
bool Allocate(size_t size);
|
||||
|
||||
// Returns the current position or kInvalidMDRVA if allocation failed
|
||||
MDRVA position() const { return position_; }
|
||||
inline MDRVA position() const { return position_; }
|
||||
|
||||
// Number of bytes allocated
|
||||
size_t size() const { return size_; }
|
||||
inline size_t size() const { return size_; }
|
||||
|
||||
// Return size and position
|
||||
MDLocationDescriptor location() const {
|
||||
inline MDLocationDescriptor location() const {
|
||||
MDLocationDescriptor location = { size_, position_ };
|
||||
return location;
|
||||
}
|
||||
|
@ -128,7 +160,7 @@ class UntypedMDRVA {
|
|||
bool Copy(MDRVA position, const void *src, size_t size);
|
||||
|
||||
// Copy |size| bytes from |src| to the current position
|
||||
bool Copy(const void *src, size_t size) {
|
||||
inline bool Copy(const void *src, size_t size) {
|
||||
return Copy(position_, src, size);
|
||||
}
|
||||
|
||||
|
@ -157,7 +189,7 @@ class TypedMDRVA : public UntypedMDRVA {
|
|||
data_(),
|
||||
allocation_state_(UNALLOCATED) {}
|
||||
|
||||
~TypedMDRVA() {
|
||||
inline ~TypedMDRVA() {
|
||||
// Ensure that the data_ object is written out
|
||||
if (allocation_state_ != ARRAY)
|
||||
Flush();
|
||||
|
|
Loading…
Reference in a new issue