[Cmake-commits] [cmake-commits] king committed CMakeLists.txt 1.434 1.435 cmXMLSafe.cxx 1.6 1.7 cm_utf8.c NONE 1.1 cm_utf8.h NONE 1.1
cmake-commits at cmake.org
cmake-commits at cmake.org
Tue Dec 8 15:43:58 EST 2009
Update of /cvsroot/CMake/CMake/Source
In directory public:/mounts/ram/cvs-serv1213/Source
Modified Files:
CMakeLists.txt cmXMLSafe.cxx
Added Files:
cm_utf8.c cm_utf8.h
Log Message:
CTest: Do not munge UTF-8 output in XML files
CTest filters the output from tools and tests to ensure that the XML
build/test result documents it generates have valid characters.
Previously we just converted all non-ASCII bytes into XML-escaped
Unicode characters of the corresponding index. This does not preserve
tool output encoded in UTF-8.
We now assume UTF-8 output from tools and implement decoding as
specified in RFC 3629. Valid characters are preserved, possibly with
XML escaping. Invalid byte sequences and characters are converted to
human-readable hex values with distinguishing tags. See issue #10003.
--- NEW FILE: cm_utf8.h ---
/*============================================================================
CMake - Cross Platform Makefile Generator
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#ifndef cm_utf8_h
#define cm_utf8_h
#ifdef __cplusplus
extern "C" {
#endif
/** Decode one UTF-8 character from the input byte range [first, last).

    first  Pointer to the first byte to decode.  Callers are expected to
           pass a non-empty range (first != last).
    last   Pointer one past the final byte that may be consumed.
    pc     Receives the decoded unicode character number on success.  It
           is not written when decoding fails.

    Besides the 1- to 4-byte forms of RFC 3629, the pre-RFC 5- and 6-byte
    forms are decoded as well, so the value stored in *pc may exceed
    0x10FFFF; range checks are left to the caller.

    On success, stores the unicode character number in *pc and returns the
    first position not extracted.  On failure (invalid leading byte,
    missing or malformed continuation byte, or an overlong encoding),
    returns 0. */
const char* cm_utf8_decode_character(const char* first, const char* last,
unsigned int* pc);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif
--- NEW FILE: cm_utf8.c ---
/*============================================================================
CMake - Cross Platform Makefile Generator
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#include "cm_utf8.h"
/*
RFC 3629
07-bit: 0xxxxxxx
11-bit: 110xxxxx 10xxxxxx
16-bit: 1110xxxx 10xxxxxx 10xxxxxx
21-bit: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
Pre-RFC Compatibility
26-bit: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
31-bit: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
/* Number of leading one bits before the first zero bit, indexed by byte
   value.  0 marks a one-byte (ASCII) character, 1 a continuation byte,
   2..6 the leading byte of a sequence of that many bytes, and 7..8 the
   bytes 0xFE and 0xFF which never appear in UTF-8. */
static unsigned char const cm_utf8_ones[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,7,8
};

/* Mask away control bits from bytes with n leading ones.  Entry 0 is the
   payload mask of a plain 0xxxxxxx byte; the decoder returns such bytes
   directly and never indexes it, but it is kept correct for consistency
   with the table's description. */
static unsigned char const cm_utf8_mask[7] = {
  0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01
};

/* Minimum allowed value when first byte has n leading ones.  A decoded
   value below this bound could have been written with fewer bytes, i.e.
   the sequence is an overlong encoding. */
static unsigned int const cm_utf8_min[7] = {
  0, 0, 1u<<7, 1u<<11, 1u<<16, 1u<<21, 1u<<26 /*, 1u<<31 */
};

/*--------------------------------------------------------------------------*/
/* Decode one UTF-8 character from the byte range [first, last).

   On success, stores the character number in *pc and returns the first
   position not consumed.  On failure, returns 0 and leaves *pc untouched.
   Failure cases: empty range, invalid leading byte (continuation byte,
   0xFE or 0xFF), a sequence cut short by the end of the range, a
   non-continuation byte inside a sequence, or an overlong encoding.

   The pre-RFC 5- and 6-byte forms are decoded too, so *pc may exceed
   0x10FFFF; callers that need a stricter range must check it. */
const char* cm_utf8_decode_character(const char* first, const char* last,
                                     unsigned int* pc)
{
  unsigned char c;
  unsigned char ones;
  unsigned char left;
  unsigned int uc;

  /* We need at least one byte; reading *first on an empty range would
     run past the end of the caller's buffer. */
  if(first == last)
  {
    return 0;
  }

  /* Count leading ones in the first byte. */
  c = (unsigned char)*first++;
  ones = cm_utf8_ones[c];
  switch(ones)
  {
    case 0: *pc = c; return first;    /* One-byte character. */
    case 1: case 7: case 8: return 0; /* Invalid leading byte. */
    default: break;
  }

  /* Extract bits from this multi-byte character: the payload of the
     leading byte followed by six bits from each continuation byte. */
  uc = c & cm_utf8_mask[ones];
  for(left = (unsigned char)(ones - 1); left && first != last; --left)
  {
    c = (unsigned char)*first++;
    if(cm_utf8_ones[c] != 1)
    {
      return 0; /* Not a continuation byte. */
    }
    uc = (uc << 6) | (c & cm_utf8_mask[1]);
  }

  /* Reject sequences truncated by the end of input and overlong forms. */
  if(left > 0 || uc < cm_utf8_min[ones])
  {
    return 0;
  }

  *pc = uc;
  return first;
}
Index: cmXMLSafe.cxx
===================================================================
RCS file: /cvsroot/CMake/CMake/Source/cmXMLSafe.cxx,v
retrieving revision 1.6
retrieving revision 1.7
diff -C 2 -d -r1.6 -r1.7
*** cmXMLSafe.cxx 28 Sep 2009 15:42:50 -0000 1.6
--- cmXMLSafe.cxx 8 Dec 2009 20:43:55 -0000 1.7
***************
*** 12,15 ****
--- 12,17 ----
#include "cmXMLSafe.h"
+ #include "cm_utf8.h"
+
#include <cmsys/ios/iostream>
#include <cmsys/ios/sstream>
***************
*** 54,95 ****
char const* first = self.Data;
char const* last = self.Data + self.Size;
! for(char const* ci = first; ci != last; ++ci)
{
! unsigned char c = static_cast<unsigned char>(*ci);
! switch(c)
{
! case '&': os << "&amp;"; break;
! case '<': os << "&lt;"; break;
! case '>': os << "&gt;"; break;
! case '"': os << (self.DoQuotes? "&quot;" : "\""); break;
! case '\'': os << (self.DoQuotes? "&apos;" : "'"); break;
! case '\t': os << "\t"; break;
! case '\n': os << "\n"; break;
! case '\r': break; // Ignore CR
! default:
! if(c >= 0x20 && c <= 0x7f)
! {
! os.put(static_cast<char>(c));
! }
! else
{
! // TODO: More complete treatment of program output character
! // encoding. Instead of escaping these bytes, we should
! // handle the current locale and its encoding.
! char buf[16];
! // http://www.w3.org/TR/REC-xml/#NT-Char
! if(c >= 0x80)
! {
! sprintf(buf, "&#x%hx;", static_cast<unsigned short>(c));
! }
! else
! {
! // We cannot use "&#x%hx;" here because this value is not
! // valid in XML. Instead use a human-readable hex value.
! sprintf(buf, "<0x%hx>", static_cast<unsigned short>(c));
! }
! os << buf;
}
! break;
}
}
--- 56,100 ----
char const* first = self.Data;
char const* last = self.Data + self.Size;
! while(first != last)
{
! unsigned int ch;
! if(const char* next = cm_utf8_decode_character(first, last, &ch))
{
! // http://www.w3.org/TR/REC-xml/#NT-Char
! if((ch >= 0x20 && ch <= 0xD7FF) ||
! (ch >= 0xE000 && ch <= 0xFFFD) ||
! (ch >= 0x10000 && ch <= 0x10FFFF) ||
! ch == 0x9 || ch == 0xA || ch == 0xD)
! {
! switch(ch)
{
! // Escape XML control characters.
! case '&': os << "&amp;"; break;
! case '<': os << "&lt;"; break;
! case '>': os << "&gt;"; break;
! case '"': os << (self.DoQuotes? "&quot;" : "\""); break;
! case '\'': os << (self.DoQuotes? "&apos;" : "'"); break;
! case '\r': break; // Ignore CR
! // Print the UTF-8 character.
! default: os.write(first, next-first); break;
}
! }
! else
! {
! // Use a human-readable hex value for this invalid character.
! char buf[16];
! sprintf(buf, "%X", ch);
! os << "[NON-XML-CHAR-0x" << buf << "]";
! }
!
! first = next;
! }
! else
! {
! ch = static_cast<unsigned char>(*first++);
! // Use a human-readable hex value for this invalid byte.
! char buf[16];
! sprintf(buf, "%X", ch);
! os << "[NON-UTF-8-BYTE-0x" << buf << "]";
}
}
Index: CMakeLists.txt
===================================================================
RCS file: /cvsroot/CMake/CMake/Source/CMakeLists.txt,v
retrieving revision 1.434
retrieving revision 1.435
diff -C 2 -d -r1.434 -r1.435
*** CMakeLists.txt 8 Dec 2009 16:44:28 -0000 1.434
--- CMakeLists.txt 8 Dec 2009 20:43:55 -0000 1.435
***************
*** 247,250 ****
--- 247,253 ----
cmakewizard.cxx
cmakewizard.h
+
+ cm_utf8.h
+ cm_utf8.c
)
More information about the Cmake-commits
mailing list