zenXML
Straightforward C++ XML Processing
|
// **************************************************************************
// * This file is part of the zenXML project. It is distributed under the   *
// * Boost Software License, Version 1.0. See accompanying file             *
// * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt.       *
// * Copyright (C) 2011 ZenJu (zhnmju123 AT gmx.de)                         *
// **************************************************************************

#ifndef STRING_TOOLS_HEADER_213458973046
#define STRING_TOOLS_HEADER_213458973046

#include <cstddef> //size_t
#include <cctype> //isspace
#include <cwctype> //iswspace
#include <cwchar> //swprintf
#include <cstdio> //sprintf
#include <cstdlib> //strtod
#include <algorithm>
#include <cassert>
#include <functional>
#include <limits> //numeric_limits
#include <sstream>
#include <vector>
#include "loki/TypeManip.h"
#include "loki/EmptyType.h"
#include "loki/TypeTraits.h"
#include "assert_static.h"
#ifdef _MSC_VER
template <> struct Loki::IsCustomUnsignedInt<unsigned __int64> { enum { value = 1 }; };
template <> struct Loki::IsCustomSignedInt <signed __int64> { enum { value = 1 }; };
#endif


//enhance arbitrary string class with useful non-member functions:
namespace zen
{
template <class C> size_t cStringLength(const C* str); //strlen()
template <class C> bool cStringIsWhiteSpace(C ch);
template <class C> bool cStringIsDigit(C ch);

//uniform access to string-like types: classes and character arrays
/*
strBegin():
    std::wstring str(L"dummy");
    char array[] = "dummy";
    const wchar_t* iter = strBegin(str); //returns str.c_str()
    const char* iter2 = strBegin(array); //returns array

strLength():
    strLength(str); //equals str.size()
    strLength(array); //equals cStringLength(array)

StringTraits<>:
    StringTraits<std::wstring>::CharType //equals wchar_t
    StringTraits<wchar_t[5]> ::CharType //equals wchar_t
    StringTraits<const wchar_t*>::isStringLike; //equals "true"
    StringTraits<const int*> ::isStringLike; //equals "false"
    StringTraits<std::wstring>::isStringClass //equals "true"
    StringTraits<wchar_t[5]> ::isStringClass //equals "false"
*/

template <class S, class T> bool startsWith(const S& str, const T& prefix); //both S and T can be strings or char/wchar_t arrays or simple char/wchar_t
template <class S, class T> bool endsWith (const S& str, const T& postfix); //

template <class S, class T> S afterLast (const S& str, const T& ch); //returns the whole string if ch not found
template <class S, class T> S beforeLast (const S& str, const T& ch); //returns empty string if ch not found
template <class S, class T> S afterFirst (const S& str, const T& ch); //returns empty string if ch not found
template <class S, class T> S beforeFirst(const S& str, const T& ch); //returns the whole string if ch not found

template <class S, class T> std::vector<S> split(const S& str, const T& delimiter);
template <class S> void truncate(S& str, size_t newLen);
template <class S, class T, class U> void replace(S& str, const T& old, const U& replacement, bool replaceAll = true);
template <class S> void trim(S& str, bool fromLeft = true, bool fromRight = true);

//high-performance conversion from numbers to strings
template <class S, class Num> S toString(const Num& number);
template <class Num, class S> Num toNumber(const S& str);

//string to string conversion: converts string-like type into compatible target string class
template <class T, class S> T cvrtString(const S& str);



00104 00105 00106 00107 00108 00109 00110 00111 00112 00113 00114 //---------------------- implementation ---------------------- 00115 00116 template <class C> inline 00117 size_t cStringLength(const C* str) //strlen() 00118 { 00119 assert_static((Loki::IsSameType<C, char>::value || Loki::IsSameType<C, wchar_t>::value)); 00120 size_t len = 0; 00121 while (*str++ != 0) 00122 ++len; 00123 return len; 00124 } 00125 00126 00127 template <> inline 00128 bool cStringIsWhiteSpace(char ch) 00129 { 00130 //caveat 1: std::isspace() takes an int, but expects an unsigned char 00131 //caveat 2: some parts of UTF-8 chars are erroneously seen as whitespace, e.g. the a0 from "\xec\x8b\xa0" (MSVC) 00132 return static_cast<unsigned char>(ch) < 128 && 00133 std::isspace(static_cast<unsigned char>(ch)) != 0; 00134 } 00135 00136 template <> inline bool cStringIsWhiteSpace(unsigned char ch) { return cStringIsWhiteSpace<char>(ch); } 00137 template <> inline bool cStringIsWhiteSpace(signed char ch) { return cStringIsWhiteSpace<char>(ch); } 00138 template <> inline bool cStringIsWhiteSpace(wchar_t ch) { return std::iswspace(ch) != 0; } 00139 00140 template <> inline 00141 bool cStringIsDigit(char ch) 00142 { 00143 return std::isdigit(static_cast<unsigned char>(ch)) != 0; //caveat: takes an int, but expects an unsigned char 00144 } 00145 00146 00147 template <> 00148 inline 00149 bool cStringIsDigit(wchar_t ch) 00150 { 00151 return std::iswdigit(ch) != 0; 00152 } 00153 00154 namespace implementation 00155 { 00156 template <class T> 00157 struct UnArray { typedef T NonArrayType; }; 00158 00159 template <class T, int N> 00160 struct UnArray<T[N]> { typedef T NonArrayType; }; 00161 00162 template <class T> 00163 struct UnPointer { typedef T NonPtrType; }; 00164 00165 template <class T> 00166 struct UnPointer<T*> { typedef T NonPtrType; }; 00167 00168 template <class T> 00169 struct UnReference { typedef T NonRefType; }; 00170 00171 template <class T> 00172 struct UnReference<T&> { typedef T 
NonRefType; }; 00173 00174 00175 template<typename T> 00176 class HasValueType 00177 { 00178 typedef char Yes[1]; 00179 typedef char No [2]; 00180 00181 template <typename U> class HelperTp {}; 00182 00183 //detect presence of a member type called value_type 00184 template <class U> static Yes& hasMemberValueType(HelperTp<typename U::value_type>*); 00185 template <class U> static No& hasMemberValueType(...); 00186 00187 public: 00188 enum { Result = sizeof(hasMemberValueType<T>(NULL)) == sizeof(Yes) 00189 }; 00190 }; 00191 00192 00193 template<typename T, bool isClassType> 00194 class HasStringMembers 00195 { 00196 public: 00197 enum { Result = false }; 00198 }; 00199 00200 template<typename T> 00201 class HasStringMembers<T, true> 00202 { 00203 typedef char Yes[1]; 00204 typedef char No [2]; 00205 00206 //detect presence of member functions (without specific restriction on return type, within T or one of it's base classes) 00207 template <typename U, U t> class HelperFn {}; 00208 00209 struct Fallback 00210 { 00211 int c_str; 00212 int length; 00213 }; 00214 00215 template <class U> 00216 struct Helper2 : public U, public Fallback {}; //U must be a class-type! 
00217 00218 //we don't know the exact declaration of the member attribute (may be in base class), but we know what NOT to expect: 00219 template <class U> static No& hasMemberCstr(HelperFn<int Fallback::*, &Helper2<U>::c_str>*); 00220 template <class U> static Yes& hasMemberCstr(...); 00221 00222 template <class U> static No& hasMemberLength(HelperFn<int Fallback::*, &Helper2<U>::length>*); 00223 template <class U> static Yes& hasMemberLength(...); 00224 public: 00225 enum { Result = sizeof(hasMemberCstr <T>(NULL)) == sizeof(Yes) && 00226 sizeof(hasMemberLength<T>(NULL)) == sizeof(Yes) 00227 }; 00228 }; 00229 00230 template <class S, bool isStringClass> struct StringTraits2 { typedef Loki::EmptyType Result; }; //"StringTraits2": fix some VS bug with namespace and partial template specialization 00231 00232 template <class S> struct StringTraits2<S, true> { typedef typename S::value_type Result; }; 00233 template <> struct StringTraits2<char, false> { typedef char Result; }; 00234 template <> struct StringTraits2<wchar_t, false> { typedef wchar_t Result; }; 00235 } 00236 00237 template <class S> 00238 struct StringTraits 00239 { 00240 private: 00241 typedef typename implementation::UnReference<S>::NonRefType NonRefType; 00242 typedef typename Loki::TypeTraits<NonRefType>::NonConstType UndecoratedType; 00243 00244 typedef typename implementation::UnArray<UndecoratedType>::NonArrayType NonArrayType; 00245 typedef typename implementation::UnPointer<NonArrayType>::NonPtrType NonPtrType; 00246 typedef typename Loki::TypeTraits<NonPtrType>::NonConstType NonConstValType; //handle "const char* const" 00247 public: 00248 enum 00249 { 00250 isStringClass = implementation::HasStringMembers<UndecoratedType, implementation::HasValueType<UndecoratedType>::Result>::Result 00251 }; 00252 00253 typedef typename implementation::StringTraits2<NonConstValType, isStringClass>::Result CharType; 00254 00255 enum 00256 { 00257 isStringLike = Loki::IsSameType<CharType, char>::value || 
Loki::IsSameType<CharType, wchar_t>::value 00258 }; 00259 }; 00260 00261 00262 template <class S> inline 00263 const typename StringTraits<S>::CharType* strBegin(const S& str, typename S::value_type dummy = 0) { return str.c_str(); } //SFINAE: T must be a "string" 00264 00265 template <class Char> 00266 inline const typename StringTraits<Char>::CharType* strBegin(const Char* str) { return str; } 00267 inline const char* strBegin(const char& ch) { return &ch; } 00268 inline const wchar_t* strBegin(const wchar_t& ch) { return &ch; } 00269 00270 00271 template <class S> inline 00272 size_t strLength(const S& str, typename S::value_type dummy = 0) { return str.length(); } //SFINAE: T must be a "string" 00273 00274 template <class Char> 00275 inline size_t strLength(const Char* str) { return cStringLength(str); } 00276 inline size_t strLength(char) { return 1; } 00277 inline size_t strLength(wchar_t) { return 1; } 00278 00279 00280 template <class S, class T> inline 00281 bool startsWith(const S& str, const T& prefix) 00282 { 00283 assert_static(StringTraits<S>::isStringLike); 00284 assert_static(StringTraits<T>::isStringLike); 00285 00286 const size_t pfLength = strLength(prefix); 00287 if (strLength(str) < pfLength) 00288 return false; 00289 00290 return std::equal(strBegin(str), strBegin(str) + pfLength, 00291 strBegin(prefix)); 00292 } 00293 00294 00295 template <class S, class T> inline 00296 bool endsWith(const S& str, const T& postfix) 00297 { 00298 assert_static(StringTraits<S>::isStringLike); 00299 assert_static(StringTraits<T>::isStringLike); 00300 00301 size_t strLen = strLength(str); 00302 size_t pfLen = strLength(postfix); 00303 if (strLen < pfLen) 00304 return false; 00305 00306 typedef typename StringTraits<S>::CharType CharType; 00307 00308 const CharType* cmpBegin = strBegin(str) + strLen - pfLen; 00309 return std::equal(cmpBegin, cmpBegin + pfLen, 00310 strBegin(postfix)); 00311 } 00312 00313 00314 // get all characters after the last occurence of ch 
00315 // (returns the whole string if ch not found) 00316 template <class S, class T> inline 00317 S afterLast(const S& str, const T& ch) 00318 { 00319 assert_static(StringTraits<T>::isStringLike); 00320 00321 const size_t pos = str.rfind(ch); 00322 if (pos != S::npos) 00323 { 00324 size_t chLen = strLength(ch); 00325 return S(str.c_str() + pos + chLen, str.length() - pos - chLen); 00326 } 00327 else 00328 return str; 00329 } 00330 00331 00332 // get all characters before the last occurence of ch 00333 // (returns empty string if ch not found) 00334 template <class S, class T> inline 00335 S beforeLast(const S& str, const T& ch) 00336 { 00337 assert_static(StringTraits<T>::isStringLike); 00338 00339 const size_t pos = str.rfind(ch); 00340 if (pos != S::npos) 00341 return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! 00342 else 00343 return S(); 00344 } 00345 00346 00347 //returns empty string if ch not found 00348 template <class S, class T> inline 00349 S afterFirst(const S& str, const T& ch) 00350 { 00351 assert_static(StringTraits<T>::isStringLike); 00352 00353 const size_t pos = str.find(ch); 00354 if (pos != S::npos) 00355 { 00356 size_t chLen = strLength(ch); 00357 return S(str.c_str() + pos + chLen, str.length() - pos - chLen); 00358 } 00359 else 00360 return S(); 00361 00362 } 00363 00364 00365 //returns the whole string if ch not found 00366 template <class S, class T> inline 00367 S beforeFirst(const S& str, const T& ch) 00368 { 00369 assert_static(StringTraits<T>::isStringLike); 00370 00371 const size_t pos = str.find(ch); 00372 if (pos != S::npos) 00373 return S(str.c_str(), pos); //data is non-empty string in this context: else ch would not have been found! 
00374 else 00375 return str; 00376 } 00377 00378 00379 template <class S, class T> inline 00380 std::vector<S> split(const S& str, const T& delimiter) 00381 { 00382 assert_static(StringTraits<T>::isStringLike); 00383 00384 std::vector<S> output; 00385 size_t bockStart = 0; 00386 size_t delimLen = strLength(delimiter); 00387 if (delimLen != 0) 00388 { 00389 for (size_t blockEnd = str.find(delimiter, bockStart); 00390 blockEnd != S::npos; 00391 bockStart = blockEnd + delimLen, blockEnd = str.find(delimiter, bockStart)) 00392 { 00393 output.push_back(S(str.c_str() + bockStart, blockEnd - bockStart)); 00394 } 00395 } 00396 output.push_back(S(str.c_str() + bockStart, str.length() - bockStart)); 00397 return output; 00398 } 00399 00400 00401 template <class S> inline 00402 void truncate(S& str, size_t newLen) 00403 { 00404 if (newLen < str.length()) 00405 str.resize(newLen); 00406 } 00407 00408 00409 template <class S, class T, class U> inline 00410 void replace(S& str, const T& old, const U& replacement, bool replaceAll) 00411 { 00412 assert_static(StringTraits<T>::isStringLike); 00413 assert_static(StringTraits<U>::isStringLike); 00414 00415 size_t pos = 0; 00416 size_t oldLen = strLength(old); 00417 size_t repLen = strLength(replacement); 00418 while ((pos = str.find(old, pos)) != S::npos) 00419 { 00420 str.replace(pos, oldLen, replacement); 00421 pos += repLen; //move past the string that was replaced 00422 00423 if (!replaceAll) 00424 break; 00425 } 00426 } 00427 00428 00429 template <class S> inline 00430 void trim(S& str, bool fromLeft, bool fromRight) 00431 { 00432 assert(fromLeft || fromRight); 00433 00434 typedef typename S::value_type CharType; 00435 00436 const CharType* newBegin = str.c_str(); 00437 const CharType* newEnd = str.c_str() + str.length(); 00438 00439 if (fromRight) 00440 while (newBegin != newEnd && cStringIsWhiteSpace(newEnd[-1])) 00441 --newEnd; 00442 00443 if (fromLeft) 00444 while (newBegin != newEnd && cStringIsWhiteSpace(*newBegin)) 00445 
++newBegin; 00446 00447 const size_t newLength = newEnd - newBegin; 00448 if (newLength != str.length()) 00449 { 00450 if (newBegin != str.c_str()) 00451 str = S(newBegin, newLength); //minor inefficiency: in case "str" is not shared, we could save an allocation and do a memory move only 00452 else 00453 str.resize(newLength); 00454 } 00455 } 00456 00457 00458 namespace implementation 00459 { 00460 template <class S, class T> 00461 struct CnvtStringToString 00462 { 00463 T convert(const S& src) const { return T(strBegin(src), strLength(src)); } 00464 }; 00465 00466 template <class S> 00467 struct CnvtStringToString<S, S> //perf: we don't need a deep copy if string types match 00468 { 00469 const S& convert(const S& src) const { return src; } 00470 }; 00471 } 00472 00473 template <class T, class S> inline 00474 T cvrtString(const S& str) { return implementation::CnvtStringToString<S, T>().convert(str); } 00475 00476 00477 namespace implementation 00478 { 00479 enum NumberType 00480 { 00481 NUM_TYPE_SIGNED_INT, 00482 NUM_TYPE_UNSIGNED_INT, 00483 NUM_TYPE_FLOATING_POINT, 00484 NUM_TYPE_OTHER, 00485 }; 00486 00487 00488 template <class S, class Num, NumberType> 00489 struct CvrtNumberToString 00490 { 00491 S convert(const Num& number) const //default number to string conversion using streams: convenient, but SLOW, SLOW, SLOW!!!! 
(~ factor of 20) 00492 { 00493 typedef typename StringTraits<S>::CharType CharType; 00494 00495 std::basic_ostringstream<CharType> ss; 00496 ss << number; 00497 return cvrtString<S>(ss.str()); 00498 } 00499 }; 00500 00501 00502 template <class S, class Num> 00503 struct CvrtNumberToString<S, Num, NUM_TYPE_FLOATING_POINT> 00504 { 00505 S convert(const Num& number) const { return convertFloat(number, typename StringTraits<S>::CharType()); } 00506 00507 private: 00508 S convertFloat(const Num& number, char) const 00509 { 00510 char buffer[50]; 00511 int charsWritten = std::sprintf(buffer, "%f", static_cast<double>(number)); 00512 return charsWritten > 0 ? S(buffer, charsWritten) : S(); 00513 } 00514 S convertFloat(const Num& number, wchar_t) const 00515 { 00516 wchar_t buffer[50]; 00517 #ifdef __MINGW32__ 00518 int charsWritten = ::swprintf(buffer, L"%f", static_cast<double>(number)); //MinGW does not comply to the C standard! 00519 #else 00520 int charsWritten = std::swprintf(buffer, 50, L"%f", static_cast<double>(number)); 00521 #endif 00522 return charsWritten > 0 ? S(buffer, charsWritten) : S(); 00523 } 00524 }; 00525 00526 /* 00527 perf: integer to string: (executed 10 mio. times) 00528 std::stringstream - 14796 ms 00529 std::sprintf - 3086 ms 00530 hand coded - 778 ms 00531 */ 00532 00533 template <class S, class Num> inline 00534 S formatInteger(Num n, bool hasMinus) 00535 { 00536 assert(n >= 0); 00537 S output; 00538 do 00539 { 00540 output += '0' + n % 10; 00541 n /= 10; 00542 } 00543 while (n != 0); 00544 if (hasMinus) 00545 output += '-'; 00546 00547 std::reverse(output.begin(), output.end()); 00548 return output; 00549 } 00550 00551 template <class S, class Num> 00552 struct CvrtNumberToString<S, Num, NUM_TYPE_SIGNED_INT> 00553 { 00554 S convert(const Num& number) const { return formatInteger<S>(number < 0 ? 
-number : number, number < 0); } 00555 }; 00556 00557 template <class S, class Num> 00558 struct CvrtNumberToString<S, Num, NUM_TYPE_UNSIGNED_INT> 00559 { 00560 S convert(const Num& number) const { return formatInteger<S>(number, false); } 00561 }; 00562 00563 //-------------------------------------------------------------------------------- 00564 00565 template <class S, class Num, NumberType> 00566 struct CvrtStringToNumber 00567 { 00568 Num convert(const S& str) const //default string to number conversion using streams: convenient, but SLOW 00569 { 00570 typedef typename StringTraits<S>::CharType CharType; 00571 Num number = 0; 00572 std::basic_istringstream<CharType>(cvrtString<std::basic_string<CharType> >(str)) >> number; 00573 return number; 00574 } 00575 }; 00576 00577 00578 template <class S, class Num> 00579 struct CvrtStringToNumber<S, Num, NUM_TYPE_FLOATING_POINT> 00580 { 00581 Num convert(const S& str) const { return convertFloat(strBegin(str)); } 00582 00583 private: 00584 Num convertFloat(const char* str) const { return std::strtod(str, NULL); } 00585 Num convertFloat(const wchar_t* str) const { return std::wcstod(str, NULL); } 00586 }; 00587 00588 template <class Num, class S> 00589 Num extractInteger(const S& str, bool& hasMinusSign) //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic 00590 { 00591 typedef typename StringTraits<S>::CharType CharType; 00592 00593 const CharType* first = strBegin(str); 00594 const CharType* last = first + strLength(str); 00595 00596 while (first != last && cStringIsWhiteSpace(*first)) //skip leading whitespace 00597 ++first; 00598 00599 hasMinusSign = false; //handle minus sign 00600 if (first != last) 00601 { 00602 if (*first == '-') 00603 { 00604 hasMinusSign = true; 00605 ++first; 00606 } 00607 else if (*first == '+') 00608 ++first; 00609 } 00610 00611 Num number = 0; 00612 for (const CharType* iter = first; iter != last; ++iter) 00613 { 00614 const CharType c = *iter; 
00615 if ('0' <= c && c <= '9') 00616 { 00617 number *= 10; 00618 number += c - '0'; 00619 } 00620 else 00621 { 00622 assert(std::find_if(iter, last, std::not1(std::ptr_fun(&cStringIsWhiteSpace<CharType>))) == last); //rest of string should contain whitespace only 00623 break; 00624 } 00625 } 00626 return number; 00627 } 00628 00629 00630 template <class S, class Num> 00631 struct CvrtStringToNumber<S, Num, NUM_TYPE_SIGNED_INT> 00632 { 00633 Num convert(const S& str) const 00634 { 00635 bool hasMinusSign = false; //handle minus sign 00636 const Num number = extractInteger<Num>(str, hasMinusSign); 00637 return hasMinusSign ? -number : number; 00638 } 00639 }; 00640 00641 00642 template <class S, class Num> 00643 struct CvrtStringToNumber<S, Num, NUM_TYPE_UNSIGNED_INT> 00644 { 00645 Num convert(const S& str) const //very fast conversion to integers: slightly faster than std::atoi, but more importantly: generic 00646 { 00647 bool hasMinusSign = false; //handle minus sign 00648 const Num number = extractInteger<Num>(str, hasMinusSign); 00649 if (hasMinusSign) 00650 { 00651 assert(false); 00652 return 0U; 00653 } 00654 return number; 00655 } 00656 }; 00657 } 00658 00659 00660 template <class S, class Num> 00661 inline 00662 S toString(const Num& number) //convert number to string the C++ way 00663 { 00664 using namespace implementation; 00665 return CvrtNumberToString<S, Num, 00666 Loki::TypeTraits<Num>::isSignedInt ? NUM_TYPE_SIGNED_INT : 00667 Loki::TypeTraits<Num>::isUnsignedInt ? NUM_TYPE_UNSIGNED_INT : 00668 Loki::TypeTraits<Num>::isFloat ? NUM_TYPE_FLOATING_POINT : 00669 NUM_TYPE_OTHER 00670 >().convert(number); 00671 } 00672 00673 00674 template <class Num, class S> 00675 inline 00676 Num toNumber(const S& str) //convert string to number the C++ way 00677 { 00678 using namespace implementation; 00679 return CvrtStringToNumber<S, Num, 00680 Loki::TypeTraits<Num>::isSignedInt ? NUM_TYPE_SIGNED_INT : 00681 Loki::TypeTraits<Num>::isUnsignedInt ? 
NUM_TYPE_UNSIGNED_INT : 00682 Loki::TypeTraits<Num>::isFloat ? NUM_TYPE_FLOATING_POINT : 00683 NUM_TYPE_OTHER 00684 >().convert(str); 00685 } 00686 00687 } 00688 00689 #endif //STRING_TOOLS_HEADER_213458973046