/*
**********************************************************************
* Copyright (C) 1998-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
* File unistr.h
*
* Modification History:
*
* Date Name Description
* 09/25/98 stephen Creation.
* 11/11/98 stephen Changed per 11/9 code review.
* 04/20/99 stephen Overhauled per 4/16 code review.
* 11/18/99 aliu Made to inherit from Replaceable. Added method
* handleReplaceBetween(); other methods unchanged.
* 06/25/01 grhoten Remove dependency on iostream.
******************************************************************************
*/
#ifndef UNISTR_H
#define UNISTR_H
/**
* \file
* \brief C++ API: Unicode String
*/
#include "unicode/utypes.h"
#include "unicode/rep.h"
#include "unicode/std_string.h"
#include "unicode/stringpiece.h"
#include "unicode/bytestream.h"
#include "unicode/ucasemap.h"
struct UConverter; // unicode/ucnv.h
#if !defined(U_COMPARE_CODE_POINT_ORDER)
/* Only defined here if nothing defined it earlier; see also ustring.h and unorm.h */
/**
* Option bit accepted by u_strCaseCompare, u_strcasecmp, unorm_compare, etc.
* When set, strings are compared in code point order
* rather than in code unit order.
* @stable ICU 2.2
*/
# define U_COMPARE_CODE_POINT_ORDER 0x8000
#endif
#ifndef USTRING_H
// u_strlen() is declared here only while USTRING_H is undefined.
// NOTE(review): USTRING_H is presumably the include guard of unicode/ustring.h,
// which would make this a stand-in declaration for code that has not included
// that header -- confirm against ustring.h.
/**
* \ingroup ustring_ustrlen
*/
U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s);
#endif
/**
* \def U_STRING_CASE_MAPPER_DEFINED
* Marker macro: defined together with the UStringCaseMapper typedef below,
* so that the typedef is skipped if the marker is already defined.
* @internal
*/
#ifndef U_STRING_CASE_MAPPER_DEFINED
#define U_STRING_CASE_MAPPER_DEFINED
/**
* Internal string case mapping function type.
*
* A function of this type takes a UCaseMap pointer, a destination UChar buffer
* with its capacity, a source UChar buffer with its length, and a pointer to a
* UErrorCode; it returns an int32_t and uses the U_CALLCONV calling convention.
* @internal
*/
typedef int32_t U_CALLCONV
UStringCaseMapper(const UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
UErrorCode *pErrorCode);
#endif
U_NAMESPACE_BEGIN
// Forward declarations of classes that are only named in this header.
// Where present, the trailing comment gives the header associated with the class.
class BreakIterator; // unicode/brkiter.h
class Locale; // unicode/locid.h
class StringCharacterIterator;
class UnicodeStringAppendable; // unicode/appendable.h
/* The <iostream> include has been moved to unicode/ustream.h */
/**
* Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
* which constructs a Unicode string from an invariant-character char * string.
* For details about invariant characters, see utypes.h.
* This constructor has no runtime dependency on conversion code and is
* therefore recommended over ones taking a charset name string
* (where the empty string "" indicates invariant-character conversion).
*
* The macro expands to icu::UnicodeString::kInvariant.
*
* @stable ICU 3.2
*/
#define US_INV icu::UnicodeString::kInvariant
/**
* Unicode String literals in C++.
* Depending on the platform properties, different UnicodeString
* constructors should be used to create a UnicodeString object from
* a string literal.
* The macros are defined for maximum performance.
* They work only for strings that contain "invariant characters", i.e.,
* only Latin letters, digits, and some punctuation.
* See utypes.h for details.
*
* The string parameter must be a C string literal.
* The length of the string, not including the terminating
* <code>NUL</code>, must be specified as a constant.
* The U_STRING_DECL macro should be invoked exactly once for one
* such string variable before it is used.
* @stable ICU 2.0
*/
// NOTE(review): none of the definitions below refers to U_STRING_DECL; the
// sentence about it above may have been carried over from another header -- confirm.
//
// The first three branches pass (TRUE, const UChar *, _length) to the
// UnicodeString constructor; presumably this is a constructor that uses the
// literal's storage directly instead of converting it -- confirm against the
// class declaration.
#if defined(U_DECLARE_UTF16)
// A UTF-16 literal macro is available: wrap the literal with U_DECLARE_UTF16.
# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
// wchar_t has the same size as UChar, and either the charset family is ASCII
// or UChar is 2 bytes with U_WCHAR_IS_UTF16 defined:
// paste an L prefix onto the literal and cast the wide literal to const UChar *.
# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
// UChar is a single byte and the charset family is ASCII:
// the narrow literal itself is cast to const UChar *.
# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
#else
// Fallback: construct from the char * literal with the US_INV
// (invariant-character) constructor.
# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
#endif
/**
* Unicode String literals in C++.
* Depending on the platform properties, different UnicodeString
* constructors should be used to create a UnicodeString object from
* a string literal.
* The macros are defined for improved performance.
* They work only for strings that contain "invariant characters", i.e.,
* only Latin letters, digits, and some punctuation.
* See utypes.h for details.
*
* The string parameter must be a C string literal.
*
* This macro expands to UNICODE_STRING(cs, -1), so no length has to be supplied.
* @stable ICU 2.0
*/
// NOTE(review): the -1 length presumably tells the constructor to determine the
// length of the NUL-terminated literal itself -- confirm against the constructors.
#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
/**
* \def UNISTR_FROM_CHAR_EXPLICIT
* Controls whether the UnicodeString(UChar) and UnicodeString(UChar32)
* constructors are marked as explicit.
* This can be defined to be empty or "explicit"; with "explicit",
* inadvertent use of these constructors is prevented.
* If it is not defined beforehand, it becomes "explicit" when one of the
* ICU library implementation macros is defined, and empty otherwise.
* @stable ICU 49
*/
#if !defined(UNISTR_FROM_CHAR_EXPLICIT)
# if !defined(U_COMBINED_IMPLEMENTATION) && !defined(U_COMMON_IMPLEMENTATION) && \
     !defined(U_I18N_IMPLEMENTATION) && !defined(U_IO_IMPLEMENTATION)
    // Outside ICU library code: empty by default, for source code compatibility.
#   define UNISTR_FROM_CHAR_EXPLICIT
# else
    // Inside ICU library code: automatically "explicit".
#   define UNISTR_FROM_CHAR_EXPLICIT explicit
# endif
#endif
/**
* \def UNISTR_FROM_STRING_EXPLICIT
* Controls whether the UnicodeString(const char *) and UnicodeString(const UChar *)
* constructors are marked as explicit.
* This can be defined to be empty or "explicit"; with "explicit",
* inadvertent use of these constructors is prevented.
*
* In particular, this helps prevent accidentally depending on ICU conversion code
* by passing a string literal into an API with a const UnicodeString & parameter.
*
* If it is not defined beforehand, it becomes "explicit" when one of the
* ICU library implementation macros is defined, and empty otherwise.
* @stable ICU 49
*/
#if !defined(UNISTR_FROM_STRING_EXPLICIT)
# if !defined(U_COMBINED_IMPLEMENTATION) && !defined(U_COMMON_IMPLEMENTATION) && \
     !defined(U_I18N_IMPLEMENTATION) && !defined(U_IO_IMPLEMENTATION)
    // Outside ICU library code: empty by default, for source code compatibility.
#   define UNISTR_FROM_STRING_EXPLICIT
# else
    // Inside ICU library code: automatically "explicit".
#   define UNISTR_FROM_STRING_EXPLICIT explicit
# endif
#endif
/* Cannot make the following #ifndef U_HIDE_DRAFT_API,
it is used to construct other non-internal constants */
/**
* \def UNISTR_OBJECT_SIZE
* Desired sizeof(UnicodeString) in bytes.
* It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
* The object size may want to be a multiple of 16 bytes,
* which is a common granularity for heap allocation.
*
* Any space inside the object beyond sizeof(vtable pointer) + 2
* is available for storing short strings inside the object.
* The bigger the object, the longer a string that can be stored inside the object,
* without additional heap allocation.
*
* Depending on a platform's pointer size, pointer alignment requirements,
* and struct padding, the compiler will usually round up sizeof(UnicodeString)
* to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
* to hold the fields for heap-allocated strings.
* Such a minimum size also ensures that the object is easily large enough
* to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
*
* sizeof(UnicodeString) >= 48 should work for all known platforms.
*
* For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
* sizeof(UnicodeString) = 64 would leave space for
* (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
* UChars stored inside the object.
*
* The minimum object size on a 64-bit machine would be
* 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
* and the internal buffer would hold up to 11 UChars in that case.
*
* @see U16_MAX_LENGTH
* @draft ICU 56
*/
#if !defined(UNISTR_OBJECT_SIZE)
// Default, in bytes; a definition made before this point is left untouched.
# define UNISTR_OBJECT_SIZE 64
#endif
/**
* UnicodeString is a string class that stores Unicode characters directly and provides
* functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
* It is a concrete implementation of the abstract class Replaceable (for transliteration).
*
* A UnicodeString may also "alias" an external