Main Page   Class Hierarchy   Alphabetical List   Data Structures   File List   Data Fields   Globals  

uchar.h File Reference

C API: Unicode Char. More...

#include "unicode/utypes.h"

Go to the source code of this file.

Defines

#define U_UNICODE_VERSION   "3.1.1"
#define UCHAR_MIN_VALUE   0
 The lowest Unicode code point value. More...

#define UCHAR_MAX_VALUE   0x10ffff
 The highest Unicode code point value (scalar value) according to The Unicode Standard. More...

#define U_FOLD_CASE_DEFAULT   0
 Option value for case folding: use all mappings defined in CaseFolding.txt. More...

#define U_FOLD_CASE_EXCLUDE_SPECIAL_I   1
 Option value for case folding: exclude the mappings for dotted I and dotless i marked with 'I' in CaseFolding.txt. More...

#define u_charScript   ublock_getCode

Typedefs

typedef enum UCharCategory UCharCategory
typedef enum UCharDirection UCharDirection
typedef enum UBlockCode UBlockCode
 @draft ICU 2.0.

typedef enum UCellWidth UCellWidth
 @stable.

typedef enum UCharNameChoice UCharNameChoice
 @stable.

typedef UBool U_CALLCONV UCharEnumTypeRange (const void *context, UChar32 start, UChar32 limit, UCharCategory type)
 Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type"). More...

typedef UBool UEnumCharNamesFn (void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, UTextOffset length)
 Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name. More...

typedef UBlockCode UCharScript

Enumerations

enum  UCharCategory {
  U_UNASSIGNED = 0, U_GENERAL_OTHER_TYPES = 0, U_UPPERCASE_LETTER = 1, U_LOWERCASE_LETTER = 2,
  U_TITLECASE_LETTER = 3, U_MODIFIER_LETTER = 4, U_OTHER_LETTER = 5, U_NON_SPACING_MARK = 6,
  U_ENCLOSING_MARK = 7, U_COMBINING_SPACING_MARK = 8, U_DECIMAL_DIGIT_NUMBER = 9, U_LETTER_NUMBER = 10,
  U_OTHER_NUMBER = 11, U_SPACE_SEPARATOR = 12, U_LINE_SEPARATOR = 13, U_PARAGRAPH_SEPARATOR = 14,
  U_CONTROL_CHAR = 15, U_FORMAT_CHAR = 16, U_PRIVATE_USE_CHAR = 17, U_SURROGATE = 18,
  U_DASH_PUNCTUATION = 19, U_START_PUNCTUATION = 20, U_END_PUNCTUATION = 21, U_CONNECTOR_PUNCTUATION = 22,
  U_OTHER_PUNCTUATION = 23, U_MATH_SYMBOL = 24, U_CURRENCY_SYMBOL = 25, U_MODIFIER_SYMBOL = 26,
  U_OTHER_SYMBOL = 27, U_INITIAL_PUNCTUATION = 28, U_FINAL_PUNCTUATION = 29, U_CHAR_CATEGORY_COUNT
}
 Data for enumerated Unicode general category types. More...

enum  UCharDirection {
  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT
}
 This specifies the language directional property of a character set. More...

enum  UBlockCode {
  UBLOCK_BASIC_LATIN = 1, U_BASIC_LATIN = 1, UBLOCK_LATIN_1_SUPPLEMENT = 2, U_LATIN_1_SUPPLEMENT = 2,
  UBLOCK_LATIN_EXTENDED_A = 3, U_LATIN_EXTENDED_A = 3, UBLOCK_LATIN_EXTENDED_B = 4, U_LATIN_EXTENDED_B = 4,
  UBLOCK_IPA_EXTENSIONS = 5, U_IPA_EXTENSIONS = 5, UBLOCK_SPACING_MODIFIER_LETTERS = 6, U_SPACING_MODIFIER_LETTERS = 6,
  UBLOCK_COMBINING_DIACRITICAL_MARKS = 7, U_COMBINING_DIACRITICAL_MARKS = 7, UBLOCK_GREEK = 8, U_GREEK = 8,
  UBLOCK_CYRILLIC = 9, U_CYRILLIC = 9, UBLOCK_ARMENIAN = 10, U_ARMENIAN = 10,
  UBLOCK_HEBREW = 11, U_HEBREW = 11, UBLOCK_ARABIC = 12, U_ARABIC = 12,
  UBLOCK_SYRIAC = 13, U_SYRIAC = 13, UBLOCK_THAANA = 14, U_THAANA = 14,
  UBLOCK_DEVANAGARI = 15, U_DEVANAGARI = 15, UBLOCK_BENGALI = 16, U_BENGALI = 16,
  UBLOCK_GURMUKHI = 17, U_GURMUKHI = 17, UBLOCK_GUJARATI = 18, U_GUJARATI = 18,
  UBLOCK_ORIYA = 19, U_ORIYA = 19, UBLOCK_TAMIL = 20, U_TAMIL = 20,
  UBLOCK_TELUGU = 21, U_TELUGU = 21, UBLOCK_KANNADA = 22, U_KANNADA = 22,
  UBLOCK_MALAYALAM = 23, U_MALAYALAM = 23, UBLOCK_SINHALA = 24, U_SINHALA = 24,
  UBLOCK_THAI = 25, U_THAI = 25, UBLOCK_LAO = 26, U_LAO = 26,
  UBLOCK_TIBETAN = 27, U_TIBETAN = 27, UBLOCK_MYANMAR = 28, U_MYANMAR = 28,
  UBLOCK_GEORGIAN = 29, U_GEORGIAN = 29, UBLOCK_HANGUL_JAMO = 30, U_HANGUL_JAMO = 30,
  UBLOCK_ETHIOPIC = 31, U_ETHIOPIC = 31, UBLOCK_CHEROKEE = 32, U_CHEROKEE = 32,
  UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 33, UBLOCK_OGHAM = 34, U_OGHAM = 34,
  UBLOCK_RUNIC = 35, U_RUNIC = 35, UBLOCK_KHMER = 36, U_KHMER = 36,
  UBLOCK_MONGOLIAN = 37, U_MONGOLIAN = 37, UBLOCK_LATIN_EXTENDED_ADDITIONAL = 38, U_LATIN_EXTENDED_ADDITIONAL = 38,
  UBLOCK_GREEK_EXTENDED = 39, U_GREEK_EXTENDED = 39, UBLOCK_GENERAL_PUNCTUATION = 40, U_GENERAL_PUNCTUATION = 40,
  UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, U_SUPERSCRIPTS_AND_SUBSCRIPTS = 41, UBLOCK_CURRENCY_SYMBOLS = 42, U_CURRENCY_SYMBOLS = 42,
  UBLOCK_COMBINING_MARKS_FOR_SYMBOLS = 43, U_COMBINING_MARKS_FOR_SYMBOLS = 43, UBLOCK_LETTERLIKE_SYMBOLS = 44, U_LETTERLIKE_SYMBOLS = 44,
  UBLOCK_NUMBER_FORMS = 45, U_NUMBER_FORMS = 45, UBLOCK_ARROWS = 46, U_ARROWS = 46,
  UBLOCK_MATHEMATICAL_OPERATORS = 47, U_MATHEMATICAL_OPERATORS = 47, UBLOCK_MISCELLANEOUS_TECHNICAL = 48, U_MISCELLANEOUS_TECHNICAL = 48,
  UBLOCK_CONTROL_PICTURES = 49, U_CONTROL_PICTURES = 49, UBLOCK_OPTICAL_CHARACTER_RECOGNITION = 50, U_OPTICAL_CHARACTER_RECOGNITION = 50,
  UBLOCK_ENCLOSED_ALPHANUMERICS = 51, U_ENCLOSED_ALPHANUMERICS = 51, UBLOCK_BOX_DRAWING = 52, U_BOX_DRAWING = 52,
  UBLOCK_BLOCK_ELEMENTS = 53, U_BLOCK_ELEMENTS = 53, UBLOCK_GEOMETRIC_SHAPES = 54, U_GEOMETRIC_SHAPES = 54,
  UBLOCK_MISCELLANEOUS_SYMBOLS = 55, U_MISCELLANEOUS_SYMBOLS = 55, UBLOCK_DINGBATS = 56, U_DINGBATS = 56,
  UBLOCK_BRAILLE_PATTERNS = 57, U_BRAILLE_PATTERNS = 57, UBLOCK_CJK_RADICALS_SUPPLEMENT = 58, U_CJK_RADICALS_SUPPLEMENT = 58,
  UBLOCK_KANGXI_RADICALS = 59, U_KANGXI_RADICALS = 59, UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60, U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 60,
  UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION = 61, U_CJK_SYMBOLS_AND_PUNCTUATION = 61, UBLOCK_HIRAGANA = 62, U_HIRAGANA = 62,
  UBLOCK_KATAKANA = 63, U_KATAKANA = 63, UBLOCK_BOPOMOFO = 64, U_BOPOMOFO = 64,
  UBLOCK_HANGUL_COMPATIBILITY_JAMO = 65, U_HANGUL_COMPATIBILITY_JAMO = 65, UBLOCK_KANBUN = 66, U_KANBUN = 66,
  UBLOCK_BOPOMOFO_EXTENDED = 67, U_BOPOMOFO_EXTENDED = 67, UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68, U_ENCLOSED_CJK_LETTERS_AND_MONTHS = 68,
  UBLOCK_CJK_COMPATIBILITY = 69, U_CJK_COMPATIBILITY = 69, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70, U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 70,
  UBLOCK_CJK_UNIFIED_IDEOGRAPHS = 71, U_CJK_UNIFIED_IDEOGRAPHS = 71, UBLOCK_YI_SYLLABLES = 72, U_YI_SYLLABLES = 72,
  UBLOCK_YI_RADICALS = 73, U_YI_RADICALS = 73, UBLOCK_HANGUL_SYLLABLES = 74, U_HANGUL_SYLLABLES = 74,
  UBLOCK_HIGH_SURROGATES = 75, U_HIGH_SURROGATES = 75, UBLOCK_HIGH_PRIVATE_USE_SURROGATES = 76, U_HIGH_PRIVATE_USE_SURROGATES = 76,
  UBLOCK_LOW_SURROGATES = 77, U_LOW_SURROGATES = 77, UBLOCK_PRIVATE_USE = 78, UBLOCK_PRIVATE_USE_AREA = UBLOCK_PRIVATE_USE,
  U_PRIVATE_USE_AREA = 78, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS = 79, U_CJK_COMPATIBILITY_IDEOGRAPHS = 79, UBLOCK_ALPHABETIC_PRESENTATION_FORMS = 80,
  U_ALPHABETIC_PRESENTATION_FORMS = 80, UBLOCK_ARABIC_PRESENTATION_FORMS_A = 81, U_ARABIC_PRESENTATION_FORMS_A = 81, UBLOCK_COMBINING_HALF_MARKS = 82,
  U_COMBINING_HALF_MARKS = 82, UBLOCK_CJK_COMPATIBILITY_FORMS = 83, U_CJK_COMPATIBILITY_FORMS = 83, UBLOCK_SMALL_FORM_VARIANTS = 84,
  U_SMALL_FORM_VARIANTS = 84, UBLOCK_ARABIC_PRESENTATION_FORMS_B = 85, U_ARABIC_PRESENTATION_FORMS_B = 85, UBLOCK_SPECIALS = 86,
  U_SPECIALS = 86, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, U_HALFWIDTH_AND_FULLWIDTH_FORMS = 87, UBLOCK_OLD_ITALIC = 88,
  UBLOCK_GOTHIC = 89, UBLOCK_DESERET = 90, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, UBLOCK_MUSICAL_SYMBOLS = 92,
  UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, UBLOCK_TAGS = 96,
  UBLOCK_COUNT = 97, U_SCRIPT_COUNT = UBLOCK_COUNT, UBLOCK_INVALID_CODE = -1, U_CHAR_SCRIPT_COUNT = UBLOCK_COUNT,
  U_NO_SCRIPT = UBLOCK_COUNT
}
 Constants for Unicode blocks, generated from Unicode Data file Blocks.txt. These are the same values as Unicode::EUnicodeScript. @draft ICU 2.0. More...

enum  UCellWidth {
  U_ZERO_WIDTH = 0, U_HALF_WIDTH = 1, U_FULL_WIDTH = 2, U_NEUTRAL_WIDTH = 3,
  U_CELL_WIDTH_COUNT
}
 Values returned by the u_charCellWidth() function. More...

enum  UCharNameChoice { U_UNICODE_CHAR_NAME, U_UNICODE_10_CHAR_NAME, U_EXTENDED_CHAR_NAME, U_CHAR_NAME_CHOICE_COUNT }
 Selector constants for u_charName(). More...


Functions

U_CAPI UBool U_EXPORT2 u_islower (UChar32 c)
 Determines whether the specified UChar is a lowercase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isupper (UChar32 c)
 Determines whether the specified character is an uppercase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_istitle (UChar32 c)
 Determines whether the specified character is a titlecase character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isdigit (UChar32 c)
 Determines whether the specified character is a digit according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isalnum (UChar32 c)
 Determines whether the specified character is an alphanumeric character (letter or digit) according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isdefined (UChar32 c)
 Determines whether the specified numeric value is actually a defined character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isalpha (UChar32 c)
 Determines whether the specified character is a letter according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isspace (UChar32 c)
 Determines if the specified character is a space character or not. More...

U_CAPI UBool U_EXPORT2 u_isWhitespace (UChar32 c)
 Determines if the specified character is white space according to ICU. More...

U_CAPI UBool U_EXPORT2 u_iscntrl (UChar32 c)
 Determines whether the specified character is a control character or not. More...

U_CAPI UBool U_EXPORT2 u_isprint (UChar32 c)
 Determines whether the specified character is a printable character according to UnicodeData.txt. More...

U_CAPI UBool U_EXPORT2 u_isbase (UChar32 c)
 Determines whether the specified character is of the base form according to UnicodeData.txt. More...

U_CAPI UCharDirection U_EXPORT2 u_charDirection (UChar32 c)
 Returns the linguistic direction property of a character. More...

U_CAPI UBool U_EXPORT2 u_isMirrored (UChar32 c)
 Determines whether the character has the "mirrored" property. More...

U_CAPI UChar32 U_EXPORT2 u_charMirror (UChar32 c)
 Maps the specified character to a "mirror-image" character. More...

U_CAPI uint16_t U_EXPORT2 u_charCellWidth (UChar32 c)
 Returns a value indicating the display-cell width of the character when used in Asian text, according to the Unicode standard (see p. 6-130 of The Unicode Standard, Version 2.0). More...

U_CAPI int8_t U_EXPORT2 u_charType (UChar32 c)
 Returns a value indicating a character category. More...

U_CAPI void U_EXPORT2 u_enumCharTypes (UCharEnumTypeRange *enumRange, const void *context)
 Enumerate efficiently all code points with their Unicode general categories. More...

U_CAPI uint8_t U_EXPORT2 u_getCombiningClass (UChar32 c)
 Returns the combining class of the code point as specified in UnicodeData.txt. More...

U_CAPI int32_t U_EXPORT2 u_charDigitValue (UChar32 c)
 Retrieves the decimal numeric value of a digit character. More...

U_CAPI UBlockCode U_EXPORT2 ublock_getCode (UChar32 ch)
 Returns the Unicode allocation block that contains the character. More...

U_CAPI UTextOffset U_EXPORT2 u_charName (UChar32 code, UCharNameChoice nameChoice, char *buffer, UTextOffset bufferLength, UErrorCode *pErrorCode)
 Retrieve the name of a Unicode character. More...

U_CAPI UChar32 U_EXPORT2 u_charFromName (UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
 Find a Unicode character by its name and return its code point value. More...

U_CAPI void U_EXPORT2 u_enumCharNames (UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
 Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name. More...

U_CAPI UBool U_EXPORT2 u_isIDStart (UChar32 c)
 A convenience method for determining if a Unicode character is allowed to start in a Unicode identifier. More...

U_CAPI UBool U_EXPORT2 u_isIDPart (UChar32 c)
 A convenience method for determining if a Unicode character may be part of a Unicode identifier other than the starting character. More...

U_CAPI UBool U_EXPORT2 u_isIDIgnorable (UChar32 c)
 A convenience method for determining if a Unicode character should be regarded as an ignorable character in a Unicode identifier. More...

U_CAPI UBool U_EXPORT2 u_isJavaIDStart (UChar32 c)
 A convenience method for determining if a Unicode character is allowed as the first character in a Java identifier. More...

U_CAPI UBool U_EXPORT2 u_isJavaIDPart (UChar32 c)
 A convenience method for determining if a Unicode character may be part of a Java identifier other than the starting character. More...

U_CAPI UChar32 U_EXPORT2 u_tolower (UChar32 c)
 The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned. More...

U_CAPI UChar32 U_EXPORT2 u_toupper (UChar32 c)
 The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned. More...

U_CAPI UChar32 U_EXPORT2 u_totitle (UChar32 c)
 The given character is mapped to its titlecase equivalent according to UnicodeData.txt. More...

U_CAPI UChar32 U_EXPORT2 u_foldCase (UChar32 c, uint32_t options)
 The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned. More...

U_CAPI int32_t U_EXPORT2 u_digit (UChar32 ch, int8_t radix)
 Returns the numeric value of the character ch in the specified radix. More...

U_CAPI UChar32 U_EXPORT2 u_forDigit (int32_t digit, int8_t radix)
 Determines the character representation for a specific digit in the specified radix. More...

U_CAPI void U_EXPORT2 u_getUnicodeVersion (UVersionInfo info)
 Gets the Unicode version information. More...


Detailed Description

C API: Unicode Char.

Unicode C API

The Unicode C API allows you to query the properties associated with individual Unicode character values.

The Unicode character information, provided implicitly by the Unicode character encoding standard, includes information about the script (for example, symbols or control characters) to which the character belongs, as well as semantic information such as whether a character is a digit or uppercase, lowercase, or uncased.


Define Documentation

#define u_charScript   ublock_getCode
 

Deprecated:
Use ublock_getCode instead. Remove after Aug, 2002

#define U_FOLD_CASE_DEFAULT   0
 

Option value for case folding: use all mappings defined in CaseFolding.txt.

@draft ICU 1.8

#define U_FOLD_CASE_EXCLUDE_SPECIAL_I   1
 

Option value for case folding: exclude the mappings for dotted I and dotless i marked with 'I' in CaseFolding.txt.

@draft ICU 1.8

#define UCHAR_MAX_VALUE   0x10ffff
 

The highest Unicode code point value (scalar value) according to The Unicode Standard.

This is a 21-bit value (20.1 bits, rounded up). For a single character, UChar32 is a simple type that can hold any code point value. @stable

#define UCHAR_MIN_VALUE   0
 

The lowest Unicode code point value.

Code points are non-negative. @stable


Typedef Documentation

typedef UBool U_CALLCONV UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
 

Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c<limit) with the same Unicode general category ("character type").

The callback function can stop the enumeration by returning FALSE.

Parameters:
context  an opaque pointer, as passed into u_enumCharTypes()
start  the first code point in a contiguous range with the same general category
limit  one past the last code point in a contiguous range with the same general category
type  the general category for all code points in [start..limit[
Returns:
FALSE to stop the enumeration
@draft ICU 2.1
See also:
UCharCategory , u_enumCharTypes

typedef UBlockCode UCharScript
 

Deprecated:
Use the enum UBlockCode instead. Remove after Aug, 2002

typedef UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, UTextOffset length)
 

Type of a callback function for u_enumCharNames() that gets called for each Unicode character with the code point value and the character name.

If such a function returns FALSE, then the enumeration is stopped.

Parameters:
context  The context pointer that was passed to u_enumCharNames().
code  The Unicode code point for the character with this name.
nameChoice  Selector for which kind of names is enumerated.
name  The character's name, zero-terminated.
length  The length of the name.
Returns:
TRUE if the enumeration should continue, FALSE to stop it.
See also:
UCharNameChoice , u_enumCharNames


Enumeration Type Documentation

enum UBlockCode
 

Constants for Unicode blocks, generated from Unicode Data file Blocks.txt. These are the same values as Unicode::EUnicodeScript. @draft ICU 2.0.

Enumeration values:
UBLOCK_BASIC_LATIN  @draft ICU 2.0.
U_BASIC_LATIN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_1_SUPPLEMENT  @draft ICU 2.0.
U_LATIN_1_SUPPLEMENT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_A  @draft ICU 2.0.
U_LATIN_EXTENDED_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_B  @draft ICU 2.0.
U_LATIN_EXTENDED_B 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_IPA_EXTENSIONS  @draft ICU 2.0.
U_IPA_EXTENSIONS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SPACING_MODIFIER_LETTERS  @draft ICU 2.0.
U_SPACING_MODIFIER_LETTERS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_DIACRITICAL_MARKS  @draft ICU 2.0.
U_COMBINING_DIACRITICAL_MARKS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GREEK  @draft ICU 2.0.
U_GREEK 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CYRILLIC  @draft ICU 2.0.
U_CYRILLIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARMENIAN  @draft ICU 2.0.
U_ARMENIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HEBREW  @draft ICU 2.0.
U_HEBREW 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC  @draft ICU 2.0.
U_ARABIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SYRIAC  @draft ICU 2.0.
U_SYRIAC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_THAANA  @draft ICU 2.0.
U_THAANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_DEVANAGARI  @draft ICU 2.0.
U_DEVANAGARI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BENGALI  @draft ICU 2.0.
U_BENGALI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GURMUKHI  @draft ICU 2.0.
U_GURMUKHI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GUJARATI  @draft ICU 2.0.
U_GUJARATI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ORIYA  @draft ICU 2.0.
U_ORIYA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TAMIL  @draft ICU 2.0.
U_TAMIL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TELUGU  @draft ICU 2.0.
U_TELUGU 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANNADA  @draft ICU 2.0.
U_KANNADA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MALAYALAM  @draft ICU 2.0.
U_MALAYALAM 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SINHALA  @draft ICU 2.0.
U_SINHALA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_THAI  @draft ICU 2.0.
U_THAI 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LAO  @draft ICU 2.0.
U_LAO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_TIBETAN  @draft ICU 2.0.
U_TIBETAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MYANMAR  @draft ICU 2.0.
U_MYANMAR 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GEORGIAN  @draft ICU 2.0.
U_GEORGIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_JAMO  @draft ICU 2.0.
U_HANGUL_JAMO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ETHIOPIC  @draft ICU 2.0.
U_ETHIOPIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CHEROKEE  @draft ICU 2.0.
U_CHEROKEE 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS  @draft ICU 2.0.
U_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OGHAM  @draft ICU 2.0.
U_OGHAM 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_RUNIC  @draft ICU 2.0.
U_RUNIC 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KHMER  @draft ICU 2.0.
U_KHMER 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MONGOLIAN  @draft ICU 2.0.
U_MONGOLIAN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LATIN_EXTENDED_ADDITIONAL  @draft ICU 2.0.
U_LATIN_EXTENDED_ADDITIONAL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GREEK_EXTENDED  @draft ICU 2.0.
U_GREEK_EXTENDED 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GENERAL_PUNCTUATION  @draft ICU 2.0.
U_GENERAL_PUNCTUATION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS  @draft ICU 2.0.
U_SUPERSCRIPTS_AND_SUBSCRIPTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CURRENCY_SYMBOLS  @draft ICU 2.0.
U_CURRENCY_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_MARKS_FOR_SYMBOLS  @draft ICU 2.0.
U_COMBINING_MARKS_FOR_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LETTERLIKE_SYMBOLS  @draft ICU 2.0.
U_LETTERLIKE_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_NUMBER_FORMS  @draft ICU 2.0.
U_NUMBER_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARROWS  @draft ICU 2.0.
U_ARROWS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MATHEMATICAL_OPERATORS  @draft ICU 2.0.
U_MATHEMATICAL_OPERATORS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MISCELLANEOUS_TECHNICAL  @draft ICU 2.0.
U_MISCELLANEOUS_TECHNICAL 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CONTROL_PICTURES  @draft ICU 2.0.
U_CONTROL_PICTURES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OPTICAL_CHARACTER_RECOGNITION  @draft ICU 2.0.
U_OPTICAL_CHARACTER_RECOGNITION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ENCLOSED_ALPHANUMERICS  @draft ICU 2.0.
U_ENCLOSED_ALPHANUMERICS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOX_DRAWING  @draft ICU 2.0.
U_BOX_DRAWING 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BLOCK_ELEMENTS  @draft ICU 2.0.
U_BLOCK_ELEMENTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_GEOMETRIC_SHAPES  @draft ICU 2.0.
U_GEOMETRIC_SHAPES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_MISCELLANEOUS_SYMBOLS  @draft ICU 2.0.
U_MISCELLANEOUS_SYMBOLS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_DINGBATS  @draft ICU 2.0.
U_DINGBATS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BRAILLE_PATTERNS  @draft ICU 2.0.
U_BRAILLE_PATTERNS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_RADICALS_SUPPLEMENT  @draft ICU 2.0.
U_CJK_RADICALS_SUPPLEMENT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANGXI_RADICALS  @draft ICU 2.0.
U_KANGXI_RADICALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS  @draft ICU 2.0.
U_IDEOGRAPHIC_DESCRIPTION_CHARACTERS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION  @draft ICU 2.0.
U_CJK_SYMBOLS_AND_PUNCTUATION 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIRAGANA  @draft ICU 2.0.
U_HIRAGANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KATAKANA  @draft ICU 2.0.
U_KATAKANA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOPOMOFO  @draft ICU 2.0.
U_BOPOMOFO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_COMPATIBILITY_JAMO  @draft ICU 2.0.
U_HANGUL_COMPATIBILITY_JAMO 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_KANBUN  @draft ICU 2.0.
U_KANBUN 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_BOPOMOFO_EXTENDED  @draft ICU 2.0.
U_BOPOMOFO_EXTENDED 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS  @draft ICU 2.0.
U_ENCLOSED_CJK_LETTERS_AND_MONTHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY  @draft ICU 2.0.
U_CJK_COMPATIBILITY 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A  @draft ICU 2.0.
U_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_UNIFIED_IDEOGRAPHS  @draft ICU 2.0.
U_CJK_UNIFIED_IDEOGRAPHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_YI_SYLLABLES  @draft ICU 2.0.
U_YI_SYLLABLES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_YI_RADICALS  @draft ICU 2.0.
U_YI_RADICALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HANGUL_SYLLABLES  @draft ICU 2.0.
U_HANGUL_SYLLABLES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIGH_SURROGATES  @draft ICU 2.0.
U_HIGH_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HIGH_PRIVATE_USE_SURROGATES  @draft ICU 2.0.
U_HIGH_PRIVATE_USE_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_LOW_SURROGATES  @draft ICU 2.0.
U_LOW_SURROGATES 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_PRIVATE_USE  @draft ICU 2.0.
UBLOCK_PRIVATE_USE_AREA 
Deprecated:
Use UBLOCK_PRIVATE_USE. Remove after Aug, 2002
U_PRIVATE_USE_AREA 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS  @draft ICU 2.0.
U_CJK_COMPATIBILITY_IDEOGRAPHS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ALPHABETIC_PRESENTATION_FORMS  @draft ICU 2.0.
U_ALPHABETIC_PRESENTATION_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC_PRESENTATION_FORMS_A  @draft ICU 2.0.
U_ARABIC_PRESENTATION_FORMS_A 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_COMBINING_HALF_MARKS  @draft ICU 2.0.
U_COMBINING_HALF_MARKS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_CJK_COMPATIBILITY_FORMS  @draft ICU 2.0.
U_CJK_COMPATIBILITY_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SMALL_FORM_VARIANTS  @draft ICU 2.0.
U_SMALL_FORM_VARIANTS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_ARABIC_PRESENTATION_FORMS_B  @draft ICU 2.0.
U_ARABIC_PRESENTATION_FORMS_B 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_SPECIALS  @draft ICU 2.0.
U_SPECIALS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS  @draft ICU 2.0.
U_HALFWIDTH_AND_FULLWIDTH_FORMS 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_OLD_ITALIC  @draft ICU 2.0.
UBLOCK_GOTHIC  @draft ICU 2.0.
UBLOCK_DESERET  @draft ICU 2.0.
UBLOCK_BYZANTINE_MUSICAL_SYMBOLS  @draft ICU 2.0.
UBLOCK_MUSICAL_SYMBOLS  @draft ICU 2.0.
UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS  @draft ICU 2.0.
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  @draft ICU 2.0.
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT  @draft ICU 2.0.
UBLOCK_TAGS  @draft ICU 2.0.
UBLOCK_COUNT  @draft ICU 2.0.
U_SCRIPT_COUNT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
UBLOCK_INVALID_CODE  @draft ICU 2.0.
U_CHAR_SCRIPT_COUNT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002
U_NO_SCRIPT 
Deprecated:
Use the enum that begins with UBLOCK. Remove after Aug, 2002

enum UCellWidth
 

Values returned by the u_charCellWidth() function.

@stable

Enumeration values:
U_ZERO_WIDTH  @stable.
U_HALF_WIDTH  @stable.
U_FULL_WIDTH  @stable.
U_NEUTRAL_WIDTH  @stable.
U_CELL_WIDTH_COUNT  @stable.

enum UCharCategory
 

Data for enumerated Unicode general category types.

See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . @stable

Enumeration values:
U_UNASSIGNED  Non-category for unassigned and non-character code points.

@stable

U_GENERAL_OTHER_TYPES  Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @draft ICU 2.0.
U_UPPERCASE_LETTER  Lu @stable.
U_LOWERCASE_LETTER  Ll @stable.
U_TITLECASE_LETTER  Lt @stable.
U_MODIFIER_LETTER  Lm @stable.
U_OTHER_LETTER  Lo @stable.
U_NON_SPACING_MARK  Mn @stable.
U_ENCLOSING_MARK  Me @stable.
U_COMBINING_SPACING_MARK  Mc @stable.
U_DECIMAL_DIGIT_NUMBER  Nd @stable.
U_LETTER_NUMBER  Nl @stable.
U_OTHER_NUMBER  No @stable.
U_SPACE_SEPARATOR  Zs @stable.
U_LINE_SEPARATOR  Zl @stable.
U_PARAGRAPH_SEPARATOR  Zp @stable.
U_CONTROL_CHAR  Cc @stable.
U_FORMAT_CHAR  Cf @stable.
U_PRIVATE_USE_CHAR  Co @stable.
U_SURROGATE  Cs @stable.
U_DASH_PUNCTUATION  Pd @stable.
U_START_PUNCTUATION  Ps @stable.
U_END_PUNCTUATION  Pe @stable.
U_CONNECTOR_PUNCTUATION  Pc @stable.
U_OTHER_PUNCTUATION  Po @stable.
U_MATH_SYMBOL  Sm @stable.
U_CURRENCY_SYMBOL  Sc @stable.
U_MODIFIER_SYMBOL  Sk @stable.
U_OTHER_SYMBOL  So @stable.
U_INITIAL_PUNCTUATION  Pi @stable.
U_FINAL_PUNCTUATION  Pf @stable.
U_CHAR_CATEGORY_COUNT  One higher than the last enum UCharCategory constant.

@stable

enum UCharDirection
 

This specifies the language directional property of a character set.

@stable

Enumeration values:
U_LEFT_TO_RIGHT  L @stable.
U_RIGHT_TO_LEFT  R @stable.
U_EUROPEAN_NUMBER  EN @stable.
U_EUROPEAN_NUMBER_SEPARATOR  ES @stable.
U_EUROPEAN_NUMBER_TERMINATOR  ET @stable.
U_ARABIC_NUMBER  AN @stable.
U_COMMON_NUMBER_SEPARATOR  CS @stable.
U_BLOCK_SEPARATOR  B @stable.
U_SEGMENT_SEPARATOR  S @stable.
U_WHITE_SPACE_NEUTRAL  WS @stable.
U_OTHER_NEUTRAL  ON @stable.
U_LEFT_TO_RIGHT_EMBEDDING  LRE @stable.
U_LEFT_TO_RIGHT_OVERRIDE  LRO @stable.
U_RIGHT_TO_LEFT_ARABIC  AL @stable.
U_RIGHT_TO_LEFT_EMBEDDING  RLE @stable.
U_RIGHT_TO_LEFT_OVERRIDE  RLO @stable.
U_POP_DIRECTIONAL_FORMAT  PDF @stable.
U_DIR_NON_SPACING_MARK  NSM @stable.
U_BOUNDARY_NEUTRAL  BN @stable.
U_CHAR_DIRECTION_COUNT  @stable.

enum UCharNameChoice
 

Selector constants for u_charName().

u_charName() returns the "modern" name of a Unicode character; or the name that was defined in Unicode version 1.0, before the Unicode standard merged with ISO-10646; or an "extended" name that gives each Unicode code point a unique name.

See also:
u_charName @stable


Function Documentation

U_CAPI uint16_t U_EXPORT2 u_charCellWidth UChar32    c
 

Returns a value indicating the display-cell width of the character when used in Asian text, according to the Unicode standard (see p. 6-130 of The Unicode Standard, Version 2.0).

The results for various characters are as follows:

ZERO_WIDTH: Characters which are considered to take up no display-cell space: control characters format characters line and paragraph separators non-spacing marks combining Hangul jungseong combining Hangul jongseong unassigned Unicode values

HALF_WIDTH: Characters which take up half a cell in standard Asian text: all characters in the General Scripts Area except combining Hangul choseong and the characters called out specifically above as ZERO_WIDTH alphabetic and Arabic presentation forms halfwidth CJK punctuation halfwidth Katakana halfwidth Hangul Jamo halfwidth forms, arrows, and shapes

FULL_WIDTH: Characters which take up a full cell in standard Asian text: combining Hangul choseong all characters in the CJK Phonetics and Symbols Area all characters in the CJK Ideographs Area all characters in the Hangul Syllables Area CJK compatibility ideographs CJK compatibility forms small form variants fullwidth ASCII fullwidth punctuation and currency signs

NEUTRAL: Characters whose cell width is context-dependent: all characters in the Symbols Area, except those specifically called out above all characters in the Surrogates Area all characters in the Private Use Area

For Korean text, this algorithm should work properly with properly normalized Korean text. Precomposed Hangul syllables and non-combining jamo are all considered full-width characters. For combining jamo, we treat choseong (initial consonants) as double-width characters and jungseong (vowels) and jongseong (final consonants) as non-spacing marks. This will work right in text that uses the precomposed choseong characters instead of two choseong characters in a row, and which uses the choseong filler character at the beginning of syllables that don't have an initial consonant. The results may be slightly off with Korean text following different conventions. @stable

U_CAPI int32_t U_EXPORT2 u_charDigitValue UChar32    c
 

Retrieves the decimal numeric value of a digit character.

Parameters:
c  the digit character for which to get the numeric value
Returns:
the numeric value of c in decimal radix. This method returns -1 if c is not a valid digit character. @stable

U_CAPI UCharDirection U_EXPORT2 u_charDirection UChar32    c
 

Returns the linguistic direction property of a character.

Returns the linguistic direction property of a character. For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional property.

See also:
UCharDirection @stable

U_CAPI UChar32 U_EXPORT2 u_charFromName UCharNameChoice    nameChoice,
const char *    name,
UErrorCode *    pErrorCode
 

Find a Unicode character by its name and return its code point value.

The name is matched exactly and completely. If the name does not correspond to a code point, pErrorCode is set to U_INVALID_CHAR_FOUND. A Unicode 1.0 name is matched only if it differs from the modern name. Unicode names are all uppercase. Extended names are lowercase followed by an uppercase hexadecimal number, and within angle brackets.

Parameters:
nameChoice  Selector for which name to match.
name  The name to match.
pErrorCode  Pointer to a UErrorCode variable
Returns:
The Unicode value of the code point with the given name, or an undefined value if there is no such code point.
See also:
UCharNameChoice , u_charName , u_enumCharNames

U_CAPI UChar32 U_EXPORT2 u_charMirror UChar32    c
 

Maps the specified character to a "mirror-image" character.

For characters with the "mirrored" property, implementations sometimes need a "poor man's" mapping to another Unicode character (code point) such that the default glyph may serve as the mirror-image of the default glyph of the specified character. This is useful for text conversion to and from codepages with visual order, and for displays without glyph selection capabilities.

Parameters:
c  the character (code point, Unicode scalar value) to be mapped
Returns:
another Unicode code point that may serve as a mirror-image substitute, or c itself if there is no such mapping or c does not have the "mirrored" property @stable

U_CAPI UTextOffset U_EXPORT2 u_charName UChar32    code,
UCharNameChoice    nameChoice,
char *    buffer,
UTextOffset    bufferLength,
UErrorCode *    pErrorCode
 

Retrieve the name of a Unicode character.

Depending on nameChoice, the character name written into the buffer is the "modern" name or the name that was defined in Unicode version 1.0. The name contains only "invariant" characters like A-Z, 0-9, space, and '-'. Unicode 1.0 names are only retrieved if they are different from the modern names and if the data file contains the data for them. gennames may or may not be called with a command line option to include 1.0 names in unames.dat.

Parameters:
code  The character (code point) for which to get the name. It must be 0<=code<=0x10ffff.
nameChoice  Selector for which name to get.
buffer  Destination address for copying the name. The name will always be zero-terminated. If there is no name, then the buffer will be set to the empty string.
bufferLength  ==sizeof(buffer)
pErrorCode  Pointer to a UErrorCode variable; check for U_SUCCESS() after u_charName() returns.
Returns:
The length of the name, or 0 if there is no name for this character. If the bufferLength is less than or equal to the length, then the buffer contains the truncated name and the returned length indicates the full length of the name. The length does not include the zero-termination.
See also:
UCharNameChoice , u_charFromName , u_enumCharNames @stable

U_CAPI int8_t U_EXPORT2 u_charType UChar32    c
 

Returns a value indicating a character category.

The categories are taken from the Unicode Character Database (UCD) in UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
a value of type int8_t, the character category (one of the UCharCategory constants).
See also:
UCharCategory @stable

U_CAPI int32_t U_EXPORT2 u_digit UChar32    ch,
int8_t    radix
 

Returns the numeric value of the character ch in the specified radix.

If the radix is not in the range 2 <= radix <= 36 or if the value of ch is not a valid digit in the specified radix, -1 is returned. A character is a valid digit if at least one of the following is true:

  • The method u_isdigit is true of the character and the Unicode decimal digit value of the character (or its single-character decomposition) is less than the specified radix. In this case the decimal digit value is returned.
  • The character is one of the uppercase Latin letters 'A' through 'Z' and its code is less than radix + 'A' - 10. In this case, ch - 'A' + 10 is returned.
  • The character is one of the lowercase Latin letters 'a' through 'z' and its code is less than radix + 'a' - 10. In this case, ch - 'a' + 10 is returned.
Parameters:
ch  the character to be converted.
radix  the radix.
Returns:
the numeric value represented by the character in the specified radix.
See also:
u_forDigit , u_charDigitValue , u_isdigit @draft ICU 2.0

U_CAPI void U_EXPORT2 u_enumCharNames UChar32    start,
UChar32    limit,
UEnumCharNamesFn   fn,
void *    context,
UCharNameChoice    nameChoice,
UErrorCode *    pErrorCode
 

Enumerate all assigned Unicode characters between the start and limit code points (start inclusive, limit exclusive) and call a function for each, passing the code point value and the character name.

For Unicode 1.0 names, only those are enumerated that differ from the modern names.

Parameters:
start  The first code point in the enumeration range.
limit  One more than the last code point in the enumeration range (the first one after the range).
fn  The function that is to be called for each character name.
context  An arbitrary pointer that is passed to the function.
nameChoice  Selector for which kind of names to enumerate.
pErrorCode  Pointer to a UErrorCode variable
See also:
UCharNameChoice , UEnumCharNamesFn , u_charName , u_charFromName

U_CAPI void U_EXPORT2 u_enumCharTypes UCharEnumTypeRange   enumRange,
const void *    context
 

Enumerate efficiently all code points with their Unicode general categories.

This is useful for building data structures (e.g., UnicodeSet's), for enumerating all assigned code points (type!=U_UNASSIGNED), etc.

For each contiguous range of code points with a given general category ("character type"), the UCharEnumTypeRange function is called. Adjacent ranges have different types. The Unicode Standard guarantees that the numeric value of the type is 0..31.

Parameters:
enumRange  a pointer to a function that is called for each contiguous range of code points with the same general category
context  an opaque pointer that is passed on to the callback function
@draft ICU 2.1
See also:
UCharCategory , UCharEnumTypeRange

U_CAPI UChar32 U_EXPORT2 u_foldCase UChar32    c,
uint32_t    options
 

The given character is mapped to its case folding equivalent according to UnicodeData.txt and CaseFolding.txt; if the character has no case folding equivalent, the character itself is returned.

Only "simple", single-code point case folding mappings are used. "Full" mappings are used by u_strFoldCase().

Parameters:
c  the character to be converted
options  Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
Returns:
the case folding equivalent of the character, if any; otherwise the character itself. @draft ICU 1.8

U_CAPI UChar32 U_EXPORT2 u_forDigit int32_t    digit,
int8_t    radix
 

Determines the character representation for a specific digit in the specified radix.

If the value of radix is not a valid radix, or the value of digit is not a valid digit in the specified radix, the null character (U+0000) is returned.

The radix argument is valid if it is greater than or equal to 2 and less than or equal to 36. The digit argument is valid if 0 <= digit < radix.

If the digit is less than 10, then '0' + digit is returned. Otherwise, the value 'a' + digit - 10 is returned.

Parameters:
digit  the number to convert to a character.
radix  the radix.
Returns:
the char representation of the specified digit in the specified radix.
See also:
u_digit , u_charDigitValue , u_isdigit @draft ICU 2.0

U_CAPI uint8_t U_EXPORT2 u_getCombiningClass UChar32    c
 

Returns the combining class of the code point as specified in UnicodeData.txt.

Parameters:
c  the code point of the character
Returns:
the combining class of the character @stable

U_CAPI void U_EXPORT2 u_getUnicodeVersion UVersionInfo    info
 

Gets the Unicode version information.

The version array stores the version information for the Unicode standard that is currently used by ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.

Parameters:
info  the version # information, the result will be filled in @stable

U_CAPI UBool U_EXPORT2 u_isalnum UChar32    c
 

Determines whether the specified character is an alphanumeric character (letter or digit) according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a letter or a digit; false otherwise. @stable

U_CAPI UBool U_EXPORT2 u_isalpha UChar32    c
 

Determines whether the specified character is a letter according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a letter; false otherwise.
See also:
u_isdigit , u_isalnum @stable

U_CAPI UBool U_EXPORT2 u_isbase UChar32    c
 

Determines whether the specified character is of the base form according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the Unicode character is of the base form; false otherwise.
See also:
u_isalpha , u_isdigit @stable

U_CAPI UBool U_EXPORT2 u_iscntrl UChar32    c
 

Determines whether the specified character is a control character or not.

A control character is one of the following:

  • ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
  • U_CONTROL_CHAR (Cc)
  • U_FORMAT_CHAR (Cf)
  • U_LINE_SEPARATOR (Zl)
  • U_PARAGRAPH_SEPARATOR (Zp)
Parameters:
c  the character to be tested
Returns:
true if the Unicode character is a control character; false otherwise.
See also:
u_isprint @stable

U_CAPI UBool U_EXPORT2 u_isdefined UChar32    c
 

Determines whether the specified numeric value is actually a defined character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character has a defined Unicode meaning; false otherwise.
See also:
u_isdigit , u_isalpha , u_isalnum , u_isupper , u_islower , u_istitle @stable

U_CAPI UBool U_EXPORT2 u_isdigit UChar32    c
 

Determines whether the specified character is a digit according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is a digit; false otherwise. @stable

U_CAPI UBool U_EXPORT2 u_isIDIgnorable UChar32    c
 

A convenience method for determining if a Unicode character should be regarded as an ignorable character in a Unicode identifier.

The following Unicode characters are ignorable in a Unicode identifier:
0x0000 through 0x0008, 0x000E through 0x001B, and 0x007F through 0x009F: ISO control characters that are not whitespace
0x200C through 0x200F: join controls
0x202A through 0x202E: bidirectional controls
0x206A through 0x206F: format controls
0xFEFF: zero-width no-break space

Parameters:
c  the Unicode character.
Returns:
TRUE if the character is ignorable in a Unicode identifier; FALSE otherwise.
See also:
u_isIDPart @stable

U_CAPI UBool U_EXPORT2 u_isIDPart UChar32    c
 

A convenience method for determining if a Unicode character may be part of a Unicode identifier other than the starting character.

A character may be part of a Unicode identifier if and only if it is one of the following:

  • a letter
  • a connecting punctuation character (such as "_").
  • a digit
  • a numeric letter (such as a Roman numeral character)
  • a combining mark
  • a non-spacing mark
  • an ignorable control character
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may be part of a Unicode identifier; FALSE otherwise.
See also:
u_isIDIgnorable , u_isIDStart @stable

U_CAPI UBool U_EXPORT2 u_isIDStart UChar32    c
 

A convenience method for determining if a Unicode character is allowed to start in a Unicode identifier.

A character may start a Unicode identifier if and only if it is a letter.

Parameters:
c  the Unicode character.
Returns:
TRUE if the character may start a Unicode identifier; FALSE otherwise.
See also:
u_isalpha , u_isIDPart @stable

U_CAPI UBool U_EXPORT2 u_isJavaIDPart UChar32    c
 

A convenience method for determining if a Unicode character may be part of a Java identifier other than the starting character.

A character may be part of a Java identifier if and only if it is one of the following:

  • a letter
  • a currency symbol (such as "$")
  • a connecting punctuation character (such as "_").
  • a digit
  • a numeric letter (such as a Roman numeral character)
  • a combining mark
  • a non-spacing mark
  • an ignorable control character
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may be part of a Java identifier; FALSE otherwise.
See also:
u_isIDIgnorable , u_isJavaIDStart , u_isalpha , u_isdigit , u_isIDPart @stable

U_CAPI UBool U_EXPORT2 u_isJavaIDStart UChar32    c
 

A convenience method for determining if a Unicode character is allowed as the first character in a Java identifier.

A character may start a Java identifier if and only if it is one of the following:

  • a letter
  • a currency symbol (such as "$")
  • a connecting punctuation symbol (such as "_").
Parameters:
c  the Unicode character.
Returns:
TRUE if the character may start a Java identifier; FALSE otherwise.
See also:
u_isJavaIDPart , u_isalpha , u_isIDStart @stable

U_CAPI UBool U_EXPORT2 u_islower UChar32    c
 

Determines whether the specified UChar is a lowercase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is lowercase; false otherwise.
See also:
U_UNICODE_VERSION , u_isupper , u_istitle , u_tolower @stable

U_CAPI UBool U_EXPORT2 u_isMirrored UChar32    c
 

Determines whether the character has the "mirrored" property.

This property is set for characters that are commonly used in Right-To-Left contexts and need to be displayed with a "mirrored" glyph.

Parameters:
c  the character (code point, Unicode scalar value) to be tested
Returns:
TRUE if the character has the "mirrored" property @stable

U_CAPI UBool U_EXPORT2 u_isprint UChar32    c
 

Determines whether the specified character is a printable character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the Unicode character is a printable character; false otherwise.
See also:
u_iscntrl @stable

U_CAPI UBool U_EXPORT2 u_isspace UChar32    c
 

Determines if the specified character is a space character or not.

Parameters:
c  the character to be tested
Returns:
true if the character is a space character; false otherwise. @stable

U_CAPI UBool U_EXPORT2 u_istitle UChar32    c
 

Determines whether the specified character is a titlecase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is titlecase; false otherwise.
See also:
u_isupper , u_islower , u_totitle @stable

U_CAPI UBool U_EXPORT2 u_isupper UChar32    c
 

Determines whether the specified character is an uppercase character according to UnicodeData.txt.

Parameters:
c  the character to be tested
Returns:
true if the character is uppercase; false otherwise.
See also:
u_islower , u_istitle , u_tolower @stable

U_CAPI UBool U_EXPORT2 u_isWhitespace UChar32    c
 

Determines if the specified character is white space according to ICU.

A character is considered to be an ICU whitespace character if and only if it satisfies one of the following criteria:

  • It is a Unicode space separator (category "Zs"), but is not a no-break space (\u00A0 or \uFEFF).
  • It is a Unicode line separator (category "Zl").
  • It is a Unicode paragraph separator (category "Zp").
  • It is \u0009, HORIZONTAL TABULATION.
  • It is \u000A, LINE FEED.
  • It is \u000B, VERTICAL TABULATION.
  • It is \u000C, FORM FEED.
  • It is \u000D, CARRIAGE RETURN.
  • It is \u001C, FILE SEPARATOR.
  • It is \u001D, GROUP SEPARATOR.
  • It is \u001E, RECORD SEPARATOR.
  • It is \u001F, UNIT SEPARATOR.
Note: This method corresponds to the Java method java.lang.Character.isWhitespace().
Parameters:
c  the character to be tested.
Returns:
true if the character is an ICU whitespace character; false otherwise.
See also:
u_isspace @stable

U_CAPI UChar32 U_EXPORT2 u_tolower UChar32    c
 

The given character is mapped to its lowercase equivalent according to UnicodeData.txt; if the character has no lowercase equivalent, the character itself is returned.

A character has a lowercase equivalent if and only if a lowercase mapping is specified for the character in the UnicodeData.txt attribute table.

u_tolower() only deals with the general letter case conversion. For language specific case conversion behavior, use u_strToLower(). For example, the case conversion for dot-less i and dotted I in Turkish, or for final sigma in Greek.

Parameters:
c  the character to be converted
Returns:
the lowercase equivalent of the character, if any; otherwise the character itself. @stable

U_CAPI UChar32 U_EXPORT2 u_totitle UChar32    c
 

The given character is mapped to its titlecase equivalent according to UnicodeData.txt.

There are only four Unicode characters that are truly titlecase forms that are distinct from uppercase forms. As a rule, if a character has no true titlecase equivalent, its uppercase equivalent is returned.

A character has a titlecase equivalent if and only if a titlecase mapping is specified for the character in the UnicodeData.txt data.

Parameters:
c  the character to be converted
Returns:
the titlecase equivalent of the character, if any; otherwise the character itself. @stable

U_CAPI UChar32 U_EXPORT2 u_toupper UChar32    c
 

The given character is mapped to its uppercase equivalent according to UnicodeData.txt; if the character has no uppercase equivalent, the character itself is returned.

u_toupper() only deals with the general letter case conversion. For language specific case conversion behavior, use u_strToUpper(). For example, the case conversion for dot-less i and dotted I in Turkish, or ess-zed (i.e., "sharp S") in German.

Parameters:
c  the character to be converted
Returns:
the uppercase equivalent of the character, if any; otherwise the character itself. @stable

U_CAPI UBlockCode U_EXPORT2 ublock_getCode UChar32    ch
 

Returns the Unicode allocation block that contains the character.

See also:
UBlockCode @draft ICU 2.0


Generated on Tue Mar 5 06:25:21 2002 for ICU 2.0 by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002