ICU 66.1 66.1
translit.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1999-2014, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* Date Name Description
9* 11/17/99 aliu Creation.
10**********************************************************************
11*/
12#ifndef TRANSLIT_H
13#define TRANSLIT_H
14
15#include "unicode/utypes.h"
16
17#if U_SHOW_CPLUSPLUS_API
18
24#if !UCONFIG_NO_TRANSLITERATION
25
26#include "unicode/uobject.h"
27#include "unicode/unistr.h"
28#include "unicode/parseerr.h"
29#include "unicode/utrans.h" // UTransPosition, UTransDirection
30#include "unicode/strenum.h"
31
32U_NAMESPACE_BEGIN
33
34class UnicodeFilter;
35class UnicodeSet;
36class TransliteratorParser;
37class NormalizationTransliterator;
38class TransliteratorIDParser;
39
491
492private:
493
497 UnicodeString ID;
498
505 UnicodeFilter* filter;
506
507 int32_t maximumContextLength;
508
509 public:
510
/**
 * A context integer or pointer for a factory function, passed by value.
 */
516 union Token {
/** This token, interpreted as a 32-bit integer. */
521 int32_t integer;
/** This token, interpreted as a native pointer. */
526 void* pointer;
527 };
528
529#ifndef U_HIDE_INTERNAL_API
535 inline static Token integerToken(int32_t);
536
542 inline static Token pointerToken(void*);
543#endif /* U_HIDE_INTERNAL_API */
544
560 typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
561
562protected:
563
573 Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
574
580
586
599 const UnicodeString* canon);
600
601 friend class TransliteratorParser; // for parseID()
602 friend class TransliteratorIDParser; // for createBasicInstance()
603 friend class TransliteratorAlias; // for setID()
604
605public:
606
612
627 virtual Transliterator* clone() const;
628
644 virtual int32_t transliterate(Replaceable& text,
645 int32_t start, int32_t limit) const;
646
652 virtual void transliterate(Replaceable& text) const;
653
718 virtual void transliterate(Replaceable& text, UTransPosition& index,
719 const UnicodeString& insertion,
720 UErrorCode& status) const;
721
737 virtual void transliterate(Replaceable& text, UTransPosition& index,
738 UChar32 insertion,
739 UErrorCode& status) const;
740
754 virtual void transliterate(Replaceable& text, UTransPosition& index,
755 UErrorCode& status) const;
756
769 UTransPosition& index) const;
770
771private:
772
788 void _transliterate(Replaceable& text,
789 UTransPosition& index,
790 const UnicodeString* insertion,
791 UErrorCode &status) const;
792
793protected:
794
875 UTransPosition& pos,
876 UBool incremental) const = 0;
877
878public:
891 UTransPosition& index,
892 UBool incremental) const;
893
894private:
895
923 virtual void filteredTransliterate(Replaceable& text,
924 UTransPosition& index,
925 UBool incremental,
926 UBool rollback) const;
927
928public:
929
943 int32_t getMaximumContextLength(void) const;
944
945protected:
946
953 void setMaximumContextLength(int32_t maxContextLength);
954
955public:
956
967 virtual const UnicodeString& getID(void) const;
968
978 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
979 UnicodeString& result);
980
1002 static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
1003 const Locale& inLocale,
1004 UnicodeString& result);
1005
1013 const UnicodeFilter* getFilter(void) const;
1014
1025
1036 void adoptFilter(UnicodeFilter* adoptedFilter);
1037
1058
1075 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1076 UTransDirection dir,
1077 UParseError& parseError,
1078 UErrorCode& status);
1079
1090 static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
1091 UTransDirection dir,
1092 UErrorCode& status);
1093
1111 static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
1112 const UnicodeString& rules,
1113 UTransDirection dir,
1114 UParseError& parseError,
1115 UErrorCode& status);
1116
1129 UBool escapeUnprintable) const;
1130
1143 int32_t countElements() const;
1144
1164 const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
1165
1182
1197 virtual void handleGetSourceSet(UnicodeSet& result) const;
1198
1212 virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
1213
1214public:
1215
1232 static void U_EXPORT2 registerFactory(const UnicodeString& id,
1233 Factory factory,
1234 Token context);
1235
1257 static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1258
1273 static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1274 const UnicodeString& realID);
1275
1276protected:
1277
1278#ifndef U_HIDE_INTERNAL_API
1288 static void _registerFactory(const UnicodeString& id,
1289 Factory factory,
1290 Token context);
1291
1295 static void _registerInstance(Transliterator* adoptedObj);
1296
1300 static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1301
1335 static void _registerSpecialInverse(const UnicodeString& target,
1336 const UnicodeString& inverseTarget,
1337 UBool bidirectional);
1338#endif /* U_HIDE_INTERNAL_API */
1339
1340public:
1341
1359 static void U_EXPORT2 unregister(const UnicodeString& ID);
1360
1361public:
1362
1373
1379 static int32_t U_EXPORT2 countAvailableSources(void);
1380
1390 static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1391 UnicodeString& result);
1392
1401 static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1402
1414 static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1415 const UnicodeString& source,
1416 UnicodeString& result);
1417
1425 static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1426 const UnicodeString& target);
1427
1441 static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1442 const UnicodeString& source,
1443 const UnicodeString& target,
1444 UnicodeString& result);
1445
1446protected:
1447
1448#ifndef U_HIDE_INTERNAL_API
1453 static int32_t _countAvailableSources(void);
1454
1459 static UnicodeString& _getAvailableSource(int32_t index,
1460 UnicodeString& result);
1461
1466 static int32_t _countAvailableTargets(const UnicodeString& source);
1467
1472 static UnicodeString& _getAvailableTarget(int32_t index,
1473 const UnicodeString& source,
1474 UnicodeString& result);
1475
1480 static int32_t _countAvailableVariants(const UnicodeString& source,
1481 const UnicodeString& target);
1482
1488 const UnicodeString& source,
1489 const UnicodeString& target,
1490 UnicodeString& result);
1491#endif /* U_HIDE_INTERNAL_API */
1492
1493protected:
1494
1501 void setID(const UnicodeString& id);
1502
1503public:
1504
1515 static UClassID U_EXPORT2 getStaticClassID(void);
1516
1532 virtual UClassID getDynamicClassID(void) const = 0;
1533
1534private:
1535 static UBool initializeRegistry(UErrorCode &status);
1536
1537public:
1538#ifndef U_HIDE_OBSOLETE_API
1546 static int32_t U_EXPORT2 countAvailableIDs(void);
1547
1560 static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1561#endif /* U_HIDE_OBSOLETE_API */
1562};
1563
1564inline int32_t Transliterator::getMaximumContextLength(void) const {
1565 return maximumContextLength;
1566}
1567
1568inline void Transliterator::setID(const UnicodeString& id) {
1569 ID = id;
1570 // NUL-terminate the ID string, which is a non-aliased copy.
1571 ID.append((char16_t)0);
1572 ID.truncate(ID.length()-1);
1573}
1574
1575#ifndef U_HIDE_INTERNAL_API
1576inline Transliterator::Token Transliterator::integerToken(int32_t i) {
1577 Token t;
1578 t.integer = i;
1579 return t;
1580}
1581
1582inline Transliterator::Token Transliterator::pointerToken(void* p) {
1583 Token t;
1584 t.pointer = p;
1585 return t;
1586}
1587#endif /* U_HIDE_INTERNAL_API */
1588
1589U_NAMESPACE_END
1590
1591#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1592
1593#endif /* U_SHOW_CPLUSPLUS_API */
1594
1595#endif
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:490
static Transliterator * createFromRules(const UnicodeString &ID, const UnicodeString &rules, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object constructed from the given rule string.
virtual UClassID getDynamicClassID(void) const =0
Returns a unique class ID polymorphically.
static UnicodeString & getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Return a registered variant specifier for a given source-target pair.
virtual void transliterate(Replaceable &text) const
Transliterates an entire string in place.
static UClassID getStaticClassID(void)
Return the class ID for this class.
const UnicodeFilter * getFilter(void) const
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
const Transliterator & getElement(int32_t index, UErrorCode &ec) const
Return an element that makes up this transliterator.
virtual const UnicodeString & getID(void) const
Returns a programmatic identifier for this transliterator.
void adoptFilter(UnicodeFilter *adoptedFilter)
Changes the filter used by this transliterator.
static int32_t countAvailableIDs(void)
Return the number of IDs currently registered with the system.
static UnicodeString & _getAvailableVariant(int32_t index, const UnicodeString &source, const UnicodeString &target, UnicodeString &result)
Non-mutexed internal method.
static void _registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
static void unregister(const UnicodeString &ID)
Unregisters a transliterator or class.
static UnicodeString & getDisplayName(const UnicodeString &ID, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the default loc...
virtual int32_t transliterate(Replaceable &text, int32_t start, int32_t limit) const
Transliterates a segment of a string, with optional filtering.
Transliterator & operator=(const Transliterator &)
Assignment operator.
virtual UnicodeSet & getTargetSet(UnicodeSet &result) const
Returns the set of all characters that may be generated as replacement text by this transliterator.
virtual void handleGetSourceSet(UnicodeSet &result) const
Framework method that returns the set of all characters that may be modified in the input text by thi...
static UnicodeString & getDisplayName(const UnicodeString &ID, const Locale &inLocale, UnicodeString &result)
Returns a name for this transliterator that is appropriate for display to the user in the given local...
static int32_t countAvailableSources(void)
Return the number of registered source specifiers.
Transliterator * createInverse(UErrorCode &status) const
Returns this transliterator's inverse.
static UnicodeString & getAvailableSource(int32_t index, UnicodeString &result)
Return a registered source specifier.
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UErrorCode &status)
Returns a Transliterator object given its ID.
virtual ~Transliterator()
Destructor.
static void registerAlias(const UnicodeString &aliasID, const UnicodeString &realID)
Registers an ID string as an alias of another ID string.
static int32_t countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Return the number of registered variant specifiers for a given source-target pair.
static Transliterator * createBasicInstance(const UnicodeString &id, const UnicodeString *canon)
Create a transliterator from a basic ID.
virtual void transliterate(Replaceable &text, UTransPosition &index, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously.
virtual Transliterator * clone() const
Implements Cloneable.
virtual void handleTransliterate(Replaceable &text, UTransPosition &pos, UBool incremental) const =0
Abstract method that concrete subclasses define to implement their transliteration algorithm.
virtual void filteredTransliterate(Replaceable &text, UTransPosition &index, UBool incremental) const
Transliterate a substring of text, as specified by index, taking filters into account.
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:560
static UnicodeString & _getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Non-mutexed internal method.
static UnicodeString & getAvailableTarget(int32_t index, const UnicodeString &source, UnicodeString &result)
Return a registered target specifier for a given source.
static void _registerSpecialInverse(const UnicodeString &target, const UnicodeString &inverseTarget, UBool bidirectional)
Register two targets as being inverses of one another.
UnicodeSet & getSourceSet(UnicodeSet &result) const
Returns the set of all characters that may be modified in the input text by this Transliterator.
static int32_t _countAvailableSources(void)
Non-mutexed internal method.
static int32_t _countAvailableTargets(const UnicodeString &source)
Non-mutexed internal method.
virtual UnicodeString & toRules(UnicodeString &result, UBool escapeUnprintable) const
Create a rule string that can be passed to createFromRules() to recreate this transliterator.
virtual void transliterate(Replaceable &text, UTransPosition &index, const UnicodeString &insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after new text ...
static int32_t countAvailableTargets(const UnicodeString &source)
Return the number of registered target specifiers for a given source specifier.
Transliterator(const UnicodeString &ID, UnicodeFilter *adoptedFilter)
Constructor taking an ID and an adopted filter.
Transliterator(const Transliterator &)
Copy constructor.
static void registerFactory(const UnicodeString &id, Factory factory, Token context)
Registers a factory function that creates transliterators of a given ID.
static UnicodeString & _getAvailableSource(int32_t index, UnicodeString &result)
Non-mutexed internal method.
void setMaximumContextLength(int32_t maxContextLength)
Method for subclasses to use to set the maximum context length.
static const UnicodeString & getAvailableID(int32_t index)
Return the index-th available ID.
static Transliterator * createInstance(const UnicodeString &ID, UTransDirection dir, UParseError &parseError, UErrorCode &status)
Returns a Transliterator object given its ID.
static void _registerFactory(const UnicodeString &id, Factory factory, Token context)
static void registerInstance(Transliterator *adoptedObj)
Registers an instance obj of a subclass of Transliterator with the system.
UnicodeFilter * orphanFilter(void)
Returns the filter used by this transliterator, or NULL if this transliterator uses no filter.
int32_t countElements() const
Return the number of elements that make up this transliterator.
virtual void finishTransliteration(Replaceable &text, UTransPosition &index) const
Finishes any pending transliterations that were waiting for more characters.
virtual void transliterate(Replaceable &text, UTransPosition &index, UChar32 insertion, UErrorCode &status) const
Transliterates the portion of the text buffer that can be transliterated unambiguously after a new cha...
static void _registerInstance(Transliterator *adoptedObj)
static StringEnumeration * getAvailableIDs(UErrorCode &ec)
Return a StringEnumeration over the IDs available at the time of the call, including user-registered ...
static int32_t _countAvailableVariants(const UnicodeString &source, const UnicodeString &target)
Non-mutexed internal method.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:65
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:281
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4730
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4622
C API: Parse Error Information.
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:122
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:516
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:526
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:521
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Transliterator.
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:80
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301