ICU 66.1 66.1
regex.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2002-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: regex.h
9* encoding: UTF-8
10* indentation:4
11*
12* created on: 2002oct22
13* created by: Andy Heninger
14*
15* ICU Regular Expressions, API for C++
16*/
17
18#ifndef REGEX_H
19#define REGEX_H
20
21//#define REGEX_DEBUG
22
45#include "unicode/utypes.h"
46
47#if U_SHOW_CPLUSPLUS_API
48
49#if !UCONFIG_NO_REGULAR_EXPRESSIONS
50
51#include "unicode/uobject.h"
52#include "unicode/unistr.h"
53#include "unicode/utext.h"
54#include "unicode/parseerr.h"
55
56#include "unicode/uregex.h"
57
58// Forward Declarations
59
60struct UHashtable;
61
62U_NAMESPACE_BEGIN
63
64struct Regex8BitSet;
65class RegexCImpl;
66class RegexMatcher;
67class RegexPattern;
68struct REStackFrame;
69class RuleBasedBreakIterator;
70class UnicodeSet;
71class UVector;
72class UVector32;
73class UVector64;
74
75
88public:
89
98
106
112 virtual ~RegexPattern();
113
122 UBool operator==(const RegexPattern& that) const;
123
132 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
133
139 RegexPattern &operator =(const RegexPattern &source);
140
148 virtual RegexPattern *clone() const;
149
150
175 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
176 UParseError &pe,
177 UErrorCode &status);
178
205 static RegexPattern * U_EXPORT2 compile( UText *regex,
206 UParseError &pe,
207 UErrorCode &status);
208
233 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
234 uint32_t flags,
235 UParseError &pe,
236 UErrorCode &status);
237
264 static RegexPattern * U_EXPORT2 compile( UText *regex,
265 uint32_t flags,
266 UParseError &pe,
267 UErrorCode &status);
268
291 static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
292 uint32_t flags,
293 UErrorCode &status);
294
319 static RegexPattern * U_EXPORT2 compile( UText *regex,
320 uint32_t flags,
321 UErrorCode &status);
322
328 virtual uint32_t flags() const;
329
347 virtual RegexMatcher *matcher(const UnicodeString &input,
348 UErrorCode &status) const;
349
350private:
363 RegexMatcher *matcher(const char16_t *input,
364 UErrorCode &status) const;
365public:
366
367
379 virtual RegexMatcher *matcher(UErrorCode &status) const;
380
381
396 static UBool U_EXPORT2 matches(const UnicodeString &regex,
397 const UnicodeString &input,
398 UParseError &pe,
399 UErrorCode &status);
400
415 static UBool U_EXPORT2 matches(UText *regex,
416 UText *input,
417 UParseError &pe,
418 UErrorCode &status);
419
428 virtual UnicodeString pattern() const;
429
430
441 virtual UText *patternText(UErrorCode &status) const;
442
443
457 virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
458
459
476 virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
477
478
517 virtual int32_t split(const UnicodeString &input,
518 UnicodeString dest[],
519 int32_t destCapacity,
520 UErrorCode &status) const;
521
522
561 virtual int32_t split(UText *input,
562 UText *dest[],
563 int32_t destCapacity,
564 UErrorCode &status) const;
565
566
573
579 static UClassID U_EXPORT2 getStaticClassID();
580
581private:
582 //
583 // Implementation Data
584 //
585 UText *fPattern; // The original pattern string.
586 UnicodeString *fPatternString; // The original pattern UnicodeString if relevant
587 uint32_t fFlags; // The flags used when compiling the pattern.
588 //
589 UVector64 *fCompiledPat; // The compiled pattern p-code.
590 UnicodeString fLiteralText; // Any literal string data from the pattern,
591 // after un-escaping, for use during the match.
592
593 UVector *fSets; // Any UnicodeSets referenced from the pattern.
594 Regex8BitSet *fSets8; // (and fast sets for latin-1 range.)
595
596
597 UErrorCode fDeferredStatus; // status if some prior error has left this
598 // RegexPattern in an unusable state.
599
600 int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
601 // >= this value. For some patterns, this calculated
602 // value may be less than the true shortest
603 // possible match.
604
605 int32_t fFrameSize; // Size of a state stack frame in the
606 // execution engine.
607
608 int32_t fDataSize; // The size of the data needed by the pattern that
609 // does not go on the state stack, but has just
610 // a single copy per matcher.
611
612 UVector32 *fGroupMap; // Map from capture group number to position of
613 // the group's variables in the matcher stack frame.
614
615 UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined
616 // regex character classes, e.g. Word.
617
618 Regex8BitSet *fStaticSets8; // Ptr to the static (shared) latin-1 only
619 // sets for predefined regex classes.
620
621 int32_t fStartType; // Info on how a match must start.
622 int32_t fInitialStringIdx; //
623 int32_t fInitialStringLen;
624 UnicodeSet *fInitialChars;
625 UChar32 fInitialChar;
626 Regex8BitSet *fInitialChars8;
627 UBool fNeedsAltInput;
628
629 UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
630
631 friend class RegexCompile;
632 friend class RegexMatcher;
633 friend class RegexCImpl;
634
635 //
636 // Implementation Methods
637 //
638 void init(); // Common initialization, for use by constructors.
639 bool initNamedCaptureMap(); // Lazy init for fNamedCaptureMap.
640 void zap(); // Common cleanup
641
642 void dumpOp(int32_t index) const;
643
644 public:
645#ifndef U_HIDE_INTERNAL_API
650 void dumpPattern() const;
651#endif /* U_HIDE_INTERNAL_API */
652};
653
654
655
666public:
667
681 RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
682
697 RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
698
719 RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
720 uint32_t flags, UErrorCode &status);
721
742 RegexMatcher(UText *regexp, UText *input,
743 uint32_t flags, UErrorCode &status);
744
745private:
757 RegexMatcher(const UnicodeString &regexp, const char16_t *input,
758 uint32_t flags, UErrorCode &status);
759public:
760
761
767 virtual ~RegexMatcher();
768
769
776 virtual UBool matches(UErrorCode &status);
777
778
789 virtual UBool matches(int64_t startIndex, UErrorCode &status);
790
791
805 virtual UBool lookingAt(UErrorCode &status);
806
807
821 virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
822
823
836 virtual UBool find();
837
838
853 virtual UBool find(UErrorCode &status);
854
864 virtual UBool find(int64_t start, UErrorCode &status);
865
866
876 virtual UnicodeString group(UErrorCode &status) const;
877
878
896 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
897
903 virtual int32_t groupCount() const;
904
905
920 virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
921
942 virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
943
951 virtual int32_t start(UErrorCode &status) const;
952
960 virtual int64_t start64(UErrorCode &status) const;
961
962
976 virtual int32_t start(int32_t group, UErrorCode &status) const;
977
991 virtual int64_t start64(int32_t group, UErrorCode &status) const;
992
1006 virtual int32_t end(UErrorCode &status) const;
1007
1021 virtual int64_t end64(UErrorCode &status) const;
1022
1023
1041 virtual int32_t end(int32_t group, UErrorCode &status) const;
1042
1060 virtual int64_t end64(int32_t group, UErrorCode &status) const;
1061
1071
1072
1088 virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
1089
1090
1108 virtual RegexMatcher &reset(const UnicodeString &input);
1109
1110
1124 virtual RegexMatcher &reset(UText *input);
1125
1126
1152
1153private:
1166 RegexMatcher &reset(const char16_t *input);
1167public:
1168
1176 virtual const UnicodeString &input() const;
1177
1186 virtual UText *inputText() const;
1187
1198 virtual UText *getInput(UText *dest, UErrorCode &status) const;
1199
1200
1219 virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1220
1232 virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1233
1242 virtual int32_t regionStart() const;
1243
1252 virtual int64_t regionStart64() const;
1253
1254
1263 virtual int32_t regionEnd() const;
1264
1273 virtual int64_t regionEnd64() const;
1274
1284
1304
1305
1313 virtual UBool hasAnchoringBounds() const;
1314
1315
1329
1330
1343 virtual UBool hitEnd() const;
1344
1354 virtual UBool requireEnd() const;
1355
1356
1362 virtual const RegexPattern &pattern() const;
1363
1364
1381 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1382
1383
1404 virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1405
1406
1427 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
1428
1429
1454 virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1455
1456
1485 const UnicodeString &replacement, UErrorCode &status);
1486
1487
1516 UText *replacement, UErrorCode &status);
1517
1518
1530
1531
1545 virtual UText *appendTail(UText *dest, UErrorCode &status);
1546
1547
1571 virtual int32_t split(const UnicodeString &input,
1572 UnicodeString dest[],
1573 int32_t destCapacity,
1574 UErrorCode &status);
1575
1576
1600 virtual int32_t split(UText *input,
1601 UText *dest[],
1602 int32_t destCapacity,
1603 UErrorCode &status);
1604
1626 virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1627
1634 virtual int32_t getTimeLimit() const;
1635
1657 virtual void setStackLimit(int32_t limit, UErrorCode &status);
1658
1666 virtual int32_t getStackLimit() const;
1667
1668
1683 const void *context,
1684 UErrorCode &status);
1685
1686
1697 virtual void getMatchCallback(URegexMatchCallback *&callback,
1698 const void *&context,
1699 UErrorCode &status);
1700
1701
1716 const void *context,
1717 UErrorCode &status);
1718
1719
1731 const void *&context,
1732 UErrorCode &status);
1733
1734#ifndef U_HIDE_INTERNAL_API
1740 void setTrace(UBool state);
1741#endif /* U_HIDE_INTERNAL_API */
1742
1748 static UClassID U_EXPORT2 getStaticClassID();
1749
1756
1757private:
1758 // Constructors and other object boilerplate are private.
1759 // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1760 RegexMatcher(); // default constructor not implemented
1761 RegexMatcher(const RegexPattern *pat);
1762 RegexMatcher(const RegexMatcher &other);
1763 RegexMatcher &operator =(const RegexMatcher &rhs);
1764 void init(UErrorCode &status); // Common initialization
1765 void init2(UText *t, UErrorCode &e); // Common initialization, part 2.
1766
1767 friend class RegexPattern;
1768 friend class RegexCImpl;
1769public:
1770#ifndef U_HIDE_INTERNAL_API
1772 void resetPreserveRegion(); // Reset matcher state, but preserve any region.
1773#endif /* U_HIDE_INTERNAL_API */
1774private:
1775
1776 //
1777 // MatchAt This is the internal interface to the match engine itself.
1778 // Match status comes back in matcher member variables.
1779 //
1780 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1781 inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1782 UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
1783 UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test
1784 REStackFrame *resetStack();
1785 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1786 void IncrementTime(UErrorCode &status);
1787
1788 // Call user find callback function, if set. Return TRUE if operation should be interrupted.
1789 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1790
1791 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
1792
1793 UBool findUsingChunk(UErrorCode &status);
1794 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1795 UBool isChunkWordBoundary(int32_t pos);
1796
1797 const RegexPattern *fPattern;
1798 RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the pattern, and
1799 // should delete it when through.
1800
1801 const UnicodeString *fInput; // The string being matched. Only used for input()
1802 UText *fInputText; // The text being matched. Is never NULL.
1803 UText *fAltInputText; // A shallow copy of the text being matched.
1804 // Only created if the pattern contains backreferences.
1805 int64_t fInputLength; // Full length of the input text.
1806 int32_t fFrameSize; // The size of a frame in the backtrack stack.
1807
1808 int64_t fRegionStart; // Start of the input region, default = 0.
1809 int64_t fRegionLimit; // End of input region, default to input.length.
1810
1811 int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $).
1812 int64_t fAnchorLimit; // See useAnchoringBounds
1813
1814 int64_t fLookStart; // Region bounds for look-ahead/behind and
1815 int64_t fLookLimit; // other boundary tests. See
1816 // useTransparentBounds
1817
1818 int64_t fActiveStart; // Currently active bounds for matching.
1819 int64_t fActiveLimit; // Usually is the same as region, but
1820 // is changed to fLookStart/Limit when
1821 // entering look around regions.
1822
1823 UBool fTransparentBounds; // True if using transparent bounds.
1824 UBool fAnchoringBounds; // True if using anchoring bounds.
1825
1826 UBool fMatch; // True if the last attempted match was successful.
1827 int64_t fMatchStart; // Position of the start of the most recent match
1828 int64_t fMatchEnd; // First position after the end of the most recent match
1829 // Zero if no previous match, even when a region
1830 // is active.
1831 int64_t fLastMatchEnd; // First position after the end of the previous match,
1832 // or -1 if there was no previous match.
1833 int64_t fAppendPosition; // First position after the end of the previous
1834 // appendReplacement(). As described by the
1835 // JavaDoc for Java Matcher, where it is called
1836 // "append position"
1837 UBool fHitEnd; // True if the last match touched the end of input.
1838 UBool fRequireEnd; // True if the last match required end-of-input
1839 // (matched $ or Z)
1840
1841 UVector64 *fStack;
1842 REStackFrame *fFrame; // After finding a match, the last active stack frame,
1843 // which will contain the capture group results.
1844 // NOT valid while match engine is running.
1845
1846 int64_t *fData; // Data area for use by the compiled pattern.
1847 int64_t fSmallData[8]; // Use this for data if it's enough.
1848
1849 int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
1850 // match engine run. Zero for unlimited.
1851
1852 int32_t fTime; // Match time, accumulates while matching.
1853 int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
1854 // Kept separately from fTime to keep as much
1855 // code as possible out of the inline
1856 // StateSave function.
1857
1858 int32_t fStackLimit; // Maximum memory size to use for the backtrack
1859 // stack, in bytes. Zero for unlimited.
1860
1861 URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct.
1862 // NULL if there is no callback.
1863 const void *fCallbackContext; // User Context ptr for callback function.
1864
1865 URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to find progress callback funct.
1866 // NULL if there is no callback.
1867 const void *fFindProgressCallbackContext; // User Context ptr for callback function.
1868
1869
1870 UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility.
1871
1872 UBool fTraceDebug; // Set true for debug tracing of match engine.
1873
1874 UErrorCode fDeferredStatus; // Save error state that cannot be immediately
1875 // reported, or that permanently disables this matcher.
1876
1877 RuleBasedBreakIterator *fWordBreakItr;
1878};
1879
1880U_NAMESPACE_END
1881#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
1882
1883#endif /* U_SHOW_CPLUSPLUS_API */
1884
1885#endif
class RegexMatcher bundles together a regular expression pattern and input text to which the expressi...
Definition: regex.h:665
virtual int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual UText * inputText() const
Returns the input string being matched.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
void resetPreserveRegion()
virtual UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
virtual int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this Matcher.
virtual RegexMatcher & reset()
Resets this matcher.
virtual int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
virtual UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
virtual UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
virtual UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
virtual int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual UBool find()
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
virtual int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
virtual void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
virtual void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
virtual RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
virtual int64_t regionStart64() const
Reports the start index of this matcher's region.
virtual UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual ~RegexMatcher()
Destructor.
virtual UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
virtual UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
virtual UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
virtual UBool find(UErrorCode &status)
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
virtual UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
void setTrace(UBool state)
setTrace Debug function, enable/disable tracing of the matching engine.
virtual RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and sets the current input position.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
virtual int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
virtual int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
virtual const UnicodeString & input() const
Returns the input string being matched.
virtual UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this Matcher.
virtual UBool hitEnd() const
Return TRUE if the most recent matching operation attempted to access additional input beyond the ava...
virtual UBool requireEnd() const
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
virtual UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
virtual int32_t regionStart() const
Reports the start index of this matcher's region.
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
virtual RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
Class RegexPattern represents a compiled regular expression.
Definition: regex.h:87
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
virtual ~RegexPattern()
Destructor.
virtual uint32_t flags() const
Get the URegexpFlag match mode flags that were used when compiling this pattern.
void dumpPattern() const
Dump a compiled pattern.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UBool operator!=(const RegexPattern &that) const
Comparison operator.
Definition: regex.h:132
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
UBool operator==(const RegexPattern &that) const
Comparison operator.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
RegexPattern(const RegexPattern &source)
Copy Constructor.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
RegexPattern()
default constructor.
virtual UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
virtual UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:55
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:281
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
struct UHashtable UHashtable
Definition: msgfmt.h:43
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1347
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition: umachine.h:140
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1573
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1499
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301