ICU 66.1
uregex.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2004-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: uregex.h
9* encoding: UTF-8
10* indentation:4
11*
12* created on: 2004mar09
13* created by: Andy Heninger
14*
15* ICU Regular Expressions, API for C
16*/
17
25#ifndef UREGEX_H
26#define UREGEX_H
27
28#include "unicode/utext.h"
29#include "unicode/utypes.h"
30
31#if !UCONFIG_NO_REGULAR_EXPRESSIONS
32
34#include "unicode/parseerr.h"
35
43
44
49typedef enum URegexpFlag{
50
51#ifndef U_HIDE_DRAFT_API
56#endif /* U_HIDE_DRAFT_API */
59
62
66
78
84
91
100
109
111
135uregex_open( const UChar *pattern,
136 int32_t patternLength,
137 uint32_t flags,
138 UParseError *pe,
139 UErrorCode *status);
140
166 uint32_t flags,
167 UParseError *pe,
168 UErrorCode *status);
169
170#if !UCONFIG_NO_CONVERSION
195uregex_openC( const char *pattern,
196 uint32_t flags,
197 UParseError *pe,
198 UErrorCode *status);
199#endif
200
201
202
210U_STABLE void U_EXPORT2
212
213#if U_SHOW_CPLUSPLUS_API
214
215U_NAMESPACE_BEGIN
216
227
228U_NAMESPACE_END
229
230#endif
231
250U_STABLE URegularExpression * U_EXPORT2
252
270U_STABLE const UChar * U_EXPORT2
272 int32_t *patLength,
273 UErrorCode *status);
274
286U_STABLE UText * U_EXPORT2
288 UErrorCode *status);
289
298U_STABLE int32_t U_EXPORT2
300 UErrorCode *status);
301
302
323U_STABLE void U_EXPORT2
325 const UChar *text,
326 int32_t textLength,
327 UErrorCode *status);
328
329
346U_STABLE void U_EXPORT2
348 UText *text,
349 UErrorCode *status);
350
371U_STABLE const UChar * U_EXPORT2
373 int32_t *textLength,
374 UErrorCode *status);
375
392U_STABLE UText * U_EXPORT2
394 UText *dest,
395 UErrorCode *status);
396
422U_STABLE void U_EXPORT2
424 UText *text,
425 UErrorCode *status);
426
447U_STABLE UBool U_EXPORT2
449 int32_t startIndex,
450 UErrorCode *status);
451
473U_STABLE UBool U_EXPORT2
475 int64_t startIndex,
476 UErrorCode *status);
477
501U_STABLE UBool U_EXPORT2
503 int32_t startIndex,
504 UErrorCode *status);
505
530U_STABLE UBool U_EXPORT2
532 int64_t startIndex,
533 UErrorCode *status);
534
554U_STABLE UBool U_EXPORT2
556 int32_t startIndex,
557 UErrorCode *status);
558
579U_STABLE UBool U_EXPORT2
581 int64_t startIndex,
582 UErrorCode *status);
583
597U_STABLE UBool U_EXPORT2
599 UErrorCode *status);
600
608U_STABLE int32_t U_EXPORT2
610 UErrorCode *status);
611
628U_STABLE int32_t U_EXPORT2
630 const UChar *groupName,
631 int32_t nameLength,
632 UErrorCode *status);
633
634
652U_STABLE int32_t U_EXPORT2
654 const char *groupName,
655 int32_t nameLength,
656 UErrorCode *status);
657
674U_STABLE int32_t U_EXPORT2
676 int32_t groupNum,
677 UChar *dest,
678 int32_t destCapacity,
679 UErrorCode *status);
680
703U_STABLE UText * U_EXPORT2
705 int32_t groupNum,
706 UText *dest,
707 int64_t *groupLength,
708 UErrorCode *status);
709
724U_STABLE int32_t U_EXPORT2
726 int32_t groupNum,
727 UErrorCode *status);
728
744U_STABLE int64_t U_EXPORT2
746 int32_t groupNum,
747 UErrorCode *status);
748
762U_STABLE int32_t U_EXPORT2
764 int32_t groupNum,
765 UErrorCode *status);
766
781U_STABLE int64_t U_EXPORT2
783 int32_t groupNum,
784 UErrorCode *status);
785
799U_STABLE void U_EXPORT2
801 int32_t index,
802 UErrorCode *status);
803
818U_STABLE void U_EXPORT2
820 int64_t index,
821 UErrorCode *status);
822
843U_STABLE void U_EXPORT2
845 int32_t regionStart,
846 int32_t regionLimit,
847 UErrorCode *status);
848
870U_STABLE void U_EXPORT2
872 int64_t regionStart,
873 int64_t regionLimit,
874 UErrorCode *status);
875
890U_STABLE void U_EXPORT2
892 int64_t regionStart,
893 int64_t regionLimit,
894 int64_t startIndex,
895 UErrorCode *status);
896
906U_STABLE int32_t U_EXPORT2
908 UErrorCode *status);
909
920U_STABLE int64_t U_EXPORT2
922 UErrorCode *status);
923
934U_STABLE int32_t U_EXPORT2
936 UErrorCode *status);
937
949U_STABLE int64_t U_EXPORT2
951 UErrorCode *status);
952
963U_STABLE UBool U_EXPORT2
965 UErrorCode *status);
966
967
987U_STABLE void U_EXPORT2
989 UBool b,
990 UErrorCode *status);
991
992
1002U_STABLE UBool U_EXPORT2
1004 UErrorCode *status);
1005
1006
1020U_STABLE void U_EXPORT2
1022 UBool b,
1023 UErrorCode *status);
1024
1035U_STABLE UBool U_EXPORT2
1037 UErrorCode *status);
1038
1050U_STABLE UBool U_EXPORT2
1052 UErrorCode *status);
1053
1054
1055
1056
1057
1082U_STABLE int32_t U_EXPORT2
1084 const UChar *replacementText,
1085 int32_t replacementLength,
1086 UChar *destBuf,
1087 int32_t destCapacity,
1088 UErrorCode *status);
1089
1111U_STABLE UText * U_EXPORT2
1113 UText *replacement,
1114 UText *dest,
1115 UErrorCode *status);
1116
1141U_STABLE int32_t U_EXPORT2
1143 const UChar *replacementText,
1144 int32_t replacementLength,
1145 UChar *destBuf,
1146 int32_t destCapacity,
1147 UErrorCode *status);
1148
1170U_STABLE UText * U_EXPORT2
1172 UText *replacement,
1173 UText *dest,
1174 UErrorCode *status);
1175
1222U_STABLE int32_t U_EXPORT2
1224 const UChar *replacementText,
1225 int32_t replacementLength,
1226 UChar **destBuf,
1227 int32_t *destCapacity,
1228 UErrorCode *status);
1229
1252U_STABLE void U_EXPORT2
1254 UText *replacementText,
1255 UText *dest,
1256 UErrorCode *status);
1257
1282U_STABLE int32_t U_EXPORT2
1284 UChar **destBuf,
1285 int32_t *destCapacity,
1286 UErrorCode *status);
1287
1306U_STABLE UText * U_EXPORT2
1308 UText *dest,
1309 UErrorCode *status);
1310
1362U_STABLE int32_t U_EXPORT2
1364 UChar *destBuf,
1365 int32_t destCapacity,
1366 int32_t *requiredCapacity,
1367 UChar *destFields[],
1368 int32_t destFieldsCapacity,
1369 UErrorCode *status);
1370
1397U_STABLE int32_t U_EXPORT2
1399 UText *destFields[],
1400 int32_t destFieldsCapacity,
1401 UErrorCode *status);
1402
1425U_STABLE void U_EXPORT2
1427 int32_t limit,
1428 UErrorCode *status);
1429
1439U_STABLE int32_t U_EXPORT2
1441 UErrorCode *status);
1442
1463U_STABLE void U_EXPORT2
1465 int32_t limit,
1466 UErrorCode *status);
1467
1475U_STABLE int32_t U_EXPORT2
1477 UErrorCode *status);
1478
1479
1500 const void *context,
1501 int32_t steps);
1503
1518U_STABLE void U_EXPORT2
1520 URegexMatchCallback *callback,
1521 const void *context,
1522 UErrorCode *status);
1523
1524
1536U_STABLE void U_EXPORT2
1538 URegexMatchCallback **callback,
1539 const void **context,
1540 UErrorCode *status);
1541
1574 const void *context,
1575 int64_t matchIndex);
1577
1578
1590U_STABLE void U_EXPORT2
1593 const void *context,
1594 UErrorCode *status);
1595
1607U_STABLE void U_EXPORT2
1609 URegexFindProgressCallback **callback,
1610 const void **context,
1611 UErrorCode *status);
1612
1613#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1614#endif /* UREGEX_H */
"Smart pointer" class, closes a URegularExpression via uregex_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:562
C API: Parse Error Information.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1347
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
uint16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1573
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1499
UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack.
void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the sp...
int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the backtracking stack.
int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern.
int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation.
Definition: uregex.h:42
int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:49
@ UREGEX_DOTALL
If set, '.' matches line terminators; otherwise '.' matching stops at line end.
Definition: uregex.h:65
@ UREGEX_COMMENTS
Allow white space and comments within patterns.
Definition: uregex.h:61
@ UREGEX_MULTILINE
Control behavior of "$" and "^". If set, recognize line terminators within string, otherwise,...
Definition: uregex.h:83
@ UREGEX_LITERAL
If set, treat the entire pattern as a literal string.
Definition: uregex.h:77
@ UREGEX_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uregex.h:58
@ UREGEX_CANON_EQ
Forces normalization of pattern and strings.
Definition: uregex.h:55
@ UREGEX_ERROR_ON_UNKNOWN_ESCAPES
Error on Unrecognized backslash escapes.
Definition: uregex.h:108
@ UREGEX_UWORD
Unicode word boundaries.
Definition: uregex.h:99
@ UREGEX_UNIX_LINES
Unix-only line endings.
Definition: uregex.h:90
UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent matching operation touched the end of the text being processed.
void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginn...
URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation.
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415