Linux premium71.web-hosting.com 4.18.0-513.11.1.lve.el8.x86_64 #1 SMP Thu Jan 18 16:21:02 UTC 2024 x86_64
LiteSpeed
Server IP : 198.187.29.8 & Your IP : 216.73.216.155
Domains :
Cant Read [ /etc/named.conf ]
User : cleahvkv
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
opt /
alt /
alt-nodejs20 /
root /
usr /
include /
unicode /
Delete
Unzip
Name
Size
Permission
Date
Action
alphaindex.h
26.54
KB
-rw-r--r--
2024-06-13 15:30
appendable.h
8.54
KB
-rw-r--r--
2024-06-13 15:30
basictz.h
9.99
KB
-rw-r--r--
2024-06-13 15:30
brkiter.h
27.83
KB
-rw-r--r--
2024-06-13 15:30
bytestream.h
10.75
KB
-rw-r--r--
2024-06-13 15:30
bytestrie.h
20.8
KB
-rw-r--r--
2024-06-13 15:30
bytestriebuilder.h
7.48
KB
-rw-r--r--
2024-06-13 15:30
calendar.h
107.68
KB
-rw-r--r--
2024-06-13 15:30
caniter.h
7.53
KB
-rw-r--r--
2024-06-13 15:30
casemap.h
25.42
KB
-rw-r--r--
2024-06-13 15:30
char16ptr.h
7.22
KB
-rw-r--r--
2024-06-13 15:30
chariter.h
23.79
KB
-rw-r--r--
2024-06-13 15:30
choicfmt.h
23.99
KB
-rw-r--r--
2024-06-13 15:30
coleitr.h
13.77
KB
-rw-r--r--
2024-06-13 15:30
coll.h
56.28
KB
-rw-r--r--
2024-06-13 15:30
compactdecimalformat.h
6.88
KB
-rw-r--r--
2024-06-13 15:30
curramt.h
3.67
KB
-rw-r--r--
2024-06-13 15:30
currpinf.h
7.3
KB
-rw-r--r--
2024-06-13 15:30
currunit.h
4.02
KB
-rw-r--r--
2024-06-13 15:30
datefmt.h
40.7
KB
-rw-r--r--
2024-06-13 15:30
dbbi.h
1.19
KB
-rw-r--r--
2024-06-13 15:30
dcfmtsym.h
20.93
KB
-rw-r--r--
2024-06-13 15:30
decimfmt.h
87.46
KB
-rw-r--r--
2024-06-13 15:30
displayoptions.h
7.08
KB
-rw-r--r--
2024-06-13 15:30
docmain.h
7.46
KB
-rw-r--r--
2024-06-13 15:30
dtfmtsym.h
38.21
KB
-rw-r--r--
2024-06-13 15:30
dtintrv.h
3.84
KB
-rw-r--r--
2024-06-13 15:30
dtitvfmt.h
48.87
KB
-rw-r--r--
2024-06-13 15:30
dtitvinf.h
18.63
KB
-rw-r--r--
2024-06-13 15:30
dtptngen.h
28.05
KB
-rw-r--r--
2024-06-13 15:30
dtrule.h
8.66
KB
-rw-r--r--
2024-06-13 15:30
edits.h
20.73
KB
-rw-r--r--
2024-06-13 15:30
enumset.h
2.08
KB
-rw-r--r--
2024-06-13 15:30
errorcode.h
4.84
KB
-rw-r--r--
2024-06-13 15:30
fieldpos.h
8.69
KB
-rw-r--r--
2024-06-13 15:30
filteredbrk.h
5.37
KB
-rw-r--r--
2024-06-13 15:30
fmtable.h
24.36
KB
-rw-r--r--
2024-06-13 15:30
format.h
12.5
KB
-rw-r--r--
2024-06-13 15:30
formattednumber.h
6.28
KB
-rw-r--r--
2024-06-13 15:30
formattedvalue.h
9.75
KB
-rw-r--r--
2024-06-13 15:30
fpositer.h
3.03
KB
-rw-r--r--
2024-06-13 15:30
gender.h
3.35
KB
-rw-r--r--
2024-06-13 15:30
gregocal.h
30.3
KB
-rw-r--r--
2024-06-13 15:30
icudataver.h
1.02
KB
-rw-r--r--
2024-06-13 15:30
icuplug.h
12.1
KB
-rw-r--r--
2024-06-13 15:30
idna.h
12.71
KB
-rw-r--r--
2024-06-13 15:30
listformatter.h
8.59
KB
-rw-r--r--
2024-06-13 15:30
localebuilder.h
11.08
KB
-rw-r--r--
2024-06-13 15:30
localematcher.h
26.86
KB
-rw-r--r--
2024-06-13 15:30
localpointer.h
19.55
KB
-rw-r--r--
2024-06-13 15:30
locdspnm.h
7.12
KB
-rw-r--r--
2024-06-13 15:30
locid.h
48.31
KB
-rw-r--r--
2024-06-13 15:30
measfmt.h
11.41
KB
-rw-r--r--
2024-06-13 15:30
measunit.h
108.23
KB
-rw-r--r--
2024-06-13 15:30
measure.h
4.68
KB
-rw-r--r--
2024-06-13 15:30
messageformat2.h
18.13
KB
-rw-r--r--
2024-06-13 15:30
messageformat2_arguments.h
4.3
KB
-rw-r--r--
2024-06-13 15:30
messageformat2_data_model.h
123
KB
-rw-r--r--
2024-06-13 15:30
messageformat2_data_model_names.h
784
B
-rw-r--r--
2024-06-13 15:30
messageformat2_formattable.h
38.29
KB
-rw-r--r--
2024-06-13 15:30
messageformat2_function_registry.h
18
KB
-rw-r--r--
2024-06-13 15:30
messagepattern.h
33.72
KB
-rw-r--r--
2024-06-13 15:30
msgfmt.h
44.2
KB
-rw-r--r--
2024-06-13 15:30
normalizer2.h
34.73
KB
-rw-r--r--
2024-06-13 15:30
normlzr.h
30.79
KB
-rw-r--r--
2024-06-13 15:30
nounit.h
2.24
KB
-rw-r--r--
2024-06-13 15:30
numberformatter.h
90.74
KB
-rw-r--r--
2024-06-13 15:30
numberrangeformatter.h
26.05
KB
-rw-r--r--
2024-06-13 15:30
numfmt.h
50.16
KB
-rw-r--r--
2024-06-13 15:30
numsys.h
7.22
KB
-rw-r--r--
2024-06-13 15:30
parseerr.h
3.08
KB
-rw-r--r--
2024-06-13 15:30
parsepos.h
5.56
KB
-rw-r--r--
2024-06-13 15:30
platform.h
26.66
KB
-rw-r--r--
2024-06-13 15:30
plurfmt.h
25.07
KB
-rw-r--r--
2024-06-13 15:30
plurrule.h
20.63
KB
-rw-r--r--
2024-06-13 15:30
ptypes.h
2.16
KB
-rw-r--r--
2024-06-13 15:30
putil.h
6.32
KB
-rw-r--r--
2024-06-13 15:30
rbbi.h
32.04
KB
-rw-r--r--
2024-06-13 15:30
rbnf.h
50.53
KB
-rw-r--r--
2024-06-13 15:30
rbtz.h
15.75
KB
-rw-r--r--
2024-06-13 15:30
regex.h
84.45
KB
-rw-r--r--
2024-06-13 15:30
region.h
9.2
KB
-rw-r--r--
2024-06-13 15:30
reldatefmt.h
22.36
KB
-rw-r--r--
2024-06-13 15:30
rep.h
9.38
KB
-rw-r--r--
2024-06-13 15:30
resbund.h
18.03
KB
-rw-r--r--
2024-06-13 15:30
schriter.h
6.09
KB
-rw-r--r--
2024-06-13 15:30
scientificnumberformatter.h
6.44
KB
-rw-r--r--
2024-06-13 15:30
search.h
22.21
KB
-rw-r--r--
2024-06-13 15:30
selfmt.h
14.35
KB
-rw-r--r--
2024-06-13 15:30
simpleformatter.h
12.58
KB
-rw-r--r--
2024-06-13 15:30
simplenumberformatter.h
9.18
KB
-rw-r--r--
2024-06-13 15:30
simpletz.h
45.62
KB
-rw-r--r--
2024-06-13 15:30
smpdtfmt.h
57.06
KB
-rw-r--r--
2024-06-13 15:30
sortkey.h
11.12
KB
-rw-r--r--
2024-06-13 15:30
std_string.h
1.05
KB
-rw-r--r--
2024-06-13 15:30
strenum.h
9.96
KB
-rw-r--r--
2024-06-13 15:30
stringoptions.h
5.79
KB
-rw-r--r--
2024-06-13 15:30
stringpiece.h
10.02
KB
-rw-r--r--
2024-06-13 15:30
stringtriebuilder.h
15.5
KB
-rw-r--r--
2024-06-13 15:30
stsearch.h
21.43
KB
-rw-r--r--
2024-06-13 15:30
symtable.h
4.28
KB
-rw-r--r--
2024-06-13 15:30
tblcoll.h
36.93
KB
-rw-r--r--
2024-06-13 15:30
timezone.h
45.64
KB
-rw-r--r--
2024-06-13 15:30
tmunit.h
3.4
KB
-rw-r--r--
2024-06-13 15:30
tmutamt.h
4.9
KB
-rw-r--r--
2024-06-13 15:30
tmutfmt.h
7.42
KB
-rw-r--r--
2024-06-13 15:30
translit.h
65.8
KB
-rw-r--r--
2024-06-13 15:30
tzfmt.h
42.95
KB
-rw-r--r--
2024-06-13 15:30
tznames.h
16.85
KB
-rw-r--r--
2024-06-13 15:30
tzrule.h
34.81
KB
-rw-r--r--
2024-06-13 15:30
tztrans.h
6.11
KB
-rw-r--r--
2024-06-13 15:30
ubidi.h
89.61
KB
-rw-r--r--
2024-06-13 15:30
ubiditransform.h
12.71
KB
-rw-r--r--
2024-06-13 15:30
ubrk.h
24.43
KB
-rw-r--r--
2024-06-13 15:30
ucal.h
64.01
KB
-rw-r--r--
2024-06-13 15:30
ucasemap.h
15.21
KB
-rw-r--r--
2024-06-13 15:30
ucat.h
5.35
KB
-rw-r--r--
2024-06-13 15:30
uchar.h
150.13
KB
-rw-r--r--
2024-06-13 15:30
ucharstrie.h
22.56
KB
-rw-r--r--
2024-06-13 15:30
ucharstriebuilder.h
7.48
KB
-rw-r--r--
2024-06-13 15:30
uchriter.h
13.24
KB
-rw-r--r--
2024-06-13 15:30
uclean.h
11.21
KB
-rw-r--r--
2024-06-13 15:30
ucnv.h
83.34
KB
-rw-r--r--
2024-06-13 15:30
ucnv_cb.h
6.58
KB
-rw-r--r--
2024-06-13 15:30
ucnv_err.h
20.98
KB
-rw-r--r--
2024-06-13 15:30
ucnvsel.h
6.24
KB
-rw-r--r--
2024-06-13 15:30
ucol.h
62.7
KB
-rw-r--r--
2024-06-13 15:30
ucoleitr.h
9.82
KB
-rw-r--r--
2024-06-13 15:30
uconfig.h
12.56
KB
-rw-r--r--
2024-06-13 15:30
ucpmap.h
5.54
KB
-rw-r--r--
2024-06-13 15:30
ucptrie.h
22.51
KB
-rw-r--r--
2024-06-13 15:30
ucsdet.h
14.69
KB
-rw-r--r--
2024-06-13 15:30
ucurr.h
16.72
KB
-rw-r--r--
2024-06-13 15:30
udat.h
62.36
KB
-rw-r--r--
2024-06-13 15:30
udata.h
15.63
KB
-rw-r--r--
2024-06-13 15:30
udateintervalformat.h
11.93
KB
-rw-r--r--
2024-06-13 15:30
udatpg.h
30.13
KB
-rw-r--r--
2024-06-13 15:30
udisplaycontext.h
5.94
KB
-rw-r--r--
2024-06-13 15:30
udisplayoptions.h
8.86
KB
-rw-r--r--
2024-06-13 15:30
uenum.h
7.79
KB
-rw-r--r--
2024-06-13 15:30
ufieldpositer.h
4.41
KB
-rw-r--r--
2024-06-13 15:30
uformattable.h
10.97
KB
-rw-r--r--
2024-06-13 15:30
uformattednumber.h
8.09
KB
-rw-r--r--
2024-06-13 15:30
uformattedvalue.h
12.25
KB
-rw-r--r--
2024-06-13 15:30
ugender.h
2.06
KB
-rw-r--r--
2024-06-13 15:30
uidna.h
33.43
KB
-rw-r--r--
2024-06-13 15:30
uiter.h
22.75
KB
-rw-r--r--
2024-06-13 15:30
uldnames.h
10.48
KB
-rw-r--r--
2024-06-13 15:30
ulistformatter.h
10.78
KB
-rw-r--r--
2024-06-13 15:30
uloc.h
55.38
KB
-rw-r--r--
2024-06-13 15:30
ulocale.h
6.35
KB
-rw-r--r--
2024-06-13 15:30
ulocbuilder.h
16.73
KB
-rw-r--r--
2024-06-13 15:30
ulocdata.h
11.3
KB
-rw-r--r--
2024-06-13 15:30
umachine.h
14.59
KB
-rw-r--r--
2024-06-13 15:30
umisc.h
1.34
KB
-rw-r--r--
2024-06-13 15:30
umsg.h
24.25
KB
-rw-r--r--
2024-06-13 15:30
umutablecptrie.h
8.3
KB
-rw-r--r--
2024-06-13 15:30
unifilt.h
4
KB
-rw-r--r--
2024-06-13 15:30
unifunct.h
4.05
KB
-rw-r--r--
2024-06-13 15:30
unimatch.h
6.1
KB
-rw-r--r--
2024-06-13 15:30
unirepl.h
3.38
KB
-rw-r--r--
2024-06-13 15:30
uniset.h
66.82
KB
-rw-r--r--
2024-06-13 15:30
unistr.h
171.33
KB
-rw-r--r--
2024-06-13 15:30
unorm.h
20.55
KB
-rw-r--r--
2024-06-13 15:30
unorm2.h
25.71
KB
-rw-r--r--
2024-06-13 15:30
unum.h
55.16
KB
-rw-r--r--
2024-06-13 15:30
unumberformatter.h
19.68
KB
-rw-r--r--
2024-06-13 15:30
unumberoptions.h
5.23
KB
-rw-r--r--
2024-06-13 15:30
unumberrangeformatter.h
15.35
KB
-rw-r--r--
2024-06-13 15:30
unumsys.h
7.26
KB
-rw-r--r--
2024-06-13 15:30
uobject.h
10.66
KB
-rw-r--r--
2024-06-13 15:30
upluralrules.h
8.79
KB
-rw-r--r--
2024-06-13 15:30
uregex.h
71.99
KB
-rw-r--r--
2024-06-13 15:30
uregion.h
9.81
KB
-rw-r--r--
2024-06-13 15:30
ureldatefmt.h
16.98
KB
-rw-r--r--
2024-06-13 15:30
urename.h
141.31
KB
-rw-r--r--
2024-06-13 15:30
urep.h
5.38
KB
-rw-r--r--
2024-06-13 15:30
ures.h
36.65
KB
-rw-r--r--
2024-06-13 15:30
uscript.h
27.89
KB
-rw-r--r--
2024-06-13 15:30
usearch.h
39.21
KB
-rw-r--r--
2024-06-13 15:30
uset.h
45.56
KB
-rw-r--r--
2024-06-13 15:30
usetiter.h
9.63
KB
-rw-r--r--
2024-06-13 15:30
ushape.h
18
KB
-rw-r--r--
2024-06-13 15:30
usimplenumberformatter.h
7.63
KB
-rw-r--r--
2024-06-13 15:30
uspoof.h
80.32
KB
-rw-r--r--
2024-06-13 15:30
usprep.h
8.19
KB
-rw-r--r--
2024-06-13 15:30
ustdio.h
38.56
KB
-rw-r--r--
2024-06-13 15:30
ustream.h
1.89
KB
-rw-r--r--
2024-06-13 15:30
ustring.h
72.13
KB
-rw-r--r--
2024-06-13 15:30
ustringtrie.h
3.15
KB
-rw-r--r--
2024-06-13 15:30
utext.h
58.1
KB
-rw-r--r--
2024-06-13 15:30
utf.h
7.87
KB
-rw-r--r--
2024-06-13 15:30
utf16.h
23.35
KB
-rw-r--r--
2024-06-13 15:30
utf32.h
763
B
-rw-r--r--
2024-06-13 15:30
utf8.h
30.83
KB
-rw-r--r--
2024-06-13 15:30
utf_old.h
45.8
KB
-rw-r--r--
2024-06-13 15:30
utmscale.h
13.78
KB
-rw-r--r--
2024-06-13 15:30
utrace.h
17.18
KB
-rw-r--r--
2024-06-13 15:30
utrans.h
25.54
KB
-rw-r--r--
2024-06-13 15:30
utypes.h
33.71
KB
-rw-r--r--
2024-06-13 15:30
uvernum.h
6.33
KB
-rw-r--r--
2024-06-13 15:30
uversion.h
5.99
KB
-rw-r--r--
2024-06-13 15:30
vtzone.h
20.68
KB
-rw-r--r--
2024-06-13 15:30
Save
Rename
// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 2005-2013, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucsdet.h * encoding: UTF-8 * indentation:4 * * created on: 2005Aug04 * created by: Andy Heninger * * ICU Character Set Detection, API for C * * Draft version 18 Oct 2005 * */ #ifndef __UCSDET_H #define __UCSDET_H #include "unicode/utypes.h" #if !UCONFIG_NO_CONVERSION #include "unicode/uenum.h" #if U_SHOW_CPLUSPLUS_API #include "unicode/localpointer.h" #endif // U_SHOW_CPLUSPLUS_API /** * \file * \brief C API: Charset Detection API * * This API provides a facility for detecting the * charset or encoding of character data in an unknown text format. * The input data can be from an array of bytes. * <p> * Character set detection is at best an imprecise operation. The detection * process will attempt to identify the charset that best matches the characteristics * of the byte data, but the process is partly statistical in nature, and * the results cannot be guaranteed to always be correct. * <p> * For best accuracy in charset detection, the input data should be primarily * in a single language, and a minimum of a few hundred bytes worth of plain text * in the language are needed. The detection process will attempt to * ignore html or xml style markup that could otherwise obscure the content. * <p> * An alternative to the ICU Charset Detector is the * Compact Encoding Detector, https://github.com/google/compact_enc_det. * It often gives more accurate results, especially with short input samples. 
*/ struct UCharsetDetector; /** * Structure representing a charset detector * @stable ICU 3.6 */ typedef struct UCharsetDetector UCharsetDetector; struct UCharsetMatch; /** * Opaque structure representing a match that was identified * from a charset detection operation. * @stable ICU 3.6 */ typedef struct UCharsetMatch UCharsetMatch; /** * Open a charset detector. * * @param status Any error conditions occurring during the open * operation are reported back in this variable. * @return the newly opened charset detector. * @stable ICU 3.6 */ U_CAPI UCharsetDetector * U_EXPORT2 ucsdet_open(UErrorCode *status); /** * Close a charset detector. All storage and any other resources * owned by this charset detector will be released. Failure to * close a charset detector when finished with it can result in * memory leaks in the application. * * @param ucsd The charset detector to be closed. * @stable ICU 3.6 */ U_CAPI void U_EXPORT2 ucsdet_close(UCharsetDetector *ucsd); #if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN /** * \class LocalUCharsetDetectorPointer * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close(). * For most methods see the LocalPointerBase base class. * * @see LocalPointerBase * @see LocalPointer * @stable ICU 4.4 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close); U_NAMESPACE_END #endif /** * Set the input byte data whose charset is to be detected. * * Ownership of the input text byte array remains with the caller. * The input string must not be altered or deleted until the charset * detector is either closed or reset to refer to different input text. * * @param ucsd the charset detector to be used. * @param textIn the input text of unknown encoding. * @param len the length of the input text, or -1 if the text * is NUL terminated. * @param status any error conditions are reported back in this variable. 
* * @stable ICU 3.6 */ U_CAPI void U_EXPORT2 ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status); /** Set the declared encoding for charset detection. * The declared encoding of an input text is an encoding obtained * by the user from an http header or xml declaration or similar source that * can be provided as an additional hint to the charset detector. * * How and whether the declared encoding will be used during the * detection process is TBD. * * @param ucsd the charset detector to be used. * @param encoding an encoding for the current data obtained from * a header or declaration or other source outside * of the byte data itself. * @param length the length of the encoding name, or -1 if the name string * is NUL terminated. * @param status any error conditions are reported back in this variable. * * @stable ICU 3.6 */ U_CAPI void U_EXPORT2 ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status); /** * Return the charset that best matches the supplied input data. * * Note though, that because the detection * only looks at the start of the input data, * there is a possibility that the returned charset will fail to handle * the full set of input data. * <p> * The returned UCharsetMatch object is owned by the UCharsetDetector. * It will remain valid until the detector input is reset, or until * the detector is closed. * <p> * The function will fail if * <ul> * <li>no charset appears to match the data.</li> * <li>no input text has been provided</li> * </ul> * * @param ucsd the charset detector to be used. * @param status any error conditions are reported back in this variable. * @return a UCharsetMatch representing the best matching charset, * or NULL if no charset matches the byte data. 
* * @stable ICU 3.6 */ U_CAPI const UCharsetMatch * U_EXPORT2 ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status); /** * Find all charset matches that appear to be consistent with the input, * returning an array of results. The results are ordered with the * best quality match first. * * Because the detection only looks at a limited amount of the * input byte data, some of the returned charsets may fail to handle * all of the input data. * <p> * The returned UCharsetMatch objects are owned by the UCharsetDetector. * They will remain valid until the detector is closed or modified. * * <p> * Return an error if * <ul> * <li>no charsets appear to match the input data.</li> * <li>no input text has been provided</li> * </ul> * * @param ucsd the charset detector to be used. * @param matchesFound pointer to a variable that will be set to the * number of charsets identified that are consistent with * the input data. Output only. * @param status any error conditions are reported back in this variable. * @return A pointer to an array of pointers to UCharsetMatch objects. * This array, and the UCharsetMatch instances to which it refers, * are owned by the UCharsetDetector, and will remain valid until * the detector is closed or modified. * @stable ICU 3.6 */ U_CAPI const UCharsetMatch ** U_EXPORT2 ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status); /** * Get the name of the charset represented by a UCharsetMatch. * * The storage for the returned name string is owned by the * UCharsetMatch, and will remain valid while the UCharsetMatch * is valid. * * The name returned is suitable for use with the ICU conversion APIs. * * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return The name of the matching charset. 
* * @stable ICU 3.6 */ U_CAPI const char * U_EXPORT2 ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get a confidence number for the quality of the match of the byte * data with the charset. Confidence numbers range from zero to 100, * with 100 representing complete confidence and zero representing * no confidence. * * The confidence values are somewhat arbitrary. They define an * ordering within the results for any single detection operation * but are not generally comparable between the results for different input. * * A confidence value of ten does have a general meaning - it is used * for charsets that can represent the input data, but for which there * is no other indication that suggests that the charset is the correct one. * Pure 7 bit ASCII data, for example, is compatible with a * great many charsets, most of which will appear as possible matches * with a confidence of 10. * * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return A confidence number for the charset match. * * @stable ICU 3.6 */ U_CAPI int32_t U_EXPORT2 ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get the RFC 3066 code for the language of the input data. * * The Charset Detection service is intended primarily for detecting * charsets, not language. For some, but not all, charsets, a language is * identified as a byproduct of the detection process, and that is what * is returned by this function. * * CAUTION: * 1. Language information is not available for input data encoded in * all charsets. In particular, no language is identified * for UTF-8 input data. * * 2. Closely related languages may sometimes be confused. * * If more accurate language detection is required, a linguistic * analysis package should be used. * * The storage for the returned name string is owned by the * UCharsetMatch, and will remain valid while the UCharsetMatch * is valid. 
* * @param ucsm The charset match object. * @param status Any error conditions are reported back in this variable. * @return The RFC 3066 code for the language of the input data, or * an empty string if the language could not be determined. * * @stable ICU 3.6 */ U_CAPI const char * U_EXPORT2 ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status); /** * Get the entire input text as a UChar string, placing it into * a caller-supplied buffer. A terminating * NUL character will be appended to the buffer if space is available. * * The number of UChars in the output string, not including the terminating * NUL, is returned. * * If the supplied buffer is smaller than required to hold the output, * the contents of the buffer are undefined. The full output string length * (in UChars) is returned as always, and can be used to allocate a buffer * of the correct size. * * * @param ucsm The charset match object. * @param buf A UChar buffer to be filled with the converted text data. * @param cap The capacity of the buffer in UChars. * @param status Any error conditions are reported back in this variable. * @return The number of UChars in the output string. * * @stable ICU 3.6 */ U_CAPI int32_t U_EXPORT2 ucsdet_getUChars(const UCharsetMatch *ucsm, UChar *buf, int32_t cap, UErrorCode *status); /** * Get an iterator over the set of all detectable charsets - * over the charsets that are known to the charset detection * service. * * The returned UEnumeration provides access to the names of * the charsets. * * <p> * The state of the Charset detector that is passed in does not * affect the result of this function, but requiring a valid, open * charset detector as a parameter ensures that the charset detection * service has been safely initialized and that the required detection * data is available. * * <p> * <b>Note:</b> Multiple different charset encodings in the same family may use * a single shared name in this implementation. 
For example, this method returns * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252" * (Windows Latin 1). However, actual detection result could be "windows-1252" * when the input data matches Latin 1 code points with any points only available * in "windows-1252". * * @param ucsd a Charset detector. * @param status Any error conditions are reported back in this variable. * @return an iterator providing access to the detectable charset names. * @stable ICU 3.6 */ U_CAPI UEnumeration * U_EXPORT2 ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); /** * Test whether input filtering is enabled for this charset detector. * Input filtering removes text that appears to be HTML or xml * markup from the input before applying the code page detection * heuristics. * * @param ucsd The charset detector to check. * @return true if filtering is enabled. * @stable ICU 3.6 */ U_CAPI UBool U_EXPORT2 ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd); /** * Enable filtering of input text. If filtering is enabled, * text within angle brackets ("<" and ">") will be removed * before detection, which will remove most HTML or xml markup. * * @param ucsd the charset detector to be modified. * @param filter <code>true</code> to enable input text filtering. * @return The previous setting. * * @stable ICU 3.6 */ U_CAPI UBool U_EXPORT2 ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter); #ifndef U_HIDE_INTERNAL_API /** * Get an iterator over the set of detectable charsets - * over the charsets that are enabled by the specified charset detector. * * The returned UEnumeration provides access to the names of * the charsets. * * @param ucsd a Charset detector. * @param status Any error conditions are reported back in this variable. * @return an iterator providing access to the detectable charset names by * the specified charset detector. 
* @internal */ U_CAPI UEnumeration * U_EXPORT2 ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status); /** * Enable or disable individual charset encoding. * A name of charset encoding must be included in the names returned by * {@link #ucsdet_getAllDetectableCharsets()}. * * @param ucsd a Charset detector. * @param encoding the name of the charset encoding. * @param enabled <code>true</code> to enable, or <code>false</code> to disable the * charset encoding. * @param status receives the return status. When the name of charset encoding * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set. * @internal */ U_CAPI void U_EXPORT2 ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status); #endif /* U_HIDE_INTERNAL_API */ #endif /* !UCONFIG_NO_CONVERSION */ #endif /* __UCSDET_H */