Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ExeBy/smexe_ro.../usr/include/python3....
File: unicodeobject.h
#ifndef Py_UNICODEOBJECT_H
[0] Fix | Delete
#define Py_UNICODEOBJECT_H
[1] Fix | Delete
[2] Fix | Delete
#include <stdarg.h>
[3] Fix | Delete
[4] Fix | Delete
/*
[5] Fix | Delete
[6] Fix | Delete
Unicode implementation based on original code by Fredrik Lundh,
[7] Fix | Delete
modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
[8] Fix | Delete
Unicode Integration Proposal. (See
[9] Fix | Delete
http://www.egenix.com/files/python/unicode-proposal.txt).
[10] Fix | Delete
[11] Fix | Delete
Copyright (c) Corporation for National Research Initiatives.
[12] Fix | Delete
[13] Fix | Delete
[14] Fix | Delete
Original header:
[15] Fix | Delete
--------------------------------------------------------------------
[16] Fix | Delete
[17] Fix | Delete
* Yet another Unicode string type for Python. This type supports the
[18] Fix | Delete
* 16-bit Basic Multilingual Plane (BMP) only.
[19] Fix | Delete
*
[20] Fix | Delete
* Written by Fredrik Lundh, January 1999.
[21] Fix | Delete
*
[22] Fix | Delete
* Copyright (c) 1999 by Secret Labs AB.
[23] Fix | Delete
* Copyright (c) 1999 by Fredrik Lundh.
[24] Fix | Delete
*
[25] Fix | Delete
* fredrik@pythonware.com
[26] Fix | Delete
* http://www.pythonware.com
[27] Fix | Delete
*
[28] Fix | Delete
* --------------------------------------------------------------------
[29] Fix | Delete
* This Unicode String Type is
[30] Fix | Delete
*
[31] Fix | Delete
* Copyright (c) 1999 by Secret Labs AB
[32] Fix | Delete
* Copyright (c) 1999 by Fredrik Lundh
[33] Fix | Delete
*
[34] Fix | Delete
* By obtaining, using, and/or copying this software and/or its
[35] Fix | Delete
* associated documentation, you agree that you have read, understood,
[36] Fix | Delete
* and will comply with the following terms and conditions:
[37] Fix | Delete
*
[38] Fix | Delete
* Permission to use, copy, modify, and distribute this software and its
[39] Fix | Delete
* associated documentation for any purpose and without fee is hereby
[40] Fix | Delete
* granted, provided that the above copyright notice appears in all
[41] Fix | Delete
* copies, and that both that copyright notice and this permission notice
[42] Fix | Delete
* appear in supporting documentation, and that the name of Secret Labs
[43] Fix | Delete
* AB or the author not be used in advertising or publicity pertaining to
[44] Fix | Delete
* distribution of the software without specific, written prior
[45] Fix | Delete
* permission.
[46] Fix | Delete
*
[47] Fix | Delete
* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
[48] Fix | Delete
* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
[49] Fix | Delete
* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
[50] Fix | Delete
* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
[51] Fix | Delete
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
[52] Fix | Delete
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
[53] Fix | Delete
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
[54] Fix | Delete
* -------------------------------------------------------------------- */
[55] Fix | Delete
[56] Fix | Delete
#include <ctype.h>
[57] Fix | Delete
[58] Fix | Delete
/* === Internal API ======================================================= */
[59] Fix | Delete
[60] Fix | Delete
/* --- Internal Unicode Format -------------------------------------------- */
[61] Fix | Delete
[62] Fix | Delete
/* Python 3.x requires unicode */
[63] Fix | Delete
#define Py_USING_UNICODE
[64] Fix | Delete
[65] Fix | Delete
/* SIZEOF_WCHAR_T has to be defined before this point; compilation is
   stopped with an explicit error otherwise. */
#ifndef SIZEOF_WCHAR_T
#error Must define SIZEOF_WCHAR_T
#endif

/* Size of one Py_UNICODE code unit, taken directly from SIZEOF_WCHAR_T. */
#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
[70] Fix | Delete
[71] Fix | Delete
/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
[72] Fix | Delete
Otherwise, Unicode strings are stored as UCS-2 (with limited support
[73] Fix | Delete
for UTF-16) */
[74] Fix | Delete
[75] Fix | Delete
/* Py_UNICODE_WIDE is defined only when a code unit is at least 4 bytes. */
#if Py_UNICODE_SIZE >= 4
#define Py_UNICODE_WIDE
#endif
[78] Fix | Delete
[79] Fix | Delete
/* Set these flags if the platform has "wchar.h" and the
[80] Fix | Delete
wchar_t type is a 16-bit unsigned type */
[81] Fix | Delete
/* #define HAVE_WCHAR_H */
[82] Fix | Delete
/* #define HAVE_USABLE_WCHAR_T */
[83] Fix | Delete
[84] Fix | Delete
/* Py_UNICODE was the native Unicode storage format (code unit) used by
[85] Fix | Delete
Python and represents a single Unicode element in the Unicode type.
[86] Fix | Delete
With PEP 393, Py_UNICODE is deprecated and replaced with a
[87] Fix | Delete
typedef to wchar_t. */
[88] Fix | Delete
[89] Fix | Delete
/* Outside the limited API, Py_UNICODE is a plain alias for wchar_t and
   PY_UNICODE_TYPE expands to the same type name. */
#ifndef Py_LIMITED_API
#define PY_UNICODE_TYPE wchar_t
typedef wchar_t Py_UNICODE;
#endif
[93] Fix | Delete
[94] Fix | Delete
/* If the compiler provides a wchar_t type we try to support it
[95] Fix | Delete
through the interface functions PyUnicode_FromWideChar(),
[96] Fix | Delete
PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
[97] Fix | Delete
[98] Fix | Delete
#ifdef HAVE_USABLE_WCHAR_T
[99] Fix | Delete
# ifndef HAVE_WCHAR_H
[100] Fix | Delete
# define HAVE_WCHAR_H
[101] Fix | Delete
# endif
[102] Fix | Delete
#endif
[103] Fix | Delete
[104] Fix | Delete
#ifdef HAVE_WCHAR_H
[105] Fix | Delete
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
[106] Fix | Delete
# ifdef _HAVE_BSDI
[107] Fix | Delete
# include <time.h>
[108] Fix | Delete
# endif
[109] Fix | Delete
# include <wchar.h>
[110] Fix | Delete
#endif
[111] Fix | Delete
[112] Fix | Delete
/* Py_UCS4 and Py_UCS2 are typedefs for the respective
[113] Fix | Delete
unicode representations. */
[114] Fix | Delete
/* Fixed-width unsigned code unit types: 32, 16 and 8 bits respectively. */
typedef uint32_t Py_UCS4;
typedef uint16_t Py_UCS2;
typedef uint8_t Py_UCS1;
[117] Fix | Delete
[118] Fix | Delete
/* --- Internal Unicode Operations ---------------------------------------- */
[119] Fix | Delete
[120] Fix | Delete
/* Since splitting on whitespace is an important use case, and
[121] Fix | Delete
whitespace in most situations is solely ASCII whitespace, we
[122] Fix | Delete
optimize for the common case by using a quick look-up table
[123] Fix | Delete
_Py_ascii_whitespace (see below) with an inlined check.
[124] Fix | Delete
[125] Fix | Delete
*/
[126] Fix | Delete
#ifndef Py_LIMITED_API
[127] Fix | Delete
/* Whitespace test. Values below 128 are answered from the
   _Py_ascii_whitespace table; everything else is passed to
   _PyUnicode_IsWhitespace(). The argument is evaluated more than once,
   so it must not have side effects. */
#define Py_UNICODE_ISSPACE(ch) \
    ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
[129] Fix | Delete
[130] Fix | Delete
/* Character classification and conversion. Each macro below is a direct
   alias for the _PyUnicode_* function named on its right-hand side. */

/* Case and line-break predicates. */
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)

/* Case conversions. */
#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)

/* Numeric and printable predicates. */
#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)

/* Numeric value conversions. */
#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)

/* Alphabetic predicate. */
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
[149] Fix | Delete
[150] Fix | Delete
/* True when ch satisfies any of the alpha, decimal, digit or numeric
   predicates. The checks short-circuit in that order, and ch may be
   evaluated up to four times. */
#define Py_UNICODE_ISALNUM(ch) \
    (Py_UNICODE_ISALPHA(ch) || \
     Py_UNICODE_ISDECIMAL(ch) || \
     Py_UNICODE_ISDIGIT(ch) || \
     Py_UNICODE_ISNUMERIC(ch))
[155] Fix | Delete
[156] Fix | Delete
/* Copy `length` Py_UNICODE code units from source to target with memcpy(),
   so the two regions must not overlap. `length` is a count of code units,
   not bytes. */
#define Py_UNICODE_COPY(target, source, length) \
    memcpy((target), (source), (length)*sizeof(Py_UNICODE))
[158] Fix | Delete
[159] Fix | Delete
/* Store `value` into the first `length` code units of `target`.
   `target` and `value` are captured once in locals; `length` is
   re-evaluated on every loop iteration. Expands to a single statement. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
    } while (0)
[163] Fix | Delete
[164] Fix | Delete
/* macros to work with surrogates */
[165] Fix | Delete
/* Range tests on a code unit: any surrogate (0xD800-0xDFFF), the high half
   (0xD800-0xDBFF) and the low half (0xDC00-0xDFFF). Bounds are inclusive.
   ch is evaluated twice. */
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
[168] Fix | Delete
/* Join two surrogate characters and return a single Py_UCS4 value. */
[169] Fix | Delete
/* Combine a surrogate pair into one Py_UCS4 value: the low ten bits of
   `high` become bits 10-19, the low ten bits of `low` become bits 0-9,
   and 0x10000 is added. No check is made that the arguments really are
   surrogates. */
#define Py_UNICODE_JOIN_SURROGATES(high, low)  \
    (((((Py_UCS4)(high) & 0x03FF) << 10) |      \
      ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
[172] Fix | Delete
/* high surrogate = top 10 bits added to D800 */
[173] Fix | Delete
/* Shift away the low ten bits of ch and rebase the remainder so that
   0x10000 yields 0xD800. */
#define Py_UNICODE_HIGH_SURROGATE(ch) (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))
[174] Fix | Delete
/* low surrogate = bottom 10 bits added to DC00 */
[175] Fix | Delete
/* Keep only the low ten bits of ch and offset them by 0xDC00. */
#define Py_UNICODE_LOW_SURROGATE(ch) (((ch) & 0x3FF) + 0xDC00)
[176] Fix | Delete
[177] Fix | Delete
/* Check if substring matches at given offset. The offset must be
[178] Fix | Delete
valid, and the substring must not be empty. */
[179] Fix | Delete
[180] Fix | Delete
/* Non-zero when `substring` occurs in `string` at `offset`. The first and
   last code units are compared before the full memcmp() as a cheap
   rejection step. Arguments are evaluated several times.

   NOTE(review): the macro dereferences ->wstr and ->wstr_length directly
   on its arguments. In this header wstr is a member of PyASCIIObject and
   wstr_length a member of PyCompactUnicodeObject, both reached from
   PyUnicodeObject only through nested _base members, so a
   PyUnicodeObject* argument does not appear to compile here -- confirm
   with callers before relying on this macro. */
#define Py_UNICODE_MATCH(string, offset, substring) \
    ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
     ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
     !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
[184] Fix | Delete
[185] Fix | Delete
#endif /* Py_LIMITED_API */
[186] Fix | Delete
[187] Fix | Delete
#ifdef __cplusplus
[188] Fix | Delete
extern "C" {
[189] Fix | Delete
#endif
[190] Fix | Delete
[191] Fix | Delete
/* --- Unicode Type ------------------------------------------------------- */
[192] Fix | Delete
[193] Fix | Delete
#ifndef Py_LIMITED_API
[194] Fix | Delete
[195] Fix | Delete
/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
[196] Fix | Delete
structure. state.ascii and state.compact are set, and the data
[197] Fix | Delete
immediately follow the structure. utf8_length and wstr_length can be found
[198] Fix | Delete
in the length field; the utf8 pointer is equal to the data pointer. */
[199] Fix | Delete
typedef struct {
[200] Fix | Delete
/* There are 4 forms of Unicode strings:
[201] Fix | Delete
[202] Fix | Delete
- compact ascii:
[203] Fix | Delete
[204] Fix | Delete
* structure = PyASCIIObject
[205] Fix | Delete
* test: PyUnicode_IS_COMPACT_ASCII(op)
[206] Fix | Delete
* kind = PyUnicode_1BYTE_KIND
[207] Fix | Delete
* compact = 1
[208] Fix | Delete
* ascii = 1
[209] Fix | Delete
* ready = 1
[210] Fix | Delete
* (length is the length of the utf8 and wstr strings)
[211] Fix | Delete
* (data starts just after the structure)
[212] Fix | Delete
* (since ASCII is decoded from UTF-8, the utf8 string are the data)
[213] Fix | Delete
[214] Fix | Delete
- compact:
[215] Fix | Delete
[216] Fix | Delete
* structure = PyCompactUnicodeObject
[217] Fix | Delete
* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
[218] Fix | Delete
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
[219] Fix | Delete
PyUnicode_4BYTE_KIND
[220] Fix | Delete
* compact = 1
[221] Fix | Delete
* ready = 1
[222] Fix | Delete
* ascii = 0
[223] Fix | Delete
* utf8 is not shared with data
[224] Fix | Delete
* utf8_length = 0 if utf8 is NULL
[225] Fix | Delete
* wstr is shared with data and wstr_length=length
[226] Fix | Delete
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
[227] Fix | Delete
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
[228] Fix | Delete
* wstr_length = 0 if wstr is NULL
[229] Fix | Delete
* (data starts just after the structure)
[230] Fix | Delete
[231] Fix | Delete
- legacy string, not ready:
[232] Fix | Delete
[233] Fix | Delete
* structure = PyUnicodeObject
[234] Fix | Delete
* test: kind == PyUnicode_WCHAR_KIND
[235] Fix | Delete
* length = 0 (use wstr_length)
[236] Fix | Delete
* hash = -1
[237] Fix | Delete
* kind = PyUnicode_WCHAR_KIND
[238] Fix | Delete
* compact = 0
[239] Fix | Delete
* ascii = 0
[240] Fix | Delete
* ready = 0
[241] Fix | Delete
* interned = SSTATE_NOT_INTERNED
[242] Fix | Delete
* wstr is not NULL
[243] Fix | Delete
* data.any is NULL
[244] Fix | Delete
* utf8 is NULL
[245] Fix | Delete
* utf8_length = 0
[246] Fix | Delete
[247] Fix | Delete
- legacy string, ready:
[248] Fix | Delete
[249] Fix | Delete
* structure = PyUnicodeObject structure
[250] Fix | Delete
* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
[251] Fix | Delete
* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
[252] Fix | Delete
PyUnicode_4BYTE_KIND
[253] Fix | Delete
* compact = 0
[254] Fix | Delete
* ready = 1
[255] Fix | Delete
* data.any is not NULL
[256] Fix | Delete
* utf8 is shared and utf8_length = length with data.any if ascii = 1
[257] Fix | Delete
* utf8_length = 0 if utf8 is NULL
[258] Fix | Delete
* wstr is shared with data.any and wstr_length = length
[259] Fix | Delete
if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
[260] Fix | Delete
or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
[261] Fix | Delete
* wstr_length = 0 if wstr is NULL
[262] Fix | Delete
[263] Fix | Delete
Compact strings use only one memory block (structure + characters),
[264] Fix | Delete
whereas legacy strings use one block for the structure and one block
[265] Fix | Delete
for characters.
[266] Fix | Delete
[267] Fix | Delete
Legacy strings are created by PyUnicode_FromUnicode() and
[268] Fix | Delete
PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
[269] Fix | Delete
when PyUnicode_READY() is called.
[270] Fix | Delete
[271] Fix | Delete
See also _PyUnicode_CheckConsistency().
[272] Fix | Delete
*/
[273] Fix | Delete
PyObject_HEAD
[274] Fix | Delete
Py_ssize_t length; /* Number of code points in the string */
[275] Fix | Delete
Py_hash_t hash; /* Hash value; -1 if not set */
[276] Fix | Delete
struct {
[277] Fix | Delete
/*
[278] Fix | Delete
SSTATE_NOT_INTERNED (0)
[279] Fix | Delete
SSTATE_INTERNED_MORTAL (1)
[280] Fix | Delete
SSTATE_INTERNED_IMMORTAL (2)
[281] Fix | Delete
[282] Fix | Delete
If interned != SSTATE_NOT_INTERNED, the two references from the
[283] Fix | Delete
dictionary to this object are *not* counted in ob_refcnt.
[284] Fix | Delete
*/
[285] Fix | Delete
unsigned int interned:2;
[286] Fix | Delete
/* Character size:
[287] Fix | Delete
[288] Fix | Delete
- PyUnicode_WCHAR_KIND (0):
[289] Fix | Delete
[290] Fix | Delete
* character type = wchar_t (16 or 32 bits, depending on the
[291] Fix | Delete
platform)
[292] Fix | Delete
[293] Fix | Delete
- PyUnicode_1BYTE_KIND (1):
[294] Fix | Delete
[295] Fix | Delete
* character type = Py_UCS1 (8 bits, unsigned)
[296] Fix | Delete
* all characters are in the range U+0000-U+00FF (latin1)
[297] Fix | Delete
* if ascii is set, all characters are in the range U+0000-U+007F
[298] Fix | Delete
(ASCII), otherwise at least one character is in the range
[299] Fix | Delete
U+0080-U+00FF
[300] Fix | Delete
[301] Fix | Delete
- PyUnicode_2BYTE_KIND (2):
[302] Fix | Delete
[303] Fix | Delete
* character type = Py_UCS2 (16 bits, unsigned)
[304] Fix | Delete
* all characters are in the range U+0000-U+FFFF (BMP)
[305] Fix | Delete
* at least one character is in the range U+0100-U+FFFF
[306] Fix | Delete
[307] Fix | Delete
- PyUnicode_4BYTE_KIND (4):
[308] Fix | Delete
[309] Fix | Delete
* character type = Py_UCS4 (32 bits, unsigned)
[310] Fix | Delete
* all characters are in the range U+0000-U+10FFFF
[311] Fix | Delete
* at least one character is in the range U+10000-U+10FFFF
[312] Fix | Delete
*/
[313] Fix | Delete
unsigned int kind:3;
[314] Fix | Delete
/* Compact is with respect to the allocation scheme. Compact unicode
[315] Fix | Delete
objects only require one memory block while non-compact objects use
[316] Fix | Delete
one block for the PyUnicodeObject struct and another for its data
[317] Fix | Delete
buffer. */
[318] Fix | Delete
unsigned int compact:1;
[319] Fix | Delete
/* The string only contains characters in the range U+0000-U+007F (ASCII)
[320] Fix | Delete
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
[321] Fix | Delete
set, use the PyASCIIObject structure. */
[322] Fix | Delete
unsigned int ascii:1;
[323] Fix | Delete
/* The ready flag indicates whether the object layout is initialized
[324] Fix | Delete
completely. This means that this is either a compact object, or
[325] Fix | Delete
the data pointer is filled out. The bit is redundant, and helps
[326] Fix | Delete
to minimize the test in PyUnicode_IS_READY(). */
[327] Fix | Delete
unsigned int ready:1;
[328] Fix | Delete
/* Padding to ensure that PyUnicode_DATA() is always aligned to
[329] Fix | Delete
4 bytes (see issue #19537 on m68k). */
[330] Fix | Delete
unsigned int :24;
[331] Fix | Delete
} state;
[332] Fix | Delete
wchar_t *wstr; /* wchar_t representation (null-terminated) */
[333] Fix | Delete
} PyASCIIObject;
[334] Fix | Delete
[335] Fix | Delete
/* Non-ASCII strings allocated through PyUnicode_New use the
[336] Fix | Delete
PyCompactUnicodeObject structure. state.compact is set, and the data
[337] Fix | Delete
immediately follow the structure. */
[338] Fix | Delete
typedef struct {
[339] Fix | Delete
PyASCIIObject _base;
[340] Fix | Delete
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
[341] Fix | Delete
* terminating \0. */
[342] Fix | Delete
char *utf8; /* UTF-8 representation (null-terminated) */
[343] Fix | Delete
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
[344] Fix | Delete
* surrogates count as two code points. */
[345] Fix | Delete
} PyCompactUnicodeObject;
[346] Fix | Delete
[347] Fix | Delete
/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
[348] Fix | Delete
PyUnicodeObject structure. The actual string data is initially in the wstr
[349] Fix | Delete
block, and copied into the data block using _PyUnicode_Ready. */
[350] Fix | Delete
typedef struct {
[351] Fix | Delete
PyCompactUnicodeObject _base;
[352] Fix | Delete
union {
[353] Fix | Delete
void *any;
[354] Fix | Delete
Py_UCS1 *latin1;
[355] Fix | Delete
Py_UCS2 *ucs2;
[356] Fix | Delete
Py_UCS4 *ucs4;
[357] Fix | Delete
} data; /* Canonical, smallest-form Unicode buffer */
[358] Fix | Delete
} PyUnicodeObject;
[359] Fix | Delete
#endif
[360] Fix | Delete
[361] Fix | Delete
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
[362] Fix | Delete
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
[363] Fix | Delete
[364] Fix | Delete
/* PyUnicode_Check: true when the type of op carries the
   Py_TPFLAGS_UNICODE_SUBCLASS flag.
   PyUnicode_CheckExact: true only when the type of op is exactly
   PyUnicode_Type. */
#define PyUnicode_Check(op) \
    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
[367] Fix | Delete
[368] Fix | Delete
/* Fast access macros */
[369] Fix | Delete
#ifndef Py_LIMITED_API
[370] Fix | Delete
[371] Fix | Delete
/* Length of the wstr representation. Compact ASCII objects have no
   wstr_length member, so their `length` field is used instead; every
   other object reads PyCompactUnicodeObject.wstr_length.
   `op` is parenthesized inside the casts so that an expression argument
   (for example pointer arithmetic) is cast as a whole. */
#define PyUnicode_WSTR_LENGTH(op) \
    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
     ((PyASCIIObject*)(op))->length :                  \
     ((PyCompactUnicodeObject*)(op))->wstr_length)
[375] Fix | Delete
[376] Fix | Delete
/* Returns the deprecated Py_UNICODE representation's size in code units
[377] Fix | Delete
(this includes surrogate pairs as 2 units).
[378] Fix | Delete
If the Py_UNICODE representation is not available, it will be computed
[379] Fix | Delete
on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
[380] Fix | Delete
[381] Fix | Delete
/* Length of the wstr representation in code units. When wstr is already
   present its length is returned directly; otherwise PyUnicode_AsUnicode()
   is called first (its result is discarded) and the length is read
   afterwards. `op` is evaluated several times. */
#define PyUnicode_GET_SIZE(op)                       \
    (assert(PyUnicode_Check(op)),                    \
     (((PyASCIIObject *)(op))->wstr) ?               \
      PyUnicode_WSTR_LENGTH(op) :                    \
      ((void)PyUnicode_AsUnicode((PyObject *)(op)),  \
       assert(((PyASCIIObject *)(op))->wstr),        \
       PyUnicode_WSTR_LENGTH(op)))
[388] Fix | Delete
[389] Fix | Delete
/* Size of the wstr representation in bytes: the code unit count from
   PyUnicode_GET_SIZE() multiplied by Py_UNICODE_SIZE. */
#define PyUnicode_GET_DATA_SIZE(op) \
    (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
[391] Fix | Delete
[392] Fix | Delete
/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
[393] Fix | Delete
representation on demand. Using this macro is very inefficient now,
[394] Fix | Delete
try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
[395] Fix | Delete
use PyUnicode_WRITE() and PyUnicode_READ(). */
[396] Fix | Delete
[397] Fix | Delete
/* Yields the stored wstr pointer when it is non-NULL, otherwise the
   result of PyUnicode_AsUnicode(). `op` is evaluated several times. */
#define PyUnicode_AS_UNICODE(op) \
    (assert(PyUnicode_Check(op)), \
     (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
      PyUnicode_AsUnicode((PyObject *)(op)))
[401] Fix | Delete
[402] Fix | Delete
/* Same pointer as PyUnicode_AS_UNICODE(), viewed as const char *. */
#define PyUnicode_AS_DATA(op) \
    ((const char *)(PyUnicode_AS_UNICODE(op)))
[404] Fix | Delete
[405] Fix | Delete
[406] Fix | Delete
/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
[407] Fix | Delete
[408] Fix | Delete
/* Values for PyASCIIObject.state: */
[409] Fix | Delete
[410] Fix | Delete
/* Interning state. */
[411] Fix | Delete
/* Values stored in the 2-bit state.interned field. */
#define SSTATE_NOT_INTERNED 0
#define SSTATE_INTERNED_MORTAL 1
#define SSTATE_INTERNED_IMMORTAL 2
[414] Fix | Delete
[415] Fix | Delete
/* Return true if the string contains only ASCII characters, or 0 if not. The
[416] Fix | Delete
string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
[417] Fix | Delete
ready. */
[418] Fix | Delete
/* Value of the state.ascii bit. In builds with assertions enabled, op is
   first checked to be a unicode object and to be ready.
   `op` is parenthesized inside the cast so that an expression argument is
   cast as a whole rather than only its first operand. */
#define PyUnicode_IS_ASCII(op)                   \
    (assert(PyUnicode_Check(op)),                \
     assert(PyUnicode_IS_READY(op)),             \
     ((PyASCIIObject*)(op))->state.ascii)
[422] Fix | Delete
[423] Fix | Delete
/* Return true if the string is compact or 0 if not.
[424] Fix | Delete
No type checks or Ready calls are performed. */
[425] Fix | Delete
/* Value of the state.compact bit; op is cast to PyASCIIObject* without
   any checking. */
#define PyUnicode_IS_COMPACT(op) \
    (((PyASCIIObject*)(op))->state.compact)
[427] Fix | Delete
[428] Fix | Delete
/* Return true if the string is a compact ASCII string (use PyASCIIObject
[429] Fix | Delete
structure), or 0 if not. No type checks or Ready calls are performed. */
[430] Fix | Delete
/* True when both the state.ascii and state.compact bits are set. No type
   or readiness checks are made.
   `op` is parenthesized inside the cast so that an expression argument
   (for example `base + offset`) is cast as a whole. */
#define PyUnicode_IS_COMPACT_ASCII(op)                 \
    (((PyASCIIObject*)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
[432] Fix | Delete
[433] Fix | Delete
/* Storage kinds. For the three *BYTE kinds the enumerator value equals
   the number of bytes per character. */
enum PyUnicode_Kind {
/* String contains only wstr byte characters.  This is only possible
   when the string was created with a legacy API and _PyUnicode_Ready()
   has not been called yet.  */
    PyUnicode_WCHAR_KIND = 0,
/* Return values of the PyUnicode_KIND() macro: */
    PyUnicode_1BYTE_KIND = 1,
    PyUnicode_2BYTE_KIND = 2,
    PyUnicode_4BYTE_KIND = 4
};
[443] Fix | Delete
[444] Fix | Delete
/* Return pointers to the canonical representation cast to unsigned char,
[445] Fix | Delete
Py_UCS2, or Py_UCS4 for direct character access.
[446] Fix | Delete
No checks are performed, use PyUnicode_KIND() before to ensure
[447] Fix | Delete
these will work correctly. */
[448] Fix | Delete
[449] Fix | Delete
/* PyUnicode_DATA(op) cast to the element type of the given kind. The
   macros do not verify that the string really has that kind. */
#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
[452] Fix | Delete
[453] Fix | Delete
/* Return one of the PyUnicode_*_KIND values defined above. */
[454] Fix | Delete
/* Value of the state.kind bit field. In builds with assertions enabled,
   op is first checked to be a unicode object and to be ready. */
#define PyUnicode_KIND(op) \
    (assert(PyUnicode_Check(op)), \
     assert(PyUnicode_IS_READY(op)),            \
     ((PyASCIIObject *)(op))->state.kind)
[458] Fix | Delete
[459] Fix | Delete
/* Return a void pointer to the raw unicode buffer. */
[460] Fix | Delete
/* Address of the first byte after the object header of a compact string.
   The header is PyASCIIObject when PyUnicode_IS_ASCII(op) is true and
   PyCompactUnicodeObject otherwise; `+ 1` steps over one whole header. */
#define _PyUnicode_COMPACT_DATA(op)                     \
    (PyUnicode_IS_ASCII(op) ?                   \
     ((void*)((PyASCIIObject*)(op) + 1)) :              \
     ((void*)((PyCompactUnicodeObject*)(op) + 1)))
[464] Fix | Delete
[465] Fix | Delete
/* The data.any pointer of a PyUnicodeObject. In builds with assertions
   enabled, a NULL pointer trips the assert. */
#define _PyUnicode_NONCOMPACT_DATA(op)                  \
    (assert(((PyUnicodeObject*)(op))->data.any),        \
     ((((PyUnicodeObject *)(op))->data.any)))
[468] Fix | Delete
[469] Fix | Delete
/* Pointer to the character buffer: the memory following the header for
   compact strings, the separate data.any buffer otherwise. `op` is
   evaluated several times. */
#define PyUnicode_DATA(op) \
    (assert(PyUnicode_Check(op)), \
     PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) :   \
     _PyUnicode_NONCOMPACT_DATA(op))
[473] Fix | Delete
[474] Fix | Delete
/* In the access macros below, "kind" may be evaluated more than once.
[475] Fix | Delete
All other macro parameters are evaluated exactly once, so it is safe
[476] Fix | Delete
to put side effects into them (such as increasing the index). */
[477] Fix | Delete
[478] Fix | Delete
/* Write into the canonical representation, this macro does not do any sanity
[479] Fix | Delete
checks and is intended for usage in loops. The caller should cache the
[480] Fix | Delete
kind and data pointers obtained from other macro calls.
[481] Fix | Delete
index is the index in the string (starts at 0) and value is the new
[482] Fix | Delete
code point value which should be written to that location. */
[483] Fix | Delete
#define PyUnicode_WRITE(kind, data, index, value) \
[484] Fix | Delete
do { \
[485] Fix | Delete
switch ((kind)) { \
[486] Fix | Delete
case PyUnicode_1BYTE_KIND: { \
[487] Fix | Delete
((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
[488] Fix | Delete
break; \
[489] Fix | Delete
} \
[490] Fix | Delete
case PyUnicode_2BYTE_KIND: { \
[491] Fix | Delete
((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
[492] Fix | Delete
break; \
[493] Fix | Delete
} \
[494] Fix | Delete
default: { \
[495] Fix | Delete
assert((kind) == PyUnicode_4BYTE_KIND); \
[496] Fix | Delete
((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
[497] Fix | Delete
} \
[498] Fix | Delete
} \
[499] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function