diff options
Diffstat (limited to 'src/shared/quickjs/libunicode.h')
-rw-r--r-- | src/shared/quickjs/libunicode.h | 108 |
1 files changed, 26 insertions, 82 deletions
diff --git a/src/shared/quickjs/libunicode.h b/src/shared/quickjs/libunicode.h index cc2f244c7..8e6f2a01d 100644 --- a/src/shared/quickjs/libunicode.h +++ b/src/shared/quickjs/libunicode.h @@ -24,13 +24,28 @@ #ifndef LIBUNICODE_H #define LIBUNICODE_H -#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include <inttypes.h> -/* define it to include all the unicode tables (40KB larger) */ -#define CONFIG_ALL_UNICODE +#ifdef __cplusplus +extern "C" { +#endif #define LRE_CC_RES_LEN_MAX 3 +typedef enum { + UNICODE_NFC, + UNICODE_NFD, + UNICODE_NFKC, + UNICODE_NFKD, +} UnicodeNormalizationEnum; + +int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); +int lre_canonicalize(uint32_t c, bool is_unicode); +bool lre_is_cased(uint32_t c); +bool lre_is_case_ignorable(uint32_t c); + /* char ranges */ typedef struct { @@ -87,15 +102,11 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, const uint32_t *b_pt, int b_len, int op); int cr_invert(CharRange *cr); +int cr_regexp_canonicalize(CharRange *cr, bool is_unicode); -int cr_regexp_canonicalize(CharRange *cr, int is_unicode); - -typedef enum { - UNICODE_NFC, - UNICODE_NFD, - UNICODE_NFKC, - UNICODE_NFKD, -} UnicodeNormalizationEnum; +bool lre_is_id_start(uint32_t c); +bool lre_is_id_continue(uint32_t c); +bool lre_is_white_space(uint32_t c); int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, UnicodeNormalizationEnum n_type, @@ -103,80 +114,13 @@ int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, /* Unicode character range functions */ -int unicode_script(CharRange *cr, const char *script_name, int is_ext); +int unicode_script(CharRange *cr, + const char *script_name, bool is_ext); int unicode_general_category(CharRange *cr, const char *gc_name); int unicode_prop(CharRange *cr, const char *prop_name); -int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); -int lre_canonicalize(uint32_t c, int is_unicode); - -/* Code point type categories */ -enum { - UNICODE_C_SPACE = (1 << 0), - UNICODE_C_DIGIT = (1 << 1), - UNICODE_C_UPPER = (1 << 2), - UNICODE_C_LOWER = (1 << 3), - UNICODE_C_UNDER = (1 << 4), - UNICODE_C_DOLLAR = (1 << 5), - UNICODE_C_XDIGIT = (1 << 6), -}; -extern uint8_t const lre_ctype_bits[256]; - -/* zero or non-zero return value */ -int lre_is_cased(uint32_t c); -int lre_is_case_ignorable(uint32_t c); -int lre_is_id_start(uint32_t c); -int lre_is_id_continue(uint32_t c); - -static inline int lre_is_space_byte(uint8_t c) { - return lre_ctype_bits[c] & UNICODE_C_SPACE; -} - -static inline int lre_is_id_start_byte(uint8_t c) { - return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER | - UNICODE_C_UNDER | UNICODE_C_DOLLAR); -} - -static inline int lre_is_id_continue_byte(uint8_t c) { - return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER | - UNICODE_C_UNDER | UNICODE_C_DOLLAR | - UNICODE_C_DIGIT); -} - -int lre_is_space_non_ascii(uint32_t c); - -static inline int lre_is_space(uint32_t c) { - if (c < 256) - return lre_is_space_byte(c); - else - return lre_is_space_non_ascii(c); -} - -static inline int lre_js_is_ident_first(uint32_t c) { - if (c < 128) { - return lre_is_id_start_byte(c); - } else { -#ifdef CONFIG_ALL_UNICODE - return lre_is_id_start(c); -#else - return !lre_is_space_non_ascii(c); -#endif - } -} - -static inline int lre_js_is_ident_next(uint32_t c) { - if (c < 128) { - return lre_is_id_continue_byte(c); - } else { - /* ZWNJ and ZWJ are accepted in identifiers */ - if (c >= 0x200C && c <= 0x200D) - return TRUE; -#ifdef CONFIG_ALL_UNICODE - return lre_is_id_continue(c); -#else - return !lre_is_space_non_ascii(c); +#ifdef __cplusplus +} /* extern "C" { */ #endif - } -} #endif /* LIBUNICODE_H */ |