about summary refs log tree commit diff stats
path: root/src/shared/quickjs/libunicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/shared/quickjs/libunicode.h')
-rw-r--r-- src/shared/quickjs/libunicode.h 108
1 files changed, 26 insertions, 82 deletions
diff --git a/src/shared/quickjs/libunicode.h b/src/shared/quickjs/libunicode.h
index cc2f244c7..8e6f2a01d 100644
--- a/src/shared/quickjs/libunicode.h
+++ b/src/shared/quickjs/libunicode.h
@@ -24,13 +24,28 @@
#ifndef LIBUNICODE_H
#define LIBUNICODE_H
-#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <inttypes.h>
-/* define it to include all the unicode tables (40KB larger) */
-#define CONFIG_ALL_UNICODE
+#ifdef __cplusplus
+extern "C" {
+#endif
#define LRE_CC_RES_LEN_MAX 3
+typedef enum {
+ UNICODE_NFC,
+ UNICODE_NFD,
+ UNICODE_NFKC,
+ UNICODE_NFKD,
+} UnicodeNormalizationEnum;
+
+int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
+int lre_canonicalize(uint32_t c, bool is_unicode);
+bool lre_is_cased(uint32_t c);
+bool lre_is_case_ignorable(uint32_t c);
+
/* char ranges */
typedef struct {
@@ -87,15 +102,11 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
const uint32_t *b_pt, int b_len, int op);
int cr_invert(CharRange *cr);
+int cr_regexp_canonicalize(CharRange *cr, bool is_unicode);
-int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
-
-typedef enum {
- UNICODE_NFC,
- UNICODE_NFD,
- UNICODE_NFKC,
- UNICODE_NFKD,
-} UnicodeNormalizationEnum;
+bool lre_is_id_start(uint32_t c);
+bool lre_is_id_continue(uint32_t c);
+bool lre_is_white_space(uint32_t c);
int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
UnicodeNormalizationEnum n_type,
@@ -103,80 +114,13 @@ int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
/* Unicode character range functions */
-int unicode_script(CharRange *cr, const char *script_name, int is_ext);
+int unicode_script(CharRange *cr,
+ const char *script_name, bool is_ext);
int unicode_general_category(CharRange *cr, const char *gc_name);
int unicode_prop(CharRange *cr, const char *prop_name);
-int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
-int lre_canonicalize(uint32_t c, int is_unicode);
-
-/* Code point type categories */
-enum {
- UNICODE_C_SPACE = (1 << 0),
- UNICODE_C_DIGIT = (1 << 1),
- UNICODE_C_UPPER = (1 << 2),
- UNICODE_C_LOWER = (1 << 3),
- UNICODE_C_UNDER = (1 << 4),
- UNICODE_C_DOLLAR = (1 << 5),
- UNICODE_C_XDIGIT = (1 << 6),
-};
-extern uint8_t const lre_ctype_bits[256];
-
-/* zero or non-zero return value */
-int lre_is_cased(uint32_t c);
-int lre_is_case_ignorable(uint32_t c);
-int lre_is_id_start(uint32_t c);
-int lre_is_id_continue(uint32_t c);
-
-static inline int lre_is_space_byte(uint8_t c) {
- return lre_ctype_bits[c] & UNICODE_C_SPACE;
-}
-
-static inline int lre_is_id_start_byte(uint8_t c) {
- return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
- UNICODE_C_UNDER | UNICODE_C_DOLLAR);
-}
-
-static inline int lre_is_id_continue_byte(uint8_t c) {
- return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
- UNICODE_C_UNDER | UNICODE_C_DOLLAR |
- UNICODE_C_DIGIT);
-}
-
-int lre_is_space_non_ascii(uint32_t c);
-
-static inline int lre_is_space(uint32_t c) {
- if (c < 256)
- return lre_is_space_byte(c);
- else
- return lre_is_space_non_ascii(c);
-}
-
-static inline int lre_js_is_ident_first(uint32_t c) {
- if (c < 128) {
- return lre_is_id_start_byte(c);
- } else {
-#ifdef CONFIG_ALL_UNICODE
- return lre_is_id_start(c);
-#else
- return !lre_is_space_non_ascii(c);
-#endif
- }
-}
-
-static inline int lre_js_is_ident_next(uint32_t c) {
- if (c < 128) {
- return lre_is_id_continue_byte(c);
- } else {
- /* ZWNJ and ZWJ are accepted in identifiers */
- if (c >= 0x200C && c <= 0x200D)
- return TRUE;
-#ifdef CONFIG_ALL_UNICODE
- return lre_is_id_continue(c);
-#else
- return !lre_is_space_non_ascii(c);
+#ifdef __cplusplus
+} /* extern "C" { */
#endif
- }
-}
#endif /* LIBUNICODE_H */