|
@@ -20,6 +20,138 @@
|
|
|
|
|
|
#include "private.h"
|
|
|
|
|
|
+/*-------------------------------------------------------- */
|
|
|
+#if 0
|
|
|
/*
 * Decode a single UTF-8 sequence starting at sp.
 *
 * sp      - NUL-terminated byte string; must not be NULL.
 * sym_out - optional; when non-NULL receives the decoded code point, or
 *           the raw first byte when that byte is not the start of a
 *           well-formed multi-byte sequence.
 *
 * Returns the number of bytes consumed:
 *   0     - sp points at the terminating NUL;
 *   1     - ASCII byte, stray continuation byte, invalid lead byte, or a
 *           lead byte whose continuation bytes are missing/malformed;
 *   2..6  - a complete multi-byte sequence.
 *
 * The legacy 5- and 6-byte forms are accepted, and overlong encodings are
 * not rejected.
 */
int get_utf8(const char *sp, unsigned long *sym_out)
{
	const unsigned char *p = (const unsigned char *)sp;
	unsigned long sym = 0;
	int l = 0;	/* number of continuation bytes expected after the lead */
	int i = 0;

	/* Default result: the raw byte, kept on every "return 1" path. */
	if (sym_out)
		*sym_out = *p;

	if (!*p)
		return 0;

	/* ASCII has the top bit clear; only that bit decides, not 0xc0. */
	if (!(*p & 0x80))
		return 1;

	/* Lead byte: count of leading 1 bits gives the sequence length. */
	if ((*p & 0xe0) == 0xc0) {
		l = 1;
		sym = (*p & 0x1f);
	} else if ((*p & 0xf0) == 0xe0) {
		l = 2;
		sym = (*p & 0xf);
	} else if ((*p & 0xf8) == 0xf0) {
		l = 3;
		sym = (*p & 7);
	} else if ((*p & 0xfc) == 0xf8) {
		l = 4;
		sym = (*p & 3);
	} else if ((*p & 0xfe) == 0xfc) {
		l = 5;
		sym = (*p & 1);
	} else {
		/* Stray continuation byte (10xxxxxx) or 0xfe/0xff. */
		return 1;
	}

	p++;
	for (i = 0; i < l; i++) {
		/*
		 * Every continuation byte must look like 10xxxxxx. A NUL
		 * fails this test too, so the loop never reads past the
		 * end of the string.
		 */
		if ((*p & 0xc0) != 0x80)
			return 1;
		sym = (sym << 6) | (*p++ & 0x3f);
	}

	if (sym_out)
		*sym_out = sym;

	return l + 1;
}
|
|
|
+#endif
|
|
|
+
|
|
|
+/*-------------------------------------------------------- */
|
|
|
/*
 * Classify a Unicode code point as CJK / East Asian.
 *
 * sym - Unicode code point (already decoded from UTF-8).
 *
 * Returns 1 when sym falls into one of the ranges tested below, 0 otherwise.
 * NOTE(review): presumably callers use this to account for characters that
 * occupy two terminal cells - confirm against the call sites.
 */
static int utf8_is_cjk(unsigned long sym)
{
	/* Fast path: nothing below Hangul Jamo is CJK. */
	if (sym < 0x1100)
		return 0;

	/* Hangul Jamo (lower bound already established above). */
	if (sym <= 0x11FF)
		return 1;

	/*
	 * One merged test for a run of adjacent blocks:
	 *   2E80-2EFF CJK Radicals Supplement
	 *   2F00-2FDF Kangxi Radicals
	 *   2FF0-2FFF Ideographic Description Characters
	 *   3000-303E CJK Symbols and Punctuation (U+303F is a half space)
	 *   3040-309F Hiragana
	 *   30A0-30FF Katakana
	 *   3100-312F Bopomofo
	 *   3130-318F Hangul Compatibility Jamo
	 *   3190-319F Kanbun
	 *   31A0-31BF Bopomofo Extended
	 *   31C0-31EF CJK Strokes
	 *   31F0-31FF Katakana Phonetic Extensions
	 *   3200-32FF Enclosed CJK Letters and Months
	 *   3300-33FF CJK Compatibility
	 *   3400-4DBF CJK Unified Ideographs Extension A
	 *   4DC0-4DFF Yijing Hexagram Symbols
	 *   4E00-9FFF CJK Unified Ideographs
	 *   A000-A48F Yi Syllables
	 *   A490-A4CF Yi Radicals
	 */
	if (sym >= 0x2E80 && sym <= 0xA4CF && sym != 0x303F)
		return 1;

	if (sym >= 0xAC00 && sym <= 0xD7AF) /* Hangul Syllables */
		return 1;
	if (sym >= 0xF900 && sym <= 0xFAFF) /* CJK Compatibility Ideographs */
		return 1;
	if (sym >= 0xFE10 && sym <= 0xFE1F) /* Vertical Forms */
		return 1;

	/*
	 * Merged test for two adjacent blocks:
	 *   FE30-FE4F CJK Compatibility Forms
	 *   FE50-FE6F Small Form Variants
	 */
	if (sym >= 0xFE30 && sym <= 0xFE6F)
		return 1;

	if ((sym >= 0xFF00 && sym <= 0xFF60) ||
	    (sym >= 0xFFE0 && sym <= 0xFFE6)) /* Fullwidth Forms */
		return 1;

	if (sym >= 0x1D300 && sym <= 0x1D35F) /* Tai Xuan Jing Symbols */
		return 1;

	/*
	 * Supplementary and Tertiary Ideographic Planes. Covers CJK Unified
	 * Ideographs Extensions B and later as well as the CJK Compatibility
	 * Ideographs Supplement (2F800-2FA1F). The last two code points of
	 * each plane are noncharacters and are left out.
	 */
	if (sym >= 0x20000 && sym <= 0x2FFFD)
		return 1;
	if (sym >= 0x30000 && sym <= 0x3FFFD)
		return 1;

	return 0;
}
|
|
|
+
|
|
|
/*-------------------------------------------------------- */
|
|
|
static void utf8_point_left(tinyrl_t * this)
|
|
|
{
|