Browse Source

Try to support multibyte (more than 2 bytes) UTF-8 characters

Serj Kalichev 10 years ago
parent
commit
9677d07407
2 changed files with 21 additions and 4 deletions
  1. 1 0
      lub/string.h
  2. 20 4
      tinyrl/tinyrl.c

+ 1 - 0
lub/string.h

@@ -37,6 +37,7 @@ If a "const char *" is returned then the client has no responsibility for releasi
 #include "lub/types.h"
 
 #define UTF8_MASK 0xC0
+#define UTF8_7BIT_MASK 0x80 /* One byte or multibyte */
 #define UTF8_11   0xC0 /* First UTF8 byte */
 #define UTF8_10   0x80 /* Next UTF8 bytes */
 

+ 20 - 4
tinyrl/tinyrl.c

@@ -708,6 +708,7 @@ static char *internal_readline(tinyrl_t * this,
 	this->context = context;
 
 	if (this->isatty && !str) {
+		unsigned int utf8_cont = 0; /* UTF-8 continue bytes */
 		/* set the terminal into raw input mode */
 		tty_set_raw_mode(this);
 		tinyrl_reset_line_state(this);
@@ -735,10 +736,25 @@ static char *internal_readline(tinyrl_t * this,
 							this->end - 1,
 							this->end);
 				} else {
-					/* Update the display if the key
-					is not first UTF8 byte */
-					if (!(this->utf8 &&
-						(UTF8_11 == (key & UTF8_MASK))))
+					if (this->utf8) {
+						if (!(UTF8_7BIT_MASK & key)) /* ASCII char */
+							utf8_cont = 0;
+						else if (utf8_cont && (UTF8_10 == (key & UTF8_MASK))) /* Continue byte */
+							utf8_cont--;
+						else if (UTF8_11 == (key & UTF8_MASK)) { /* First byte of multibyte char */
+							/* Count the continuation bytes expected after this first byte */
+							int b = key;
+							utf8_cont = 0;
+							while ((utf8_cont < 6) && (UTF8_10 != (b & UTF8_MASK))) {
+								utf8_cont++;
+								b = b << 1;
+							}
+						}
+					}
+					/* For non-UTF-8 encodings utf8_cont is always 0.
+					   For UTF-8 it is 0 after a one-byte symbol or once
+					   all bytes of the current multibyte character have arrived. */
+					if (!utf8_cont)
 						tinyrl_redisplay(this);
 				}
 			} else { /* Error || EOF || Timeout */