Browse Source

Add alternative quotation by multi ` symbols

Serj Kalichev 3 years ago
parent
commit
d710c0df5e
4 changed files with 125 additions and 64 deletions
  1. 13 17
      lub/argv/argv.c
  2. 14 9
      lub/ini/ini.c
  3. 2 2
      lub/string.h
  4. 96 36
      lub/string/string.c

+ 13 - 17
lub/argv/argv.c

@@ -10,12 +10,14 @@
 #include <ctype.h>
 
 /*--------------------------------------------------------- */
-static void lub_argv_init(lub_argv_t * this, const char *line, size_t offset)
+static void lub_argv_init(lub_argv_t * this, const char *line, size_t off)
 {
-	size_t len;
-	const char *word;
-	lub_arg_t *arg;
-	size_t quoted;
+	size_t len = 0;
+	const char *word = NULL;
+	lub_arg_t *arg = NULL;
+	bool_t quoted = BOOL_FALSE;
+	const char *str = line + off; // Start on specified offset
+	const char *offset = NULL;
 
 	this->argv = NULL;
 	this->argc = 0;
@@ -30,19 +32,13 @@ static void lub_argv_init(lub_argv_t * this, const char *line, size_t offset)
 	assert(arg);
 
 	/* then fill out the array with the words */
-	for (word = lub_string_nextword(line, &len, &offset, &quoted);
-		*word || quoted;
-		word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
+	for (word = lub_string_nextword(str, &len, &offset, &quoted, NULL);
+		word && (*word != '\0');
+		word = lub_string_nextword(str, &len, &offset, &quoted, NULL)) {
 		(*arg).arg = lub_string_ndecode(word, len);
-		(*arg).offset = offset;
-		(*arg).quoted = quoted ? BOOL_TRUE : BOOL_FALSE;
-
-		offset += len;
-
-		if (quoted) {
-			len += quoted - 1; /* account for terminating quotation mark */
-			offset += quoted; /* account for quotation marks */
-		}
+		(*arg).offset = offset - line;
+		(*arg).quoted = quoted;
+		str = offset;
 		arg++;
 	}
 }

+ 14 - 9
lub/ini/ini.c

@@ -90,20 +90,25 @@ const char *lub_ini_find(const lub_ini_t *this, const char *name)
 /*--------------------------------------------------------- */
 int lub_ini_parse_str(lub_ini_t *this, const char *ini)
 {
-	char *buffer;
+	char *buffer = NULL;
 	char *saveptr = NULL;
-	char *line;
+	char *line = NULL;
 
 	buffer = lub_string_dup(ini);
 	/* Now loop through each line */
 	for (line = strtok_r(buffer, "\n", &saveptr);
 		line; line = strtok_r(NULL, "\n", &saveptr)) {
 
-		char *str, *name, *value, *savestr = NULL, *ns = line;
-		const char *begin;
-		size_t len, offset, quoted;
-		char *rname, *rvalue;
-		lub_pair_t *pair;
+		char *str = NULL;
+		char *name = NULL;
+		char *value = NULL;
+		char *savestr = NULL;
+		char *ns = line;
+		const char *begin = NULL;
+		size_t len = 0;
+		char *rname = NULL;
+		char *rvalue = NULL;
+		lub_pair_t *pair = NULL;
 
 		if (!*ns) /* Empty */
 			continue;
@@ -120,12 +125,12 @@ int lub_ini_parse_str(lub_ini_t *this, const char *ini)
 			continue;
 		}
 		value = strtok_r(NULL, "=", &savestr);
-		begin = lub_string_nextword(name, &len, &offset, &quoted);
+		begin = lub_string_nextword(name, &len, NULL, NULL, NULL);
 		rname = lub_string_dupn(begin, len);
 		if (!value) /* Empty value */
 			rvalue = NULL;
 		else {
-			begin = lub_string_nextword(value, &len, &offset, &quoted);
+			begin = lub_string_nextword(value, &len, NULL, NULL, NULL);
 			rvalue = lub_string_dupn(begin, len);
 		}
 		pair = lub_pair_new(rname, rvalue);

+ 2 - 2
lub/string.h

@@ -270,8 +270,8 @@ char *lub_string_encode(const char *string, const char *escape_chars);
 char *lub_string_tolower(const char *str);
 unsigned int lub_string_equal_part(const char *str1, const char *str2,
 	bool_t utf8);
-const char *lub_string_nextword(const char *string,
-	size_t *len, size_t *offset, size_t *quoted);
+const char *lub_string_nextword(const char *str,
+	size_t *len, const char **offset, bool_t *quoted, bool_t *qclosed);
 unsigned int lub_string_wordcount(const char *line);
 
 _END_C_DECL

+ 96 - 36
lub/string/string.c

@@ -274,65 +274,125 @@ const char *lub_string_suffix(const char *string)
 }
 
 /*--------------------------------------------------------- */
-const char *lub_string_nextword(const char *string,
-	size_t *len, size_t *offset, size_t *quoted)
+/** @brief Find next word or quoted substring within string
+ *
+ * @param [in] str String to parse.
+ * @param [out] len Length of found substring (without quotes).
+ * @param [out] offset Pointer to first symbol after found substring.
+ * @param [out] quoted Is substring quoted?
+ * @param [out] qclosed Was a closing quote found?
+ * @return Pointer to found substring (without quotes).
+ */
+const char *lub_string_nextword(const char *str,
+	size_t *len, const char **offset, bool_t *quoted, bool_t *qclosed)
 {
-	const char *word;
-
-	*quoted = 0;
-
-	/* Find the start of a word (not including an opening quote) */
-	while (*string && isspace(*string)) {
+	const char *string = str;
+	const char *word = NULL;
+	const char dbl_quote = '"';
+	bool_t dbl_quoted = BOOL_FALSE;
+	const char alt_quote = '`';
+	unsigned int alt_quote_num = 0; // Number of opening alt quotes
+	bool_t alt_quoted = BOOL_FALSE;
+	bool_t closed_quote = BOOL_FALSE;
+	size_t length = 0;
+
+	// Find the start of a word (not including an opening quote)
+	while (*string && isspace(*string))
 		string++;
-		(*offset)++;
-	}
-	/* Is this the start of a quoted string ? */
-	if (*string == '"') {
-		*quoted = 1;
+
+	// Is this the start of a quoted string?
+	if (*string == dbl_quote) {
+		dbl_quoted = BOOL_TRUE;
 		string++;
+	} else if (*string == alt_quote) {
+		alt_quoted = BOOL_TRUE;
+		while (string && (*string == alt_quote)) {
+			string++;
+			alt_quote_num++; // Count starting quotes
+		}
 	}
 	word = string;
-	*len = 0;
 
-	/* Find the end of the word */
+	// Find the end of the word
 	while (*string) {
-		if (*string == '\\') {
+
+		// Standard double quotation
+		if (dbl_quoted) {
+			// End of word
+			if (*string == dbl_quote) {
+				closed_quote = BOOL_TRUE;
+				string++;
+				break;
+			}
+
+		// Alternative multi quotation
+		} else if (alt_quoted) {
+			unsigned int qnum = alt_quote_num;
+			while (string && (*string == alt_quote) && qnum) {
+				string++;
+				length++;
+				qnum--;
+			}
+			if (0 == qnum) { // End of word was found
+				// Quotes themselves are not a part of a word
+				length -= alt_quote_num;
+				closed_quote = BOOL_TRUE;
+				break;
+			}
+			if (qnum != alt_quote_num) // Skipped some quote symbols
+				continue;
+
+		// Not quoted
+		} else {
+			// End of word
+			if (isspace(*string))
+				break;
+		}
+
+		// Common case
+		// Escaping. It doesn't work within alt quoting
+		if (!alt_quoted && (*string == '\\')) {
+			// Skip escaping
 			string++;
-			(*len)++;
+			length++;
+			// Skip escaped symbol
 			if (*string) {
-				(*len)++;
+				length++;
 				string++;
 			}
 			continue;
 		}
-		/* End of word */
-		if (!*quoted && isspace(*string))
-			break;
-		if (*string == '"') {
-			/* End of a quoted string */
-			*quoted = 2;
-			break;
-		}
-		(*len)++;
+
+		length++;
 		string++;
 	}
 
+	// Skip strange symbols after quotation
+	while (*string && !isspace(*string))
+		string++;
+
+	if (len)
+		*len = length;
+	if (offset)
+		*offset = string;
+	if (quoted)
+		*quoted = dbl_quoted || alt_quoted;
+	if (qclosed)
+		*qclosed = closed_quote;
+
 	return word;
 }
 
 /*--------------------------------------------------------- */
 unsigned int lub_string_wordcount(const char *line)
 {
-	const char *word;
+	const char *word = NULL;
 	unsigned int result = 0;
-	size_t len = 0, offset = 0;
-	size_t quoted;
-
-	for (word = lub_string_nextword(line, &len, &offset, &quoted);
-		*word || quoted;
-		word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
-		/* account for the terminating quotation mark */
-		len += quoted ? quoted - 1 : 0;
+	const char *offset = NULL;
+
+	for (word = lub_string_nextword(line, NULL, &offset, NULL, NULL);
+		word && (*word != '\0');
+		word = lub_string_nextword(offset, NULL, &offset, NULL, NULL)) {
 		result++;
 	}