string.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
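/*
 * string.c
 *
 * String helpers: allocation and duplication, backslash escaping and
 * unescaping, case-insensitive comparison and word/quote parsing.
 */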
  4. #include "private.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "lub/ctype.h"
  8. const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  9. const char *lub_string_esc_regex = "^$.*+[](){}";
  10. const char *lub_string_esc_quoted = "\\\"";
  11. /*--------------------------------------------------------- */
  12. void lub_string_free(char *ptr)
  13. {
  14. if (!ptr)
  15. return;
  16. free(ptr);
  17. }
  18. /*--------------------------------------------------------- */
  19. char *lub_string_ndecode(const char *string, unsigned int len)
  20. {
  21. const char *s = string;
  22. char *res, *p;
  23. int esc = 0;
  24. if (!string)
  25. return NULL;
  26. /* Allocate enough memory for result */
  27. p = res = malloc(len + 1);
  28. while (*s && (s < (string +len))) {
  29. if (!esc) {
  30. if ('\\' == *s)
  31. esc = 1;
  32. else
  33. *p = *s;
  34. } else {
  35. /* switch (*s) {
  36. case 'r':
  37. case 'n':
  38. *p = '\n';
  39. break;
  40. case 't':
  41. *p = '\t';
  42. break;
  43. default:
  44. *p = *s;
  45. break;
  46. }
  47. */ *p = *s;
  48. esc = 0;
  49. }
  50. if (!esc)
  51. p++;
  52. s++;
  53. }
  54. *p = '\0';
  55. return res;
  56. }
  57. /*--------------------------------------------------------- */
  58. inline char *lub_string_decode(const char *string)
  59. {
  60. return lub_string_ndecode(string, strlen(string));
  61. }
  62. /*----------------------------------------------------------- */
  63. /*
  64. * This needs to escape any dangerous characters within the command line
  65. * to prevent gaining access to the underlying system shell.
  66. */
  67. char *lub_string_encode(const char *string, const char *escape_chars)
  68. {
  69. char *result = NULL;
  70. const char *p;
  71. if (!escape_chars)
  72. return lub_string_dup(string);
  73. if (string && !(*string)) /* Empty string */
  74. return lub_string_dup(string);
  75. for (p = string; p && *p; p++) {
  76. /* find any special characters and prefix them with '\' */
  77. size_t len = strcspn(p, escape_chars);
  78. lub_string_catn(&result, p, len);
  79. p += len;
  80. if (*p) {
  81. lub_string_catn(&result, "\\", 1);
  82. lub_string_catn(&result, p, 1);
  83. } else {
  84. break;
  85. }
  86. }
  87. return result;
  88. }
  89. /*--------------------------------------------------------- */
  90. void lub_string_catn(char **string, const char *text, size_t len)
  91. {
  92. if (text) {
  93. char *q;
  94. size_t length, initlen, textlen = strlen(text);
  95. /* make sure the client cannot give us duff details */
  96. len = (len < textlen) ? len : textlen;
  97. /* remember the size of the original string */
  98. initlen = *string ? strlen(*string) : 0;
  99. /* account for '\0' */
  100. length = initlen + len + 1;
  101. /* allocate the memory for the result */
  102. q = realloc(*string, length);
  103. if (NULL != q) {
  104. *string = q;
  105. /* move to the end of the initial string */
  106. q += initlen;
  107. while (len--) {
  108. *q++ = *text++;
  109. }
  110. *q = '\0';
  111. }
  112. }
  113. }
  114. /*--------------------------------------------------------- */
  115. void lub_string_cat(char **string, const char *text)
  116. {
  117. size_t len = text ? strlen(text) : 0;
  118. lub_string_catn(string, text, len);
  119. }
  120. /*--------------------------------------------------------- */
  121. char *lub_string_dup(const char *string)
  122. {
  123. if (!string)
  124. return NULL;
  125. return strdup(string);
  126. }
  127. /*--------------------------------------------------------- */
  128. char *lub_string_dupn(const char *string, unsigned int len)
  129. {
  130. char *res = NULL;
  131. if (!string)
  132. return res;
  133. res = malloc(len + 1);
  134. strncpy(res, string, len);
  135. res[len] = '\0';
  136. return res;
  137. }
  138. /*--------------------------------------------------------- */
  139. int lub_string_nocasecmp(const char *cs, const char *ct)
  140. {
  141. int result = 0;
  142. while ((0 == result) && *cs && *ct) {
  143. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  144. * MACRO implementation uses braces to prevent multiple increments
  145. * when called.
  146. */
  147. int s = lub_ctype_tolower(*cs++);
  148. int t = lub_ctype_tolower(*ct++);
  149. result = s - t;
  150. }
  151. /*lint -e774 Boolean within 'if' always evealuates to True
  152. * not the case because of tolower() evaluating to 0 under lint
  153. * (see above)
  154. */
  155. if (0 == result) {
  156. /* account for different string lengths */
  157. result = *cs - *ct;
  158. }
  159. return result;
  160. }
  161. /*--------------------------------------------------------- */
  162. char *lub_string_tolower(const char *str)
  163. {
  164. char *tmp = strdup(str);
  165. char *p = tmp;
  166. while (*p) {
  167. *p = tolower(*p);
  168. p++;
  169. }
  170. return tmp;
  171. }
  172. /*--------------------------------------------------------- */
  173. const char *lub_string_nocasestr(const char *cs, const char *ct)
  174. {
  175. const char *p = NULL;
  176. const char *result = NULL;
  177. while (*cs) {
  178. const char *q = cs;
  179. p = ct;
  180. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  181. * MACRO implementation uses braces to prevent multiple increments
  182. * when called.
  183. */
  184. /*lint -e506 Constant value Boolean
  185. * not the case because of tolower() evaluating to 0 under lint
  186. * (see above)
  187. */
  188. while (*p && *q
  189. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  190. p++, q++;
  191. }
  192. if (0 == *p) {
  193. break;
  194. }
  195. cs++;
  196. }
  197. if (p && !*p) {
  198. /* we've found the first match of ct within cs */
  199. result = cs;
  200. }
  201. return result;
  202. }
  203. /*--------------------------------------------------------- */
  204. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  205. bool_t utf8)
  206. {
  207. unsigned int cnt = 0;
  208. if (!str1 || !str2)
  209. return cnt;
  210. while (*str1 && *str2) {
  211. if (*str1 != *str2)
  212. break;
  213. cnt++;
  214. str1++;
  215. str2++;
  216. }
  217. if (!utf8)
  218. return cnt;
  219. /* UTF8 features */
  220. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  221. cnt--;
  222. return cnt;
  223. }
  224. /*--------------------------------------------------------- */
  225. const char *lub_string_suffix(const char *string)
  226. {
  227. const char *p1, *p2;
  228. p1 = p2 = string;
  229. while (*p1) {
  230. if (lub_ctype_isspace(*p1)) {
  231. p2 = p1;
  232. p2++;
  233. }
  234. p1++;
  235. }
  236. return p2;
  237. }
  238. /*--------------------------------------------------------- */
  239. /** @brief Find next word or quoted substring within string
  240. *
  241. * @param [in] str String to parse.
  242. * @param [out] len Length of found substring (without quotes).
  243. * @param [out] offset Pointer to first symbol after found substring.
  244. * @param [out] quoted Is substring quoted?
  245. * @param [out] qclosed Is closed quotes found?
  246. * @return Pointer to found substring (without quotes).
  247. */
  248. const char *lub_string_nextword(const char *str,
  249. size_t *len, const char **offset, bool_t *quoted, bool_t *qclosed,
  250. bool_t *altq)
  251. {
  252. const char *string = str;
  253. const char *word = NULL;
  254. const char dbl_quote = '"';
  255. bool_t dbl_quoted = BOOL_FALSE;
  256. const char alt_quote = '`';
  257. unsigned int alt_quote_num = 0; // Number of opening alt quotes
  258. bool_t alt_quoted = BOOL_FALSE;
  259. bool_t closed_quote = BOOL_FALSE;
  260. size_t length = 0;
  261. // Find the start of a word (not including an opening quote)
  262. while (*string && isspace(*string))
  263. string++;
  264. // Is this the start of a quoted string?
  265. if (*string == dbl_quote) {
  266. dbl_quoted = BOOL_TRUE;
  267. string++;
  268. } else if (*string == alt_quote) {
  269. alt_quoted = BOOL_TRUE;
  270. while (string && (*string == alt_quote)) {
  271. string++;
  272. alt_quote_num++; // Count starting quotes
  273. }
  274. }
  275. word = string;
  276. // Find the end of the word
  277. while (*string) {
  278. // Standard double quotation
  279. if (dbl_quoted) {
  280. // End of word
  281. if (*string == dbl_quote) {
  282. closed_quote = BOOL_TRUE;
  283. string++;
  284. break;
  285. }
  286. // Alternative multi quotation
  287. } else if (alt_quoted) {
  288. unsigned int qnum = alt_quote_num;
  289. while (string && (*string == alt_quote) && qnum) {
  290. string++;
  291. length++;
  292. qnum--;
  293. }
  294. if (0 == qnum) { // End of word was found
  295. // Quotes themselfs are not a part of a word
  296. length -= alt_quote_num;
  297. closed_quote = BOOL_TRUE;
  298. break;
  299. }
  300. if (qnum != alt_quote_num) // Skipped some qoute symbols
  301. continue;
  302. // Not quoted
  303. } else {
  304. // End of word
  305. if (isspace(*string))
  306. break;
  307. }
  308. // Common case
  309. // Escaping. It doesn't work within alt quoting
  310. if (!alt_quoted && (*string == '\\')) {
  311. // Skip escaping
  312. string++;
  313. length++;
  314. // Skip escaped symbol
  315. if (*string) {
  316. length++;
  317. string++;
  318. }
  319. continue;
  320. }
  321. length++;
  322. string++;
  323. }
  324. // Skip strange symbols after quotation
  325. while (*string && !isspace(*string))
  326. string++;
  327. if (len)
  328. *len = length;
  329. if (offset)
  330. *offset = string;
  331. if (quoted)
  332. *quoted = dbl_quoted || alt_quoted;
  333. if (qclosed)
  334. *qclosed = closed_quote;
  335. if (altq)
  336. *altq = alt_quoted;
  337. return word;
  338. }
  339. /*--------------------------------------------------------- */
  340. unsigned int lub_string_wordcount(const char *line)
  341. {
  342. const char *word = NULL;
  343. unsigned int result = 0;
  344. const char *offset = NULL;
  345. for (word = lub_string_nextword(line, NULL, &offset, NULL, NULL, NULL);
  346. word && (*word != '\0');
  347. word = lub_string_nextword(offset, NULL, &offset, NULL, NULL, NULL)) {
  348. result++;
  349. }
  350. return result;
  351. }
  352. /*--------------------------------------------------------- */