string.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /*
  2. * string.c
  3. */
  4. #include "private.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "lub/ctype.h"
/*
 * Predefined sets of characters to be escaped.
 * NOTE(review): presumably meant to be passed as the escape_chars argument
 * of lub_string_encode() - confirm against the callers.
 */
/* Default set: backtick, | $ < > & ( ) # ; backslash, double quote and ! */
const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
/* Set named for regular expressions: ^ $ . * + [ ] ( ) { } */
const char *lub_string_esc_regex = "^$.*+[](){}";
/* Set named for quoted strings: backslash and double quote */
const char *lub_string_esc_quoted = "\\\"";
  11. /*--------------------------------------------------------- */
  12. void lub_string_free(char *ptr)
  13. {
  14. if (!ptr)
  15. return;
  16. free(ptr);
  17. }
  18. /*--------------------------------------------------------- */
  19. char *lub_string_ndecode(const char *string, unsigned int len)
  20. {
  21. const char *s = string;
  22. char *res, *p;
  23. int esc = 0;
  24. if (!string)
  25. return NULL;
  26. /* Allocate enough memory for result */
  27. p = res = malloc(len + 1);
  28. while (*s && (s < (string +len))) {
  29. if (!esc) {
  30. if ('\\' == *s)
  31. esc = 1;
  32. else
  33. *p = *s;
  34. } else {
  35. /* switch (*s) {
  36. case 'r':
  37. case 'n':
  38. *p = '\n';
  39. break;
  40. case 't':
  41. *p = '\t';
  42. break;
  43. default:
  44. *p = *s;
  45. break;
  46. }
  47. */ *p = *s;
  48. esc = 0;
  49. }
  50. if (!esc)
  51. p++;
  52. s++;
  53. }
  54. *p = '\0';
  55. return res;
  56. }
  57. /*--------------------------------------------------------- */
  58. inline char *lub_string_decode(const char *string)
  59. {
  60. return lub_string_ndecode(string, strlen(string));
  61. }
  62. /*----------------------------------------------------------- */
  63. /*
  64. * This needs to escape any dangerous characters within the command line
  65. * to prevent gaining access to the underlying system shell.
  66. */
  67. char *lub_string_encode(const char *string, const char *escape_chars)
  68. {
  69. char *result = NULL;
  70. const char *p;
  71. if (!escape_chars)
  72. return lub_string_dup(string);
  73. if (string && !(*string)) /* Empty string */
  74. return lub_string_dup(string);
  75. for (p = string; p && *p; p++) {
  76. /* find any special characters and prefix them with '\' */
  77. size_t len = strcspn(p, escape_chars);
  78. lub_string_catn(&result, p, len);
  79. p += len;
  80. if (*p) {
  81. lub_string_catn(&result, "\\", 1);
  82. lub_string_catn(&result, p, 1);
  83. } else {
  84. break;
  85. }
  86. }
  87. return result;
  88. }
  89. /*--------------------------------------------------------- */
  90. void lub_string_catn(char **string, const char *text, size_t len)
  91. {
  92. if (text) {
  93. char *q;
  94. size_t length, initlen, textlen = strlen(text);
  95. /* make sure the client cannot give us duff details */
  96. len = (len < textlen) ? len : textlen;
  97. /* remember the size of the original string */
  98. initlen = *string ? strlen(*string) : 0;
  99. /* account for '\0' */
  100. length = initlen + len + 1;
  101. /* allocate the memory for the result */
  102. q = realloc(*string, length);
  103. if (NULL != q) {
  104. *string = q;
  105. /* move to the end of the initial string */
  106. q += initlen;
  107. while (len--) {
  108. *q++ = *text++;
  109. }
  110. *q = '\0';
  111. }
  112. }
  113. }
  114. /*--------------------------------------------------------- */
  115. void lub_string_cat(char **string, const char *text)
  116. {
  117. size_t len = text ? strlen(text) : 0;
  118. lub_string_catn(string, text, len);
  119. }
  120. /*--------------------------------------------------------- */
  121. char *lub_string_dup(const char *string)
  122. {
  123. if (!string)
  124. return NULL;
  125. return strdup(string);
  126. }
  127. /*--------------------------------------------------------- */
  128. char *lub_string_dupn(const char *string, unsigned int len)
  129. {
  130. char *res = NULL;
  131. if (!string)
  132. return res;
  133. res = malloc(len + 1);
  134. strncpy(res, string, len);
  135. res[len] = '\0';
  136. return res;
  137. }
  138. /*--------------------------------------------------------- */
  139. int lub_string_nocasecmp(const char *cs, const char *ct)
  140. {
  141. int result = 0;
  142. while ((0 == result) && *cs && *ct) {
  143. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  144. * MACRO implementation uses braces to prevent multiple increments
  145. * when called.
  146. */
  147. int s = lub_ctype_tolower(*cs++);
  148. int t = lub_ctype_tolower(*ct++);
  149. result = s - t;
  150. }
  151. /*lint -e774 Boolean within 'if' always evealuates to True
  152. * not the case because of tolower() evaluating to 0 under lint
  153. * (see above)
  154. */
  155. if (0 == result) {
  156. /* account for different string lengths */
  157. result = *cs - *ct;
  158. }
  159. return result;
  160. }
  161. /*--------------------------------------------------------- */
  162. char *lub_string_tolower(const char *str)
  163. {
  164. char *tmp = strdup(str);
  165. char *p = tmp;
  166. while (*p) {
  167. *p = tolower(*p);
  168. p++;
  169. }
  170. return tmp;
  171. }
  172. /*--------------------------------------------------------- */
  173. const char *lub_string_nocasestr(const char *cs, const char *ct)
  174. {
  175. const char *p = NULL;
  176. const char *result = NULL;
  177. while (*cs) {
  178. const char *q = cs;
  179. p = ct;
  180. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  181. * MACRO implementation uses braces to prevent multiple increments
  182. * when called.
  183. */
  184. /*lint -e506 Constant value Boolean
  185. * not the case because of tolower() evaluating to 0 under lint
  186. * (see above)
  187. */
  188. while (*p && *q
  189. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  190. p++, q++;
  191. }
  192. if (0 == *p) {
  193. break;
  194. }
  195. cs++;
  196. }
  197. if (p && !*p) {
  198. /* we've found the first match of ct within cs */
  199. result = cs;
  200. }
  201. return result;
  202. }
  203. /*--------------------------------------------------------- */
  204. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  205. bool_t utf8)
  206. {
  207. unsigned int cnt = 0;
  208. if (!str1 || !str2)
  209. return cnt;
  210. while (*str1 && *str2) {
  211. if (*str1 != *str2)
  212. break;
  213. cnt++;
  214. str1++;
  215. str2++;
  216. }
  217. if (!utf8)
  218. return cnt;
  219. /* UTF8 features */
  220. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  221. cnt--;
  222. return cnt;
  223. }
  224. /*--------------------------------------------------------- */
  225. const char *lub_string_suffix(const char *string)
  226. {
  227. const char *p1, *p2;
  228. p1 = p2 = string;
  229. while (*p1) {
  230. if (lub_ctype_isspace(*p1)) {
  231. p2 = p1;
  232. p2++;
  233. }
  234. p1++;
  235. }
  236. return p2;
  237. }
  238. /*--------------------------------------------------------- */
  239. const char *lub_string_nextword(const char *string,
  240. size_t *len, size_t *offset, size_t *quoted)
  241. {
  242. const char *word;
  243. *quoted = 0;
  244. /* Find the start of a word (not including an opening quote) */
  245. while (*string && isspace(*string)) {
  246. string++;
  247. (*offset)++;
  248. }
  249. /* Is this the start of a quoted string ? */
  250. if (*string == '"') {
  251. *quoted = 1;
  252. string++;
  253. }
  254. word = string;
  255. *len = 0;
  256. /* Find the end of the word */
  257. while (*string) {
  258. if (*string == '\\') {
  259. string++;
  260. (*len)++;
  261. if (*string) {
  262. (*len)++;
  263. string++;
  264. }
  265. continue;
  266. }
  267. /* End of word */
  268. if (!*quoted && isspace(*string))
  269. break;
  270. if (*string == '"') {
  271. /* End of a quoted string */
  272. *quoted = 2;
  273. break;
  274. }
  275. (*len)++;
  276. string++;
  277. }
  278. return word;
  279. }
  280. /*--------------------------------------------------------- */
  281. unsigned int lub_string_wordcount(const char *line)
  282. {
  283. const char *word;
  284. unsigned int result = 0;
  285. size_t len = 0, offset = 0;
  286. size_t quoted;
  287. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  288. *word || quoted;
  289. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  290. /* account for the terminating quotation mark */
  291. len += quoted ? quoted - 1 : 0;
  292. result++;
  293. }
  294. return result;
  295. }
  296. /*--------------------------------------------------------- */