string.c 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. /*
  2. * string.c
  3. */
  4. #include "private.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "lub/ctype.h"
/*
 * Predefined character sets. Each is a plain NUL-terminated list of
 * characters; presumably meant to be passed as the `escape_chars` argument
 * of lub_string_encode() (usage is not visible in this file - confirm
 * against callers).
 */
/* Backtick, pipe, dollar, angle brackets, ampersand, parentheses, hash,
 * semicolon, backslash, double quote and exclamation mark. */
  8. const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
/* Caret, dollar, dot, star, plus, square brackets, parentheses, braces. */
  9. const char *lub_string_esc_regex = "^$.*+[](){}";
/* Backslash and double quote. */
  10. const char *lub_string_esc_quoted = "\\\"";
  11. /*--------------------------------------------------------- */
  12. void lub_string_free(char *ptr)
  13. {
  14. if (!ptr)
  15. return;
  16. free(ptr);
  17. ptr = NULL;
  18. }
  19. /*--------------------------------------------------------- */
  20. char *lub_string_ndecode(const char *string, unsigned int len)
  21. {
  22. const char *s = string;
  23. char *res, *p;
  24. int esc = 0;
  25. if (!string)
  26. return NULL;
  27. /* Allocate enough memory for result */
  28. p = res = malloc(len + 1);
  29. while (*s && (s < (string +len))) {
  30. if (!esc) {
  31. if ('\\' == *s)
  32. esc = 1;
  33. else
  34. *p = *s;
  35. } else {
  36. /* switch (*s) {
  37. case 'r':
  38. case 'n':
  39. *p = '\n';
  40. break;
  41. case 't':
  42. *p = '\t';
  43. break;
  44. default:
  45. *p = *s;
  46. break;
  47. }
  48. */ *p = *s;
  49. esc = 0;
  50. }
  51. if (!esc)
  52. p++;
  53. s++;
  54. }
  55. *p = '\0';
  56. return res;
  57. }
  58. /*--------------------------------------------------------- */
  59. inline char *lub_string_decode(const char *string)
  60. {
  61. return lub_string_ndecode(string, strlen(string));
  62. }
  63. /*----------------------------------------------------------- */
  64. /*
  65. * This needs to escape any dangerous characters within the command line
  66. * to prevent gaining access to the underlying system shell.
  67. */
  68. char *lub_string_encode(const char *string, const char *escape_chars)
  69. {
  70. char *result = NULL;
  71. const char *p;
  72. if (!escape_chars)
  73. return lub_string_dup(string);
  74. if (string && !(*string)) /* Empty string */
  75. return lub_string_dup(string);
  76. for (p = string; p && *p; p++) {
  77. /* find any special characters and prefix them with '\' */
  78. size_t len = strcspn(p, escape_chars);
  79. lub_string_catn(&result, p, len);
  80. p += len;
  81. if (*p) {
  82. lub_string_catn(&result, "\\", 1);
  83. lub_string_catn(&result, p, 1);
  84. } else {
  85. break;
  86. }
  87. }
  88. return result;
  89. }
  90. /*--------------------------------------------------------- */
  91. void lub_string_catn(char **string, const char *text, size_t len)
  92. {
  93. if (text) {
  94. char *q;
  95. size_t length, initlen, textlen = strlen(text);
  96. /* make sure the client cannot give us duff details */
  97. len = (len < textlen) ? len : textlen;
  98. /* remember the size of the original string */
  99. initlen = *string ? strlen(*string) : 0;
  100. /* account for '\0' */
  101. length = initlen + len + 1;
  102. /* allocate the memory for the result */
  103. q = realloc(*string, length);
  104. if (NULL != q) {
  105. *string = q;
  106. /* move to the end of the initial string */
  107. q += initlen;
  108. while (len--) {
  109. *q++ = *text++;
  110. }
  111. *q = '\0';
  112. }
  113. }
  114. }
  115. /*--------------------------------------------------------- */
  116. void lub_string_cat(char **string, const char *text)
  117. {
  118. size_t len = text ? strlen(text) : 0;
  119. lub_string_catn(string, text, len);
  120. }
  121. /*--------------------------------------------------------- */
  122. char *lub_string_dup(const char *string)
  123. {
  124. if (!string)
  125. return NULL;
  126. return strdup(string);
  127. }
  128. /*--------------------------------------------------------- */
  129. char *lub_string_dupn(const char *string, unsigned int len)
  130. {
  131. char *res = NULL;
  132. if (!string)
  133. return res;
  134. res = malloc(len + 1);
  135. strncpy(res, string, len);
  136. res[len] = '\0';
  137. return res;
  138. }
  139. /*--------------------------------------------------------- */
  140. int lub_string_nocasecmp(const char *cs, const char *ct)
  141. {
  142. int result = 0;
  143. while ((0 == result) && *cs && *ct) {
  144. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  145. * MACRO implementation uses braces to prevent multiple increments
  146. * when called.
  147. */
  148. int s = lub_ctype_tolower(*cs++);
  149. int t = lub_ctype_tolower(*ct++);
  150. result = s - t;
  151. }
  152. /*lint -e774 Boolean within 'if' always evealuates to True
  153. * not the case because of tolower() evaluating to 0 under lint
  154. * (see above)
  155. */
  156. if (0 == result) {
  157. /* account for different string lengths */
  158. result = *cs - *ct;
  159. }
  160. return result;
  161. }
  162. /*--------------------------------------------------------- */
  163. char *lub_string_tolower(const char *str)
  164. {
  165. char *tmp = strdup(str);
  166. char *p = tmp;
  167. while (*p) {
  168. *p = tolower(*p);
  169. p++;
  170. }
  171. return tmp;
  172. }
  173. /*--------------------------------------------------------- */
  174. const char *lub_string_nocasestr(const char *cs, const char *ct)
  175. {
  176. const char *p = NULL;
  177. const char *result = NULL;
  178. while (*cs) {
  179. const char *q = cs;
  180. p = ct;
  181. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  182. * MACRO implementation uses braces to prevent multiple increments
  183. * when called.
  184. */
  185. /*lint -e506 Constant value Boolean
  186. * not the case because of tolower() evaluating to 0 under lint
  187. * (see above)
  188. */
  189. while (*p && *q
  190. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  191. p++, q++;
  192. }
  193. if (0 == *p) {
  194. break;
  195. }
  196. cs++;
  197. }
  198. if (p && !*p) {
  199. /* we've found the first match of ct within cs */
  200. result = cs;
  201. }
  202. return result;
  203. }
  204. /*--------------------------------------------------------- */
  205. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  206. bool_t utf8)
  207. {
  208. unsigned int cnt = 0;
  209. if (!str1 || !str2)
  210. return cnt;
  211. while (*str1 && *str2) {
  212. if (*str1 != *str2)
  213. break;
  214. cnt++;
  215. str1++;
  216. str2++;
  217. }
  218. if (!utf8)
  219. return cnt;
  220. /* UTF8 features */
  221. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  222. cnt--;
  223. return cnt;
  224. }
  225. /*--------------------------------------------------------- */
  226. const char *lub_string_suffix(const char *string)
  227. {
  228. const char *p1, *p2;
  229. p1 = p2 = string;
  230. while (*p1) {
  231. if (lub_ctype_isspace(*p1)) {
  232. p2 = p1;
  233. p2++;
  234. }
  235. p1++;
  236. }
  237. return p2;
  238. }
  239. /*--------------------------------------------------------- */
  240. const char *lub_string_nextword(const char *string,
  241. size_t *len, size_t *offset, size_t *quoted)
  242. {
  243. const char *word;
  244. *quoted = 0;
  245. /* Find the start of a word (not including an opening quote) */
  246. while (*string && isspace(*string)) {
  247. string++;
  248. (*offset)++;
  249. }
  250. /* Is this the start of a quoted string ? */
  251. if (*string == '"') {
  252. *quoted = 1;
  253. string++;
  254. }
  255. word = string;
  256. *len = 0;
  257. /* Find the end of the word */
  258. while (*string) {
  259. if (*string == '\\') {
  260. string++;
  261. (*len)++;
  262. if (*string) {
  263. (*len)++;
  264. string++;
  265. }
  266. continue;
  267. }
  268. /* End of word */
  269. if (!*quoted && isspace(*string))
  270. break;
  271. if (*string == '"') {
  272. /* End of a quoted string */
  273. *quoted = 2;
  274. break;
  275. }
  276. (*len)++;
  277. string++;
  278. }
  279. return word;
  280. }
  281. /*--------------------------------------------------------- */
  282. unsigned int lub_string_wordcount(const char *line)
  283. {
  284. const char *word;
  285. unsigned int result = 0;
  286. size_t len = 0, offset = 0;
  287. size_t quoted;
  288. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  289. *word || quoted;
  290. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  291. /* account for the terminating quotation mark */
  292. len += quoted ? quoted - 1 : 0;
  293. result++;
  294. }
  295. return result;
  296. }
  297. /*--------------------------------------------------------- */