string.c 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. /*
  2. * string.c
  3. */
  4. #include "private.h"
  5. #include <stdlib.h>
  6. #include <string.h>
  7. #include "lub/ctype.h"
  8. const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  9. const char *lub_string_esc_regex = "^$.*+[](){}";
  10. const char *lub_string_esc_quoted = "\\\"";
  11. /*--------------------------------------------------------- */
  12. void lub_string_free(char *ptr)
  13. {
  14. free(ptr);
  15. }
  16. /*--------------------------------------------------------- */
  17. char *lub_string_ndecode(const char *string, unsigned int len)
  18. {
  19. const char *s = string;
  20. char *res, *p;
  21. int esc = 0;
  22. if (!string)
  23. return NULL;
  24. /* Allocate enough memory for result */
  25. p = res = malloc(len + 1);
  26. while (*s && (s < (string +len))) {
  27. if (!esc) {
  28. if ('\\' == *s)
  29. esc = 1;
  30. else
  31. *p = *s;
  32. } else {
  33. /* switch (*s) {
  34. case 'r':
  35. case 'n':
  36. *p = '\n';
  37. break;
  38. case 't':
  39. *p = '\t';
  40. break;
  41. default:
  42. *p = *s;
  43. break;
  44. }
  45. */ *p = *s;
  46. esc = 0;
  47. }
  48. if (!esc)
  49. p++;
  50. s++;
  51. }
  52. *p = '\0';
  53. return res;
  54. }
  55. /*--------------------------------------------------------- */
  56. inline char *lub_string_decode(const char *string)
  57. {
  58. return lub_string_ndecode(string, strlen(string));
  59. }
  60. /*----------------------------------------------------------- */
  61. /*
  62. * This needs to escape any dangerous characters within the command line
  63. * to prevent gaining access to the underlying system shell.
  64. */
  65. char *lub_string_encode(const char *string, const char *escape_chars)
  66. {
  67. char *result = NULL;
  68. const char *p;
  69. if (!escape_chars)
  70. return lub_string_dup(string);
  71. if (string && !(*string)) /* Empty string */
  72. return lub_string_dup(string);
  73. for (p = string; p && *p; p++) {
  74. /* find any special characters and prefix them with '\' */
  75. size_t len = strcspn(p, escape_chars);
  76. lub_string_catn(&result, p, len);
  77. p += len;
  78. if (*p) {
  79. lub_string_catn(&result, "\\", 1);
  80. lub_string_catn(&result, p, 1);
  81. } else {
  82. break;
  83. }
  84. }
  85. return result;
  86. }
  87. /*--------------------------------------------------------- */
  88. void lub_string_catn(char **string, const char *text, size_t len)
  89. {
  90. if (text) {
  91. char *q;
  92. size_t length, initlen, textlen = strlen(text);
  93. /* make sure the client cannot give us duff details */
  94. len = (len < textlen) ? len : textlen;
  95. /* remember the size of the original string */
  96. initlen = *string ? strlen(*string) : 0;
  97. /* account for '\0' */
  98. length = initlen + len + 1;
  99. /* allocate the memory for the result */
  100. q = realloc(*string, length);
  101. if (NULL != q) {
  102. *string = q;
  103. /* move to the end of the initial string */
  104. q += initlen;
  105. while (len--) {
  106. *q++ = *text++;
  107. }
  108. *q = '\0';
  109. }
  110. }
  111. }
  112. /*--------------------------------------------------------- */
  113. void lub_string_cat(char **string, const char *text)
  114. {
  115. size_t len = text ? strlen(text) : 0;
  116. lub_string_catn(string, text, len);
  117. }
  118. /*--------------------------------------------------------- */
  119. char *lub_string_dup(const char *string)
  120. {
  121. if (!string)
  122. return NULL;
  123. return strdup(string);
  124. }
  125. /*--------------------------------------------------------- */
  126. char *lub_string_dupn(const char *string, unsigned int len)
  127. {
  128. char *res = NULL;
  129. if (!string)
  130. return res;
  131. res = malloc(len + 1);
  132. strncpy(res, string, len);
  133. res[len] = '\0';
  134. return res;
  135. }
  136. /*--------------------------------------------------------- */
  137. int lub_string_nocasecmp(const char *cs, const char *ct)
  138. {
  139. int result = 0;
  140. while ((0 == result) && *cs && *ct) {
  141. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  142. * MACRO implementation uses braces to prevent multiple increments
  143. * when called.
  144. */
  145. int s = lub_ctype_tolower(*cs++);
  146. int t = lub_ctype_tolower(*ct++);
  147. result = s - t;
  148. }
  149. /*lint -e774 Boolean within 'if' always evealuates to True
  150. * not the case because of tolower() evaluating to 0 under lint
  151. * (see above)
  152. */
  153. if (0 == result) {
  154. /* account for different string lengths */
  155. result = *cs - *ct;
  156. }
  157. return result;
  158. }
  159. /*--------------------------------------------------------- */
  160. char *lub_string_tolower(const char *str)
  161. {
  162. char *tmp = strdup(str);
  163. char *p = tmp;
  164. while (*p) {
  165. *p = tolower(*p);
  166. p++;
  167. }
  168. return tmp;
  169. }
  170. /*--------------------------------------------------------- */
  171. const char *lub_string_nocasestr(const char *cs, const char *ct)
  172. {
  173. const char *p = NULL;
  174. const char *result = NULL;
  175. while (*cs) {
  176. const char *q = cs;
  177. p = ct;
  178. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  179. * MACRO implementation uses braces to prevent multiple increments
  180. * when called.
  181. */
  182. /*lint -e506 Constant value Boolean
  183. * not the case because of tolower() evaluating to 0 under lint
  184. * (see above)
  185. */
  186. while (*p && *q
  187. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  188. p++, q++;
  189. }
  190. if (0 == *p) {
  191. break;
  192. }
  193. cs++;
  194. }
  195. if (p && !*p) {
  196. /* we've found the first match of ct within cs */
  197. result = cs;
  198. }
  199. return result;
  200. }
  201. /*--------------------------------------------------------- */
  202. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  203. bool_t utf8)
  204. {
  205. unsigned int cnt = 0;
  206. if (!str1 || !str2)
  207. return cnt;
  208. while (*str1 && *str2) {
  209. if (*str1 != *str2)
  210. break;
  211. cnt++;
  212. str1++;
  213. str2++;
  214. }
  215. if (!utf8)
  216. return cnt;
  217. /* UTF8 features */
  218. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  219. cnt--;
  220. return cnt;
  221. }
  222. /*--------------------------------------------------------- */
  223. const char *lub_string_suffix(const char *string)
  224. {
  225. const char *p1, *p2;
  226. p1 = p2 = string;
  227. while (*p1) {
  228. if (lub_ctype_isspace(*p1)) {
  229. p2 = p1;
  230. p2++;
  231. }
  232. p1++;
  233. }
  234. return p2;
  235. }
  236. /*--------------------------------------------------------- */
  237. const char *lub_string_nextword(const char *string,
  238. size_t *len, size_t *offset, size_t *quoted)
  239. {
  240. const char *word;
  241. *quoted = 0;
  242. /* Find the start of a word (not including an opening quote) */
  243. while (*string && isspace(*string)) {
  244. string++;
  245. (*offset)++;
  246. }
  247. /* Is this the start of a quoted string ? */
  248. if (*string == '"') {
  249. *quoted = 1;
  250. string++;
  251. }
  252. word = string;
  253. *len = 0;
  254. /* Find the end of the word */
  255. while (*string) {
  256. if (*string == '\\') {
  257. string++;
  258. (*len)++;
  259. if (*string) {
  260. (*len)++;
  261. string++;
  262. }
  263. continue;
  264. }
  265. /* End of word */
  266. if (!*quoted && isspace(*string))
  267. break;
  268. if (*string == '"') {
  269. /* End of a quoted string */
  270. *quoted = 2;
  271. break;
  272. }
  273. (*len)++;
  274. string++;
  275. }
  276. return word;
  277. }
  278. /*--------------------------------------------------------- */
  279. unsigned int lub_string_wordcount(const char *line)
  280. {
  281. const char *word;
  282. unsigned int result = 0;
  283. size_t len = 0, offset = 0;
  284. size_t quoted;
  285. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  286. *word || quoted;
  287. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  288. /* account for the terminating quotation mark */
  289. len += quoted ? quoted - 1 : 0;
  290. result++;
  291. }
  292. return result;
  293. }
  294. /*--------------------------------------------------------- */