str.c 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /** @file str.c
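  2. * String helper routines: duplication, concatenation, backslash escaping and unescaping, case-insensitive comparison and word splitting.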
  3. */
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "faux/ctype.h"
  7. #include "faux/str.h"
  /* Escape set: shell metacharacters plus backslash, double quote and '!'. */
  const char *lub_string_esc_default = "`|$<>&()#;\\\"!";

  /* Escape set: characters with a special meaning in regular expressions. */
  const char *lub_string_esc_regex = "^$.*+[](){}";

  /* Escape set: backslash and double quote only. */
  const char *lub_string_esc_quoted = "\\\"";
  /** @brief Release a heap-allocated string.
   *
   * Wrapper around free() that explicitly skips the call when the pointer
   * is NULL, so callers may pass NULL without checking first.
   *
   * @param [in] ptr String to release, or NULL.
   */
  void lub_string_free(char *ptr)
  {
      if (ptr != NULL)
          free(ptr);
  }
  25. /*--------------------------------------------------------- */
  /** @brief Strip backslash escaping from the leading part of a string.
   *
   * Processes at most @p len bytes of @p string, stopping early at the
   * terminating NUL. An unescaped backslash is dropped and the byte that
   * follows it is copied literally (so "\\n" yields "n", not a newline).
   * A trailing lone backslash is discarded.
   *
   * @param [in] string Source string, may be NULL.
   * @param [in] len Maximum number of source bytes to process.
   * @return Newly allocated NUL-terminated string that the caller must
   * free, or NULL if @p string is NULL or the allocation fails.
   */
  char *lub_string_ndecode(const char *string, unsigned int len)
  {
      char *res, *dst;
      unsigned int i;
      int esc = 0;

      if (!string)
          return NULL;

      /* The result is never longer than the input. The size is computed
       * in size_t so that len + 1 cannot wrap around. */
      res = malloc((size_t)len + 1);
      if (!res)
          return NULL;
      dst = res;

      /* Index-based scan: never forms a pointer beyond the string. */
      for (i = 0; (i < len) && string[i]; i++) {
          if (!esc && ('\\' == string[i])) {
              /* Drop the backslash, keep whatever follows it */
              esc = 1;
              continue;
          }
          *dst++ = string[i];
          esc = 0;
      }
      *dst = '\0';

      return res;
  }
  64. /*--------------------------------------------------------- */
  /** @brief Strip backslash escaping from a whole string.
   *
   * Convenience wrapper that decodes the full length of @p string with
   * lub_string_ndecode().
   *
   * @param [in] string Source string, may be NULL.
   * @return Newly allocated decoded string that the caller must free, or
   * NULL if @p string is NULL or the decoder returns NULL.
   */
  char *lub_string_decode(const char *string)
  {
      /* strlen(NULL) is undefined, so reject NULL before measuring */
      if (!string)
          return NULL;
      return lub_string_ndecode(string, (unsigned int)strlen(string));
  }
  69. /*----------------------------------------------------------- */
  /** @brief Prefix every character from an escape set with a backslash.
   *
   * This needs to escape any dangerous characters within the command line
   * to prevent gaining access to the underlying system shell.
   *
   * The output size is computed first and the result is built with a
   * single allocation, so the work is linear in the input length and an
   * allocation failure never yields a truncated string.
   *
   * @param [in] string Source string, may be NULL.
   * @param [in] escape_chars Characters to escape. NULL means no
   * character is escaped and a plain copy is returned.
   * @return Newly allocated escaped string that the caller must free, or
   * NULL if @p string is NULL or the allocation fails.
   */
  char *lub_string_encode(const char *string, const char *escape_chars)
  {
      const char *esc = escape_chars ? escape_chars : "";
      const char *s;
      size_t extra = 0;
      char *result, *out;

      if (!string)
          return NULL;

      /* First pass: count the characters that need a backslash */
      for (s = string; *s; s++) {
          if (strchr(esc, *s))
              extra++;
      }

      /* s now points at the terminating NUL, so s - string is the length */
      result = malloc((size_t)(s - string) + extra + 1);
      if (!result)
          return NULL;

      /* Second pass: copy, inserting '\' before each special character */
      out = result;
      for (s = string; *s; s++) {
          if (strchr(esc, *s))
              *out++ = '\\';
          *out++ = *s;
      }
      *out = '\0';

      return result;
  }
  96. /*--------------------------------------------------------- */
  /** @brief Append at most @p len bytes of @p text to a heap string.
   *
   * The destination is grown with realloc(). If @p text is shorter than
   * @p len only the bytes before its terminating NUL are appended. When
   * the reallocation fails the destination is left unchanged.
   *
   * @p text must not point into the buffer currently held in *string,
   * because realloc() may move that buffer.
   *
   * @param [in,out] string Address of the destination pointer. *string may
   * be NULL, in which case a new string is allocated. A NULL @p string is
   * ignored.
   * @param [in] text Text to append; NULL makes the call a no-op.
   * @param [in] len Maximum number of bytes to take from @p text.
   */
  void lub_string_catn(char **string, const char *text, size_t len)
  {
      size_t copylen = 0;
      size_t initlen;
      char *buf;

      if (!string || !text)
          return;

      /* Make sure the client cannot give us duff details: clamp len to
       * the real length of text, without reading beyond len bytes. */
      while ((copylen < len) && text[copylen])
          copylen++;

      /* Remember the size of the original string */
      initlen = *string ? strlen(*string) : 0;

      /* Allocate the result, accounting for '\0' */
      buf = realloc(*string, initlen + copylen + 1);
      if (!buf)
          return; /* *string is untouched and still valid */

      memcpy(buf + initlen, text, copylen);
      buf[initlen + copylen] = '\0';
      *string = buf;
  }
  121. /*--------------------------------------------------------- */
  /* Append the whole of text to *string; a NULL text is passed through
   * to lub_string_catn() with a length of zero. */
  void lub_string_cat(char **string, const char *text)
  {
      size_t textlen = 0;

      if (text != NULL)
          textlen = strlen(text);
      lub_string_catn(string, text, textlen);
  }
  127. /*--------------------------------------------------------- */
  /* Return a freshly allocated copy of string, or NULL when string is NULL
   * or memory cannot be obtained. The caller owns the result. */
  char *lub_string_dup(const char *string)
  {
      size_t size;
      char *copy;

      if (string == NULL)
          return NULL;

      /* Size includes the terminating NUL */
      size = strlen(string) + 1;
      copy = malloc(size);
      if (copy != NULL)
          memcpy(copy, string, size);
      return copy;
  }
  134. /*--------------------------------------------------------- */
  /** @brief Duplicate at most @p len bytes of a string.
   *
   * A buffer of len + 1 bytes is always allocated. If @p string is
   * shorter than @p len the remainder is zero-filled by strncpy().
   *
   * @param [in] string Source string, may be NULL.
   * @param [in] len Maximum number of bytes to copy.
   * @return Newly allocated NUL-terminated string that the caller must
   * free, or NULL if @p string is NULL or the allocation fails.
   */
  char *lub_string_dupn(const char *string, unsigned int len)
  {
      char *res;

      if (!string)
          return NULL;

      /* Size computed in size_t so that len + 1 cannot wrap around */
      res = malloc((size_t)len + 1);
      if (!res)
          return NULL;

      /* strncpy() does not terminate when the source is >= len bytes
       * long, hence the explicit terminator below. */
      strncpy(res, string, len);
      res[len] = '\0';

      return res;
  }
  145. /*--------------------------------------------------------- */
  /* Compare two strings ignoring case.
   *
   * Returns zero when the strings are equal after lower-casing, otherwise
   * the difference between the first pair of lower-cased characters that
   * differ; when one string is a prefix of the other, the difference
   * between the characters at the point where the shorter one ends. */
  int lub_string_nocasecmp(const char *cs, const char *ct)
  {
      while (*cs && *ct) {
          /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
           * MACRO implementation uses braces to prevent multiple increments
           * when called.
           */
          int lhs = lub_ctype_tolower(*cs++);
          int rhs = lub_ctype_tolower(*ct++);
          int diff = lhs - rhs;

          /*lint -e774 Boolean within 'if' always evaluates to True
           * not the case because of tolower() evaluating to 0 under lint
           * (see above)
           */
          if (diff != 0)
              return diff;
      }

      /* Every compared character matched: account for different lengths */
      return *cs - *ct;
  }
  168. /*--------------------------------------------------------- */
  /** @brief Create a lower-case copy of a string.
   *
   * @param [in] str Source string, may be NULL.
   * @return Newly allocated lower-cased copy that the caller must free, or
   * NULL if @p str is NULL or the allocation fails.
   */
  char *lub_string_tolower(const char *str)
  {
      size_t i, len;
      char *res;

      if (!str)
          return NULL;

      len = strlen(str);
      res = malloc(len + 1);
      if (!res)
          return NULL;

      /* tolower() requires a value representable as unsigned char; a
       * plain char may be negative for bytes >= 0x80. */
      for (i = 0; i < len; i++)
          res[i] = (char)tolower((unsigned char)str[i]);
      res[len] = '\0';

      return res;
  }
  179. /*--------------------------------------------------------- */
  180. const char *lub_string_nocasestr(const char *cs, const char *ct)
  181. {
  182. const char *p = NULL;
  183. const char *result = NULL;
  184. while (*cs) {
  185. const char *q = cs;
  186. p = ct;
  187. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  188. * MACRO implementation uses braces to prevent multiple increments
  189. * when called.
  190. */
  191. /*lint -e506 Constant value Boolean
  192. * not the case because of tolower() evaluating to 0 under lint
  193. * (see above)
  194. */
  195. while (*p && *q
  196. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  197. p++, q++;
  198. }
  199. if (0 == *p) {
  200. break;
  201. }
  202. cs++;
  203. }
  204. if (p && !*p) {
  205. /* we've found the first match of ct within cs */
  206. result = cs;
  207. }
  208. return result;
  209. }
  210. /*--------------------------------------------------------- */
  211. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  212. bool_t utf8)
  213. {
  214. unsigned int cnt = 0;
  215. if (!str1 || !str2)
  216. return cnt;
  217. while (*str1 && *str2) {
  218. if (*str1 != *str2)
  219. break;
  220. cnt++;
  221. str1++;
  222. str2++;
  223. }
  224. if (!utf8)
  225. return cnt;
  226. /* UTF8 features */
  227. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  228. cnt--;
  229. return cnt;
  230. }
  231. /*--------------------------------------------------------- */
  /* Return a pointer to the part of string that follows its last
   * whitespace character, or string itself when it contains none. The
   * result points into string; nothing is allocated. */
  const char *lub_string_suffix(const char *string)
  {
      const char *tail = string;
      const char *cur;

      for (cur = string; *cur; cur++) {
          if (lub_ctype_isspace(*cur))
              tail = cur + 1;
      }

      return tail;
  }
  245. /*--------------------------------------------------------- */
  /** @brief Locate the next word in a string.
   *
   * Leading whitespace is skipped. A word starting with a double quote
   * runs until the next unescaped double quote; otherwise it runs until
   * whitespace, a double quote or the end of the string. A backslash and
   * the character following it are always taken as part of the word.
   *
   * @param [in] string String to scan.
   * @param [out] len Length of the word in bytes, excluding any quotes.
   * @param [in,out] offset Incremented once per whitespace byte skipped.
   * It is never reset here, so the caller must initialise it.
   * @param [out] quoted 0 if no quote was involved, 1 if an opening quote
   * was seen without a closing one, 2 if the scan stopped at a double
   * quote.
   * @return Pointer to the first character of the word (after an opening
   * quote, if any) inside @p string.
   */
  const char *lub_string_nextword(const char *string,
      size_t *len, size_t *offset, size_t *quoted)
  {
      const char *word;

      *quoted = 0;

      /* Find the start of a word (not including an opening quote).
       * isspace() needs an unsigned char value: a plain char may be
       * negative for bytes >= 0x80 (e.g. UTF-8 text). */
      while (*string && isspace((unsigned char)*string)) {
          string++;
          (*offset)++;
      }

      /* Is this the start of a quoted string ? */
      if (*string == '"') {
          *quoted = 1;
          string++;
      }
      word = string;
      *len = 0;

      /* Find the end of the word */
      while (*string) {
          if (*string == '\\') {
              /* Keep the backslash and the escaped character */
              string++;
              (*len)++;
              if (*string) {
                  (*len)++;
                  string++;
              }
              continue;
          }
          /* End of word */
          if (!*quoted && isspace((unsigned char)*string))
              break;
          if (*string == '"') {
              /* End of a quoted string */
              *quoted = 2;
              break;
          }
          (*len)++;
          string++;
      }

      return word;
  }
  287. /*--------------------------------------------------------- */
  /* Count the words in line by calling lub_string_nextword() repeatedly
   * until it returns an empty, unquoted word. */
  unsigned int lub_string_wordcount(const char *line)
  {
      unsigned int count = 0;
      size_t len = 0, offset = 0;
      size_t quoted;
      const char *word = lub_string_nextword(line, &len, &offset, &quoted);

      while (*word || quoted) {
          /* Account for the terminating quotation mark */
          if (quoted)
              len += quoted - 1;
          count++;
          word = lub_string_nextword(word + len, &len, &offset, &quoted);
      }

      return count;
  }
  303. /*--------------------------------------------------------- */