str.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /** @file str.c
  2. * @brief String related functions
  3. *
  4. * This file implements some often used string functions.
  5. * Some functions are more portable versions of standard
  6. * functions but others are original ones.
  7. */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include "faux/ctype.h"
  11. #include "faux/str.h"
  12. /* TODO: Are these vars really needed? */
  13. const char *lub_string_esc_default = "`|$<>&()#;\\\"!"; /* backtick | $ < > & ( ) # ; backslash double-quote ! -- presumably an escape_chars set for lub_string_encode(); confirm against callers */
  14. const char *lub_string_esc_regex = "^$.*+[](){}"; /* ^ $ . * + [ ] ( ) { } -- presumably for escaping regex metacharacters; confirm against callers */
  15. const char *lub_string_esc_quoted = "\\\""; /* backslash and double-quote only */
  /** @brief Free the memory allocated for the string.
   *
   * Releases the string that the given address refers to and then stores
   * NULL into the caller's pointer, so the stale value cannot be used or
   * freed a second time. Nothing happens when the address itself is NULL
   * or when it refers to a NULL string.
   *
   * @param [in,out] str Address of string pointer (may be NULL)
   */
  void faux_str_free(char **str)
  {
      /* Check the address first: *str must not be evaluated for NULL str */
      if (!str || !*str)
          return;
      free(*str);
      *str = NULL;
  }
  31. /*--------------------------------------------------------- */
  /* Duplicate a NUL-terminated string into newly allocated memory.
   * A NULL argument yields NULL; so does an allocation failure.
   * The result comes from malloc() and is released with free(). */
  char *lub_string_dup(const char *string)
  {
      char *copy = NULL;
      size_t size;

      if (string) {
          /* size includes the terminating '\0' */
          size = strlen(string) + 1;
          copy = malloc(size);
          if (copy)
              memcpy(copy, string, size);
      }
      return copy;
  }
  38. /*--------------------------------------------------------- */
  /* Duplicate at most 'len' characters of 'string'.
   *
   * The result is always NUL-terminated. If 'string' is shorter than 'len'
   * the copy stops at its terminator (strncpy() zero-fills the rest).
   *
   * Returns a malloc()ed buffer of len + 1 bytes, or NULL when 'string' is
   * NULL, the size cannot be represented, or the allocation fails.
   */
  char *lub_string_dupn(const char *string, unsigned int len)
  {
      /* Widen before adding one so the sum cannot wrap in unsigned int */
      size_t size = (size_t)len + 1;
      char *res;

      /* size == 0 only if size_t is no wider than unsigned int and wrapped */
      if (!string || !size)
          return NULL;
      res = malloc(size);
      if (!res)
          return NULL;
      strncpy(res, string, len);
      res[len] = '\0';
      return res;
  }
  49. /*--------------------------------------------------------- */
  /* Remove backslash escaping from the first 'len' characters of 'string'.
   *
   * A backslash is dropped and the character following it is copied
   * literally (so "\\\\" becomes a single backslash). A backslash that is
   * the last character examined is dropped. Decoding stops at 'len'
   * characters or at the terminating '\0', whichever comes first.
   *
   * Returns a malloc()ed, NUL-terminated string, or NULL when 'string' is
   * NULL, the size cannot be represented, or the allocation fails.
   */
  char *lub_string_ndecode(const char *string, unsigned int len)
  {
      /* The output is never longer than the input; +1 for the '\0' */
      size_t size = (size_t)len + 1;
      char *res, *out;
      unsigned int i;
      int esc = 0;

      if (!string || !size)
          return NULL;
      res = malloc(size);
      if (!res)
          return NULL;
      out = res;
      /* Test the bound before reading so string[len] is never touched */
      for (i = 0; (i < len) && string[i]; i++) {
          if (!esc && ('\\' == string[i])) {
              /* Escape introducer: emit nothing, remember the state */
              esc = 1;
              continue;
          }
          *out++ = string[i];
          esc = 0;
      }
      *out = '\0';
      return res;
  }
  88. /*--------------------------------------------------------- */
  /* Remove backslash escaping from a whole NUL-terminated string.
   *
   * Thin wrapper around lub_string_ndecode() using the full string length.
   * Returns NULL for a NULL argument instead of passing it to strlen().
   * Note that the length is handed over through an unsigned int parameter.
   *
   * Defined without 'inline': a bare 'inline' definition in C99/C11 does
   * not by itself provide the external definition other files link to.
   */
  char *lub_string_decode(const char *string)
  {
      if (!string)
          return NULL;
      return lub_string_ndecode(string, strlen(string));
  }
  93. /*----------------------------------------------------------- */
  /*
   * This needs to escape any dangerous characters within the command line
   * to prevent gaining access to the underlying system shell.
   *
   * Every character of 'string' that occurs in 'escape_chars' is prefixed
   * with a backslash; all other characters are copied unchanged. With a
   * NULL 'escape_chars' the result is a plain copy.
   *
   * The output size is computed up front and allocated once, so the result
   * is either complete or NULL - never a partially escaped string.
   *
   * Returns a malloc()ed string, or NULL when 'string' is NULL or the
   * allocation fails.
   */
  char *lub_string_encode(const char *string, const char *escape_chars)
  {
      const char *p;
      char *result, *out;
      size_t len = 0;   /* characters in the input */
      size_t extra = 0; /* backslashes that have to be inserted */

      if (!string)
          return NULL;
      /* First pass: measure. *p is never '\0' here, so strchr() cannot
       * match the terminator of escape_chars. */
      for (p = string; *p; p++) {
          len++;
          if (escape_chars && strchr(escape_chars, *p))
              extra++;
      }
      result = malloc(len + extra + 1);
      if (!result)
          return NULL;
      /* Second pass: copy, inserting '\' before each special character */
      out = result;
      for (p = string; *p; p++) {
          if (escape_chars && strchr(escape_chars, *p))
              *out++ = '\\';
          *out++ = *p;
      }
      *out = '\0';
      return result;
  }
  120. /*--------------------------------------------------------- */
  /* Append up to 'len' characters of 'text' to the heap string '*string'.
   *
   * '*string' may be NULL, in which case a new string is started. The
   * buffer is grown with realloc() and '*string' is updated to the new
   * address. A NULL 'text' is ignored. If realloc() fails, '*string' is
   * left exactly as it was.
   */
  void lub_string_catn(char **string, const char *text, size_t len)
  {
      size_t text_len, old_len, count;
      char *grown;

      if (!text)
          return;

      /* Never copy past the end of 'text', whatever the caller asked for */
      text_len = strlen(text);
      count = len;
      if (count > text_len)
          count = text_len;

      old_len = 0;
      if (*string)
          old_len = strlen(*string);

      /* +1 for the terminating '\0' */
      grown = realloc(*string, old_len + count + 1);
      if (!grown)
          return;

      memcpy(grown + old_len, text, count);
      grown[old_len + count] = '\0';
      *string = grown;
  }
  145. /*--------------------------------------------------------- */
  /* Append the whole of 'text' to the heap string '*string'.
   * Delegates to lub_string_catn() with the full length of 'text'
   * (or 0 when 'text' is NULL). */
  void lub_string_cat(char **string, const char *text)
  {
      size_t count = 0;

      if (text)
          count = strlen(text);
      lub_string_catn(string, text, count);
  }
  151. /*--------------------------------------------------------- */
  /* Compare two NUL-terminated strings ignoring letter case.
   *
   * Returns 0 when the strings are equal, otherwise the difference at the
   * first position where they differ. Both arguments must be non-NULL.
   */
  int lub_string_nocasecmp(const char *cs, const char *ct)
  {
      int result = 0;

      while ((0 == result) && *cs && *ct) {
          /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
           * MACRO implementation uses braces to prevent multiple increments
           * when called.
           */
          int s = faux_ctype_tolower(*cs++);
          int t = faux_ctype_tolower(*ct++);

          result = s - t;
      }
      /*lint -e774 Boolean within 'if' always evaluates to True
       * not the case because of tolower() evaluating to 0 under lint
       * (see above)
       */
      if (0 == result) {
          /* Account for different string lengths. At least one side is
           * '\0' here. Compare as unsigned char so that a remaining byte
           * with the high bit set still ranks above the terminator where
           * plain char is signed.
           * NOTE(review): assumes faux_ctype_tolower() also yields
           * non-negative values - confirm in faux/ctype.h. */
          result = (unsigned char)*cs - (unsigned char)*ct;
      }
      return result;
  }
  174. /*--------------------------------------------------------- */
  /* Return a lower-cased copy of 'str'.
   *
   * The input is not modified. Returns a malloc()ed string, or NULL when
   * 'str' is NULL or the allocation fails.
   */
  char *lub_string_tolower(const char *str)
  {
      size_t i, len;
      char *res;

      if (!str)
          return NULL;
      len = strlen(str);
      res = malloc(len + 1);
      if (!res)
          return NULL;
      /* tolower() requires a value representable as unsigned char (or
       * EOF); passing a negative plain char is undefined behaviour. */
      for (i = 0; i < len; i++)
          res[i] = (char)tolower((unsigned char)str[i]);
      res[len] = '\0';
      return res;
  }
  185. /*--------------------------------------------------------- */
  186. const char *lub_string_nocasestr(const char *cs, const char *ct)
  187. {
  188. const char *p = NULL;
  189. const char *result = NULL;
  190. while (*cs) {
  191. const char *q = cs;
  192. p = ct;
  193. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  194. * MACRO implementation uses braces to prevent multiple increments
  195. * when called.
  196. */
  197. /*lint -e506 Constant value Boolean
  198. * not the case because of tolower() evaluating to 0 under lint
  199. * (see above)
  200. */
  201. while (*p && *q
  202. && (faux_ctype_tolower(*p) == faux_ctype_tolower(*q))) {
  203. p++, q++;
  204. }
  205. if (0 == *p) {
  206. break;
  207. }
  208. cs++;
  209. }
  210. if (p && !*p) {
  211. /* we've found the first match of ct within cs */
  212. result = cs;
  213. }
  214. return result;
  215. }
  216. /*--------------------------------------------------------- */
  217. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  218. bool_t utf8)
  219. {
  220. unsigned int cnt = 0;
  221. if (!str1 || !str2)
  222. return cnt;
  223. while (*str1 && *str2) {
  224. if (*str1 != *str2)
  225. break;
  226. cnt++;
  227. str1++;
  228. str2++;
  229. }
  230. if (!utf8)
  231. return cnt;
  232. /* UTF8 features */
  233. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  234. cnt--;
  235. return cnt;
  236. }
  237. /*--------------------------------------------------------- */
  /* Return a pointer to the part of 'string' that follows its last
   * whitespace character (as judged by faux_ctype_isspace()).
   *
   * Without any whitespace the whole string is returned; if the string
   * ends in whitespace the result points at the terminating '\0'.
   * 'string' must be non-NULL.
   */
  const char *lub_string_suffix(const char *string)
  {
      const char *tail = string;
      const char *scan;

      for (scan = string; *scan; scan++) {
          /* Each whitespace character moves the suffix start past it */
          if (faux_ctype_isspace(*scan))
              tail = scan + 1;
      }
      return tail;
  }
  251. /*--------------------------------------------------------- */
  /* Locate the next word in 'string'.
   *
   * Leading whitespace is skipped. A word ends at unescaped whitespace
   * (unless it began with a double quote), at a double quote, or at the
   * end of the string. A backslash and the character after it are always
   * counted as part of the word.
   *
   * string - text to scan; must be non-NULL
   * len    - [out] number of characters in the word, excluding quotes
   * offset - [in,out] incremented once per skipped whitespace character;
   *          it is not reset here, so the caller must initialise it
   * quoted - [out] 0: no quote involved; 1: opening quote seen but no
   *          closing quote found; 2: scanning stopped at a double quote
   *          (this also happens when a quote appears inside an unquoted
   *          word)
   *
   * Returns a pointer to the first character of the word (after the
   * opening quote, if any).
   */
  const char *lub_string_nextword(const char *string,
      size_t *len, size_t *offset, size_t *quoted)
  {
      const char *word;

      *quoted = 0;

      /* Find the start of a word (not including an opening quote).
       * isspace() needs an unsigned char value; a negative plain char
       * would be undefined behaviour. */
      while (*string && isspace((unsigned char)*string)) {
          string++;
          (*offset)++;
      }
      /* Is this the start of a quoted string ? */
      if (*string == '"') {
          *quoted = 1;
          string++;
      }
      word = string;
      *len = 0;

      /* Find the end of the word */
      while (*string) {
          if (*string == '\\') {
              /* Count the backslash and, if present, the escaped char */
              string++;
              (*len)++;
              if (*string) {
                  (*len)++;
                  string++;
              }
              continue;
          }
          /* End of word */
          if (!*quoted && isspace((unsigned char)*string))
              break;
          if (*string == '"') {
              /* End of a quoted string */
              *quoted = 2;
              break;
          }
          (*len)++;
          string++;
      }
      return word;
  }
  293. /*--------------------------------------------------------- */
  /* Count the words in 'line' by walking it with lub_string_nextword().
   *
   * Iteration stops once lub_string_nextword() returns an empty word with
   * no quote state set. 'line' must be non-NULL.
   */
  unsigned int lub_string_wordcount(const char *line)
  {
      size_t word_len = 0, skipped = 0;
      size_t quote_state;
      unsigned int total = 0;
      const char *cur;

      cur = lub_string_nextword(line, &word_len, &skipped, &quote_state);
      while (*cur || quote_state) {
          total++;
          /* account for the terminating quotation mark */
          if (quote_state)
              word_len += quote_state - 1;
          cur = lub_string_nextword(cur + word_len,
              &word_len, &skipped, &quote_state);
      }
      return total;
  }
  309. /*--------------------------------------------------------- */