/** @file str.c
 * @brief String related functions
 *
 * This file implements some frequently used string functions.
 * Some of them are more portable versions of standard functions;
 * the others are original.
 */
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "faux/ctype.h"
#include "faux/str.h"
  12. /* TODO: Are that vars erally needed? */
  13. const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  14. const char *lub_string_esc_regex = "^$.*+[](){}";
  15. const char *lub_string_esc_quoted = "\\\"";
  16. /** @brief Free the memory allocated for the string.
  17. *
  18. * Safely free the memory allocated for the string. You can use NULL
  19. * pointer with this function. POSIX's free() checks for the NULL pointer
  20. * but not all systems do so. Function uses address of string pointer as an
  21. * argument to NULLify pointer after freeing.
  22. *
  23. * @param [in] str Address of string pointer
  24. */
  25. void faux_str_free(char **str) {
  26. if (!*str)
  27. return;
  28. free(*str);
  29. *str = NULL;
  30. }
  31. /*--------------------------------------------------------- */
  32. char *lub_string_ndecode(const char *string, unsigned int len)
  33. {
  34. const char *s = string;
  35. char *res, *p;
  36. int esc = 0;
  37. if (!string)
  38. return NULL;
  39. /* Allocate enough memory for result */
  40. p = res = malloc(len + 1);
  41. while (*s && (s < (string +len))) {
  42. if (!esc) {
  43. if ('\\' == *s)
  44. esc = 1;
  45. else
  46. *p = *s;
  47. } else {
  48. /* switch (*s) {
  49. case 'r':
  50. case 'n':
  51. *p = '\n';
  52. break;
  53. case 't':
  54. *p = '\t';
  55. break;
  56. default:
  57. *p = *s;
  58. break;
  59. }
  60. */ *p = *s;
  61. esc = 0;
  62. }
  63. if (!esc)
  64. p++;
  65. s++;
  66. }
  67. *p = '\0';
  68. return res;
  69. }
  70. /*--------------------------------------------------------- */
  71. inline char *lub_string_decode(const char *string)
  72. {
  73. return lub_string_ndecode(string, strlen(string));
  74. }
  75. /*----------------------------------------------------------- */
  76. /*
  77. * This needs to escape any dangerous characters within the command line
  78. * to prevent gaining access to the underlying system shell.
  79. */
  80. char *lub_string_encode(const char *string, const char *escape_chars)
  81. {
  82. char *result = NULL;
  83. const char *p;
  84. if (!escape_chars)
  85. return lub_string_dup(string);
  86. if (string && !(*string)) /* Empty string */
  87. return lub_string_dup(string);
  88. for (p = string; p && *p; p++) {
  89. /* find any special characters and prefix them with '\' */
  90. size_t len = strcspn(p, escape_chars);
  91. lub_string_catn(&result, p, len);
  92. p += len;
  93. if (*p) {
  94. lub_string_catn(&result, "\\", 1);
  95. lub_string_catn(&result, p, 1);
  96. } else {
  97. break;
  98. }
  99. }
  100. return result;
  101. }
  102. /*--------------------------------------------------------- */
  103. void lub_string_catn(char **string, const char *text, size_t len)
  104. {
  105. if (text) {
  106. char *q;
  107. size_t length, initlen, textlen = strlen(text);
  108. /* make sure the client cannot give us duff details */
  109. len = (len < textlen) ? len : textlen;
  110. /* remember the size of the original string */
  111. initlen = *string ? strlen(*string) : 0;
  112. /* account for '\0' */
  113. length = initlen + len + 1;
  114. /* allocate the memory for the result */
  115. q = realloc(*string, length);
  116. if (NULL != q) {
  117. *string = q;
  118. /* move to the end of the initial string */
  119. q += initlen;
  120. while (len--) {
  121. *q++ = *text++;
  122. }
  123. *q = '\0';
  124. }
  125. }
  126. }
  127. /*--------------------------------------------------------- */
  128. void lub_string_cat(char **string, const char *text)
  129. {
  130. size_t len = text ? strlen(text) : 0;
  131. lub_string_catn(string, text, len);
  132. }
  133. /*--------------------------------------------------------- */
  134. char *lub_string_dup(const char *string)
  135. {
  136. if (!string)
  137. return NULL;
  138. return strdup(string);
  139. }
  140. /*--------------------------------------------------------- */
  141. char *lub_string_dupn(const char *string, unsigned int len)
  142. {
  143. char *res = NULL;
  144. if (!string)
  145. return res;
  146. res = malloc(len + 1);
  147. strncpy(res, string, len);
  148. res[len] = '\0';
  149. return res;
  150. }
  151. /*--------------------------------------------------------- */
  152. int lub_string_nocasecmp(const char *cs, const char *ct)
  153. {
  154. int result = 0;
  155. while ((0 == result) && *cs && *ct) {
  156. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  157. * MACRO implementation uses braces to prevent multiple increments
  158. * when called.
  159. */
  160. int s = lub_ctype_tolower(*cs++);
  161. int t = lub_ctype_tolower(*ct++);
  162. result = s - t;
  163. }
  164. /*lint -e774 Boolean within 'if' always evealuates to True
  165. * not the case because of tolower() evaluating to 0 under lint
  166. * (see above)
  167. */
  168. if (0 == result) {
  169. /* account for different string lengths */
  170. result = *cs - *ct;
  171. }
  172. return result;
  173. }
  174. /*--------------------------------------------------------- */
  175. char *lub_string_tolower(const char *str)
  176. {
  177. char *tmp = strdup(str);
  178. char *p = tmp;
  179. while (*p) {
  180. *p = tolower(*p);
  181. p++;
  182. }
  183. return tmp;
  184. }
  185. /*--------------------------------------------------------- */
  186. const char *lub_string_nocasestr(const char *cs, const char *ct)
  187. {
  188. const char *p = NULL;
  189. const char *result = NULL;
  190. while (*cs) {
  191. const char *q = cs;
  192. p = ct;
  193. /*lint -e155 Ignoring { }'ed sequence within an expression, 0 assumed
  194. * MACRO implementation uses braces to prevent multiple increments
  195. * when called.
  196. */
  197. /*lint -e506 Constant value Boolean
  198. * not the case because of tolower() evaluating to 0 under lint
  199. * (see above)
  200. */
  201. while (*p && *q
  202. && (lub_ctype_tolower(*p) == lub_ctype_tolower(*q))) {
  203. p++, q++;
  204. }
  205. if (0 == *p) {
  206. break;
  207. }
  208. cs++;
  209. }
  210. if (p && !*p) {
  211. /* we've found the first match of ct within cs */
  212. result = cs;
  213. }
  214. return result;
  215. }
  216. /*--------------------------------------------------------- */
  217. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  218. bool_t utf8)
  219. {
  220. unsigned int cnt = 0;
  221. if (!str1 || !str2)
  222. return cnt;
  223. while (*str1 && *str2) {
  224. if (*str1 != *str2)
  225. break;
  226. cnt++;
  227. str1++;
  228. str2++;
  229. }
  230. if (!utf8)
  231. return cnt;
  232. /* UTF8 features */
  233. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  234. cnt--;
  235. return cnt;
  236. }
  237. /*--------------------------------------------------------- */
  238. const char *lub_string_suffix(const char *string)
  239. {
  240. const char *p1, *p2;
  241. p1 = p2 = string;
  242. while (*p1) {
  243. if (lub_ctype_isspace(*p1)) {
  244. p2 = p1;
  245. p2++;
  246. }
  247. p1++;
  248. }
  249. return p2;
  250. }
  251. /*--------------------------------------------------------- */
  252. const char *lub_string_nextword(const char *string,
  253. size_t *len, size_t *offset, size_t *quoted)
  254. {
  255. const char *word;
  256. *quoted = 0;
  257. /* Find the start of a word (not including an opening quote) */
  258. while (*string && isspace(*string)) {
  259. string++;
  260. (*offset)++;
  261. }
  262. /* Is this the start of a quoted string ? */
  263. if (*string == '"') {
  264. *quoted = 1;
  265. string++;
  266. }
  267. word = string;
  268. *len = 0;
  269. /* Find the end of the word */
  270. while (*string) {
  271. if (*string == '\\') {
  272. string++;
  273. (*len)++;
  274. if (*string) {
  275. (*len)++;
  276. string++;
  277. }
  278. continue;
  279. }
  280. /* End of word */
  281. if (!*quoted && isspace(*string))
  282. break;
  283. if (*string == '"') {
  284. /* End of a quoted string */
  285. *quoted = 2;
  286. break;
  287. }
  288. (*len)++;
  289. string++;
  290. }
  291. return word;
  292. }
  293. /*--------------------------------------------------------- */
  294. unsigned int lub_string_wordcount(const char *line)
  295. {
  296. const char *word;
  297. unsigned int result = 0;
  298. size_t len = 0, offset = 0;
  299. size_t quoted;
  300. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  301. *word || quoted;
  302. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  303. /* account for the terminating quotation mark */
  304. len += quoted ? quoted - 1 : 0;
  305. result++;
  306. }
  307. return result;
  308. }
  309. /*--------------------------------------------------------- */