str.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. /** @file str.c
  2. * @brief String related functions
  3. *
  4. * This file implements some often used string functions.
  5. * Some functions are more portable versions of standard
  6. * functions but others are original ones.
  7. */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <assert.h>
  11. #include "faux/ctype.h"
  12. #include "faux/str.h"
/* TODO: Are these variables really needed? */
  14. //const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  15. //const char *lub_string_esc_regex = "^$.*+[](){}";
  16. //const char *lub_string_esc_quoted = "\\\"";
  17. /** @brief Free the memory allocated for the string.
  18. *
  19. * Safely free the memory allocated for the string. You can use NULL
  20. * pointer with this function. POSIX's free() checks for the NULL pointer
  21. * but not all systems do so.
  22. *
  23. * @param [in] str String to free
  24. */
  25. void faux_str_free(char *str) {
  26. faux_free(str);
  27. }
  28. /** @brief Duplicates the string.
  29. *
  30. * Duplicates the string. Same as standard strdup() function. Allocates
  31. * memory with malloc(). Checks for NULL pointer.
  32. *
  33. * @warning Resulting string must be freed by faux_str_free().
  34. *
  35. * @param [in] str String to duplicate.
  36. * @return Pointer to allocated string or NULL.
  37. */
  38. char *faux_str_dup(const char *str) {
  39. if (!str)
  40. return NULL;
  41. return strdup(str);
  42. }
  43. /** @brief Duplicates the first n bytes of the string.
  44. *
  45. * Duplicates at most n bytes of the string. Allocates
  46. * memory with malloc(). Checks for NULL pointer. Function will allocate
  47. * n + 1 bytes to store string and terminating null byte.
  48. *
  49. * @warning Resulting string must be freed by faux_str_free().
  50. *
  51. * @param [in] str String to duplicate.
  52. * @param [in] n Number of bytes to copy.
  53. * @return Pointer to allocated string or NULL.
  54. */
  55. char *faux_str_dupn(const char *str, size_t n) {
  56. char *res = NULL;
  57. size_t len = 0;
  58. if (!str)
  59. return NULL;
  60. len = strlen(str);
  61. len = (len < n) ? len : n;
  62. res = faux_zmalloc(len + 1);
  63. if (!res)
  64. return NULL;
  65. strncpy(res, str, len);
  66. res[len] = '\0';
  67. return res;
  68. }
  69. /** @brief Generates lowercase copy of input string.
  70. *
  71. * Allocates the copy of input string and convert that copy to lowercase.
  72. *
  73. * @warning Resulting string must be freed by faux_str_free().
  74. *
  75. * @param [in] str String to convert.
  76. * @return Pointer to lowercase string copy or NULL.
  77. */
  78. char *faux_str_tolower(const char *str)
  79. {
  80. char *res = faux_str_dup(str);
  81. char *p = res;
  82. if (!res)
  83. return NULL;
  84. while (*p) {
  85. *p = faux_ctype_tolower(*p);
  86. p++;
  87. }
  88. return res;
  89. }
  90. /** @brief Generates uppercase copy of input string.
  91. *
  92. * Allocates the copy of input string and convert that copy to uppercase.
  93. *
  94. * @warning Resulting string must be freed by faux_str_free().
  95. *
  96. * @param [in] str String to convert.
  97. * @return Pointer to lowercase string copy or NULL.
  98. */
  99. char *faux_str_toupper(const char *str)
  100. {
  101. char *res = faux_str_dup(str);
  102. char *p = res;
  103. if (!res)
  104. return NULL;
  105. while (*p) {
  106. *p = faux_ctype_toupper(*p);
  107. p++;
  108. }
  109. return res;
  110. }
  111. /** @brief Add n bytes of text to existent string.
  112. *
  113. * Concatenate two strings. Add n bytes of second string to the end of the
  114. * first one. The first argument is address of string pointer. The pointer
  115. * can be changed due to realloc() features. The first pointer can be NULL.
  116. * In this case the memory will be malloc()-ed and stored to the first pointer.
  117. *
  118. * @param [in,out] str Address of first string pointer.
  119. * @param [in] text Text to add to the first string.
  120. * @param [in] n Number of bytes to add.
  121. * @return Pointer to resulting string or NULL.
  122. */
  123. char *faux_str_catn(char **str, const char *text, size_t n) {
  124. size_t str_len = 0;
  125. size_t text_len = 0;
  126. char *res = NULL;
  127. char *p = NULL;
  128. if (!text)
  129. return *str;
  130. str_len = (*str) ? strlen(*str) : 0;
  131. text_len = strlen(text);
  132. text_len = (text_len < n) ? text_len : n;
  133. res = realloc(*str, str_len + text_len + 1);
  134. if (!res)
  135. return NULL;
  136. p = res + str_len;
  137. strncpy(p, text, text_len);
  138. p[text_len] = '\0';
  139. *str = res;
  140. return res;
  141. }
  142. /** @brief Add some text to existent string.
  143. *
  144. * Concatenate two strings. Add second string to the end of the first one.
  145. * The first argument is address of string pointer. The pointer can be
  146. * changed due to realloc() features. The first pointer can be NULL. In this
  147. * case the memory will be malloc()-ed and stored to the first pointer.
  148. *
  149. * @param [in,out] str Address of first string pointer.
  150. * @param [in] text Text to add to the first string.
  151. * @return Pointer to resulting string or NULL.
  152. */
  153. char *faux_str_cat(char **str, const char *text) {
  154. size_t len = 0;
  155. if (!text)
  156. return *str;
  157. len = strlen(text);
  158. return faux_str_catn(str, text, len);
  159. }
  160. /** @brief Service function to compare to chars in right way.
  161. *
  162. * The problem is char type can be signed or unsigned on different
  163. * platforms. So stright comparision can return different results.
  164. *
  165. * @param [in] char1 First char
  166. * @param [in] char2 Second char
  167. * @return
  168. * < 0 if char1 < char2
  169. * = 0 if char1 = char2
  170. * > 0 if char1 > char2
  171. */
  172. static int faux_str_cmp_chars(char char1, char char2) {
  173. unsigned char ch1 = (unsigned char)char1;
  174. unsigned char ch2 = (unsigned char)char2;
  175. return (int)ch1 - (int)ch2;
  176. }
  177. /** @brief Compare n first characters of two strings ignoring case.
  178. *
  179. * The difference beetween this function an standard strncasecmp() is
  180. * faux function uses faux ctype functions. It can be important for
  181. * portability.
  182. *
  183. * @param [in] str1 First string to compare.
  184. * @param [in] str2 Second string to compare.
  185. * @param [in] n Number of characters to compare.
  186. * @return < 0, 0, > 0, see the strcasecmp().
  187. */
  188. int faux_str_casecmpn(const char *str1, const char *str2, size_t n) {
  189. const char *p1 = str1;
  190. const char *p2 = str2;
  191. size_t num = n;
  192. while (*p1 != '\0' && *p2 != '\0' && num != 0) {
  193. int res = faux_str_cmp_chars(faux_ctype_tolower(*p1),
  194. faux_ctype_tolower(*p2));
  195. if (res != 0)
  196. return res;
  197. p1++;
  198. p2++;
  199. num--;
  200. }
  201. if (0 == n) // It means n first characters are equal.
  202. return 0;
  203. return faux_str_cmp_chars(faux_ctype_tolower(*p1),
  204. faux_ctype_tolower(*p2));
  205. }
  206. /** @brief Compare two strings ignoring case.
  207. *
  208. * The difference beetween this function an standard strcasecmp() is
  209. * faux function uses faux ctype functions. It can be important for
  210. * portability.
  211. *
  212. * @param [in] str1 First string to compare.
  213. * @param [in] str2 Second string to compare.
  214. * @return < 0, 0, > 0, see the strcasecmp().
  215. */
  216. int faux_str_casecmp(const char *str1, const char *str2) {
  217. const char *p1 = str1;
  218. const char *p2 = str2;
  219. while (*p1 != '\0' && *p2 != '\0') {
  220. int res = faux_str_cmp_chars(faux_ctype_tolower(*p1),
  221. faux_ctype_tolower(*p2));
  222. if (res != 0)
  223. return res;
  224. p1++;
  225. p2++;
  226. }
  227. return faux_str_cmp_chars(faux_ctype_tolower(*p1),
  228. faux_ctype_tolower(*p2));
  229. }
  230. /** @brief Finds the first occurrence of the substring in the string
  231. *
  232. * Function is a faux version of strcasestr() function.
  233. *
  234. * @param [in] haystack String to find substring in it.
  235. * @param [in] needle Substring to find.
  236. * @return
  237. * Pointer to first occurence of substring in the string.
  238. * NULL on error
  239. */
  240. char *faux_str_casestr(const char *haystack, const char *needle)
  241. {
  242. const char *ptr = haystack;
  243. size_t ptr_len = 0;
  244. size_t needle_len = 0;
  245. assert(haystack);
  246. assert(needle);
  247. if (!haystack || !needle)
  248. return NULL;
  249. ptr_len = strlen(haystack);
  250. needle_len = strlen(needle);
  251. while ((*ptr != '\0') && (ptr_len >= needle_len)) {
  252. int res = faux_str_casecmpn(ptr, needle, needle_len);
  253. if (0 == res)
  254. return (char *)ptr;
  255. ptr++;
  256. ptr_len--;
  257. }
  258. return NULL; // Not found
  259. }
/* TODO: Is it needed?
  261. const char *faux_str_nextword(const char *string,
  262. size_t *len, size_t *offset, size_t *quoted)
  263. {
  264. const char *word;
  265. *quoted = 0;
  266. // Find the start of a word (not including an opening quote)
  267. while (*string && isspace(*string)) {
  268. string++;
  269. (*offset)++;
  270. }
  271. // Is this the start of a quoted string ?
  272. if (*string == '"') {
  273. *quoted = 1;
  274. string++;
  275. }
  276. word = string;
  277. *len = 0;
  278. // Find the end of the word
  279. while (*string) {
  280. if (*string == '\\') {
  281. string++;
  282. (*len)++;
  283. if (*string) {
  284. (*len)++;
  285. string++;
  286. }
  287. continue;
  288. }
  289. // End of word
  290. if (!*quoted && isspace(*string))
  291. break;
  292. if (*string == '"') {
  293. // End of a quoted string
  294. *quoted = 2;
  295. break;
  296. }
  297. (*len)++;
  298. string++;
  299. }
  300. return word;
  301. }
  302. */
  303. // TODO: Is it needed?
  304. /*
  305. char *lub_string_ndecode(const char *string, unsigned int len)
  306. {
  307. const char *s = string;
  308. char *res, *p;
  309. int esc = 0;
  310. if (!string)
  311. return NULL;
  312. p = res = faux_zmalloc(len + 1);
  313. while (*s && (s < (string +len))) {
  314. if (!esc) {
  315. if ('\\' == *s)
  316. esc = 1;
  317. else
  318. *p = *s;
  319. } else {
  320. // switch (*s) {
  321. // case 'r':
  322. // case 'n':
  323. // *p = '\n';
  324. // break;
  325. // case 't':
  326. // *p = '\t';
  327. // break;
  328. // default:
  329. // *p = *s;
  330. // break;
  331. // }
  332. // *p = *s;
  333. esc = 0;
  334. }
  335. if (!esc)
  336. p++;
  337. s++;
  338. }
  339. *p = '\0';
  340. return res;
  341. }
  342. */
  343. // TODO: Is it needed?
  344. /*
  345. inline char *lub_string_decode(const char *string)
  346. {
  347. return lub_string_ndecode(string, strlen(string));
  348. }
  349. */
  350. // TODO: Is it needed?
  351. /*----------------------------------------------------------- */
  352. /*
  353. * This needs to escape any dangerous characters within the command line
  354. * to prevent gaining access to the underlying system shell.
  355. */
  356. /*
  357. char *lub_string_encode(const char *string, const char *escape_chars)
  358. {
  359. char *result = NULL;
  360. const char *p;
  361. if (!escape_chars)
  362. return lub_string_dup(string);
  363. if (string && !(*string)) // Empty string
  364. return lub_string_dup(string);
  365. for (p = string; p && *p; p++) {
  366. // find any special characters and prefix them with '\'
  367. size_t len = strcspn(p, escape_chars);
  368. lub_string_catn(&result, p, len);
  369. p += len;
  370. if (*p) {
  371. lub_string_catn(&result, "\\", 1);
  372. lub_string_catn(&result, p, 1);
  373. } else {
  374. break;
  375. }
  376. }
  377. return result;
  378. }
  379. */
  380. // TODO: Is it needed?
  381. /*--------------------------------------------------------- */
  382. /*
  383. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  384. bool_t utf8)
  385. {
  386. unsigned int cnt = 0;
  387. if (!str1 || !str2)
  388. return cnt;
  389. while (*str1 && *str2) {
  390. if (*str1 != *str2)
  391. break;
  392. cnt++;
  393. str1++;
  394. str2++;
  395. }
  396. if (!utf8)
  397. return cnt;
  398. // UTF8 features
  399. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  400. cnt--;
  401. return cnt;
  402. }
  403. */
  404. // TODO: Is it needed?
  405. /*--------------------------------------------------------- */
  406. /*
  407. const char *lub_string_suffix(const char *string)
  408. {
  409. const char *p1, *p2;
  410. p1 = p2 = string;
  411. while (*p1) {
  412. if (faux_ctype_isspace(*p1)) {
  413. p2 = p1;
  414. p2++;
  415. }
  416. p1++;
  417. }
  418. return p2;
  419. }
  420. */
  421. // TODO: Is it needed?
  422. /*--------------------------------------------------------- */
  423. /*
  424. unsigned int lub_string_wordcount(const char *line)
  425. {
  426. const char *word;
  427. unsigned int result = 0;
  428. size_t len = 0, offset = 0;
  429. size_t quoted;
  430. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  431. *word || quoted;
  432. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  433. // account for the terminating quotation mark
  434. len += quoted ? quoted - 1 : 0;
  435. result++;
  436. }
  437. return result;
  438. }
  439. */