str.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. /** @file str.c
  2. * @brief String related functions
  3. *
  4. * This file implements some often used string functions.
  5. * Some functions are more portable versions of standard
  6. * functions but others are original ones.
  7. */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <assert.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include "faux/ctype.h"
  14. #include "faux/str.h"
/* TODO: Are these vars really needed? */
  16. //const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  17. //const char *lub_string_esc_regex = "^$.*+[](){}";
  18. //const char *lub_string_esc_quoted = "\\\"";
  19. /** @brief Free the memory allocated for the string.
  20. *
  21. * Safely free the memory allocated for the string. You can use NULL
  22. * pointer with this function. POSIX's free() checks for the NULL pointer
  23. * but not all systems do so.
  24. *
  25. * @param [in] str String to free
  26. */
  27. void faux_str_free(char *str) {
  28. faux_free(str);
  29. }
  30. /** @brief Duplicates the string.
  31. *
  32. * Duplicates the string. Same as standard strdup() function. Allocates
  33. * memory with malloc(). Checks for NULL pointer.
  34. *
  35. * @warning Resulting string must be freed by faux_str_free().
  36. *
  37. * @param [in] str String to duplicate.
  38. * @return Pointer to allocated string or NULL.
  39. */
  40. char *faux_str_dup(const char *str) {
  41. if (!str)
  42. return NULL;
  43. return strdup(str);
  44. }
  45. /** @brief Duplicates the first n bytes of the string.
  46. *
  47. * Duplicates at most n bytes of the string. Allocates
  48. * memory with malloc(). Checks for NULL pointer. Function will allocate
  49. * n + 1 bytes to store string and terminating null byte.
  50. *
  51. * @warning Resulting string must be freed by faux_str_free().
  52. *
  53. * @param [in] str String to duplicate.
  54. * @param [in] n Number of bytes to copy.
  55. * @return Pointer to allocated string or NULL.
  56. */
  57. char *faux_str_dupn(const char *str, size_t n) {
  58. char *res = NULL;
  59. size_t len = 0;
  60. if (!str)
  61. return NULL;
  62. len = strlen(str);
  63. len = (len < n) ? len : n;
  64. res = faux_zmalloc(len + 1);
  65. if (!res)
  66. return NULL;
  67. strncpy(res, str, len);
  68. res[len] = '\0';
  69. return res;
  70. }
  71. /** @brief Generates lowercase copy of input string.
  72. *
  73. * Allocates the copy of input string and convert that copy to lowercase.
  74. *
  75. * @warning Resulting string must be freed by faux_str_free().
  76. *
  77. * @param [in] str String to convert.
  78. * @return Pointer to lowercase string copy or NULL.
  79. */
  80. char *faux_str_tolower(const char *str) {
  81. char *res = faux_str_dup(str);
  82. char *p = res;
  83. if (!res)
  84. return NULL;
  85. while (*p) {
  86. *p = faux_ctype_tolower(*p);
  87. p++;
  88. }
  89. return res;
  90. }
  91. /** @brief Generates uppercase copy of input string.
  92. *
  93. * Allocates the copy of input string and convert that copy to uppercase.
  94. *
  95. * @warning Resulting string must be freed by faux_str_free().
  96. *
  97. * @param [in] str String to convert.
  98. * @return Pointer to lowercase string copy or NULL.
  99. */
  100. char *faux_str_toupper(const char *str) {
  101. char *res = faux_str_dup(str);
  102. char *p = res;
  103. if (!res)
  104. return NULL;
  105. while (*p) {
  106. *p = faux_ctype_toupper(*p);
  107. p++;
  108. }
  109. return res;
  110. }
  111. /** @brief Add n bytes of text to existent string.
  112. *
  113. * Concatenate two strings. Add n bytes of second string to the end of the
  114. * first one. The first argument is address of string pointer. The pointer
  115. * can be changed due to realloc() features. The first pointer can be NULL.
  116. * In this case the memory will be malloc()-ed and stored to the first pointer.
  117. *
  118. * @param [in,out] str Address of first string pointer.
  119. * @param [in] text Text to add to the first string.
  120. * @param [in] n Number of bytes to add.
  121. * @return Pointer to resulting string or NULL.
  122. */
  123. char *faux_str_catn(char **str, const char *text, size_t n) {
  124. size_t str_len = 0;
  125. size_t text_len = 0;
  126. char *res = NULL;
  127. char *p = NULL;
  128. if (!text)
  129. return *str;
  130. str_len = (*str) ? strlen(*str) : 0;
  131. text_len = strlen(text);
  132. text_len = (text_len < n) ? text_len : n;
  133. res = realloc(*str, str_len + text_len + 1);
  134. if (!res)
  135. return NULL;
  136. p = res + str_len;
  137. strncpy(p, text, text_len);
  138. p[text_len] = '\0';
  139. *str = res;
  140. return res;
  141. }
  142. /** @brief Add some text to existent string.
  143. *
  144. * Concatenate two strings. Add second string to the end of the first one.
  145. * The first argument is address of string pointer. The pointer can be
  146. * changed due to realloc() features. The first pointer can be NULL. In this
  147. * case the memory will be malloc()-ed and stored to the first pointer.
  148. *
  149. * @param [in,out] str Address of first string pointer.
  150. * @param [in] text Text to add to the first string.
  151. * @return Pointer to resulting string or NULL.
  152. */
  153. char *faux_str_cat(char **str, const char *text) {
  154. size_t len = 0;
  155. if (!text)
  156. return *str;
  157. len = strlen(text);
  158. return faux_str_catn(str, text, len);
  159. }
  160. /** @brief Add multiply text strings to existent string.
  161. *
  162. * Concatenate multiply strings. Add next string to the end of the previous one.
  163. * The first argument is address of string pointer. The pointer can be
  164. * changed due to realloc() features. The first pointer can be NULL. In this
  165. * case the memory will be malloc()-ed and stored to the first pointer.
  166. * The last argument must be 'NULL'. It marks the last argument within
  167. * variable arguments list.
  168. *
  169. * @warning If last argument is not 'NULL' then behaviour is undefined.
  170. *
  171. * @param [in,out] str Address of first string pointer.
  172. * @param [in] text Text to add to the first string.
  173. * @return Pointer to resulting string or NULL.
  174. */
  175. char *faux_str_vcat(char **str, ...) {
  176. va_list ap;
  177. const char *arg = NULL;
  178. char *retval = *str;
  179. va_start(ap, str);
  180. while ((arg = va_arg(ap, const char *))) {
  181. retval = faux_str_cat(str, arg);
  182. }
  183. va_end(ap);
  184. return retval;
  185. }
  186. /** @brief Allocates memory and sprintf() to it.
  187. *
  188. * Function tries to find out necessary amount of memory for specified format
  189. * string and arguments. Format is same as for sprintf() function. Then
  190. * function allocates memory for resulting string and sprintf() to it. So
  191. * user doesn't need to allocate buffer himself. Function returns allocated
  192. * string that need to be freed by faux_str_free() function later.
  193. *
  194. * @warning The returned pointer must be free by faux_str_free().
  195. *
  196. * @param [in] fmt Format string like the sprintf()'s fmt.
  197. * @param [in] arg Number of arguments.
  198. * @return Allocated resulting string or NULL on error.
  199. */
  200. char *faux_str_sprintf(const char *fmt, ...) {
  201. int size = 1;
  202. char calc_buf[1] = "";
  203. char *line = NULL;
  204. va_list ap;
  205. // Calculate buffer size
  206. va_start(ap, fmt);
  207. size = vsnprintf(calc_buf, size, fmt, ap);
  208. va_end(ap);
  209. // The snprintf() prior to 2.0.6 glibc version returns -1 if string
  210. // was truncated. The later glibc returns required buffer size.
  211. // The calc_buf can be NULL and size can be 0 for recent glibc but
  212. // probably some exotic implementations can break on it. So use
  213. // minimal buffer with length = 1.
  214. if (size < 0)
  215. return NULL;
  216. size++; // Additional byte for '\0'
  217. line = faux_zmalloc(size);
  218. if (!line) // Memory problems
  219. return NULL;
  220. // Format real string
  221. va_start(ap, fmt);
  222. size = vsnprintf(line, size, fmt, ap);
  223. va_end(ap);
  224. if (size < 0) { // Some problems
  225. faux_str_free(line);
  226. return NULL;
  227. }
  228. return line;
  229. }
  230. /** @brief Service function to compare to chars in right way.
  231. *
  232. * The problem is char type can be signed or unsigned on different
  233. * platforms. So stright comparision can return different results.
  234. *
  235. * @param [in] char1 First char
  236. * @param [in] char2 Second char
  237. * @return
  238. * < 0 if char1 < char2
  239. * = 0 if char1 = char2
  240. * > 0 if char1 > char2
  241. */
  242. static int faux_str_cmp_chars(char char1, char char2) {
  243. unsigned char ch1 = (unsigned char)char1;
  244. unsigned char ch2 = (unsigned char)char2;
  245. return (int)ch1 - (int)ch2;
  246. }
  247. /** @brief Compare n first characters of two strings ignoring case.
  248. *
  249. * The difference beetween this function an standard strncasecmp() is
  250. * faux function uses faux ctype functions. It can be important for
  251. * portability.
  252. *
  253. * @param [in] str1 First string to compare.
  254. * @param [in] str2 Second string to compare.
  255. * @param [in] n Number of characters to compare.
  256. * @return < 0, 0, > 0, see the strcasecmp().
  257. */
  258. int faux_str_casecmpn(const char *str1, const char *str2, size_t n) {
  259. const char *p1 = str1;
  260. const char *p2 = str2;
  261. size_t num = n;
  262. while (*p1 != '\0' && *p2 != '\0' && num != 0) {
  263. int res = faux_str_cmp_chars(
  264. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  265. if (res != 0)
  266. return res;
  267. p1++;
  268. p2++;
  269. num--;
  270. }
  271. if (0 == n) // It means n first characters are equal.
  272. return 0;
  273. return faux_str_cmp_chars(
  274. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  275. }
  276. /** @brief Compare two strings ignoring case.
  277. *
  278. * The difference beetween this function an standard strcasecmp() is
  279. * faux function uses faux ctype functions. It can be important for
  280. * portability.
  281. *
  282. * @param [in] str1 First string to compare.
  283. * @param [in] str2 Second string to compare.
  284. * @return < 0, 0, > 0, see the strcasecmp().
  285. */
  286. int faux_str_casecmp(const char *str1, const char *str2) {
  287. const char *p1 = str1;
  288. const char *p2 = str2;
  289. while (*p1 != '\0' && *p2 != '\0') {
  290. int res = faux_str_cmp_chars(
  291. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  292. if (res != 0)
  293. return res;
  294. p1++;
  295. p2++;
  296. }
  297. return faux_str_cmp_chars(
  298. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  299. }
  300. /** @brief Finds the first occurrence of the substring in the string
  301. *
  302. * Function is a faux version of strcasestr() function.
  303. *
  304. * @param [in] haystack String to find substring in it.
  305. * @param [in] needle Substring to find.
  306. * @return
  307. * Pointer to first occurence of substring in the string.
  308. * NULL on error
  309. */
  310. char *faux_str_casestr(const char *haystack, const char *needle) {
  311. const char *ptr = haystack;
  312. size_t ptr_len = 0;
  313. size_t needle_len = 0;
  314. assert(haystack);
  315. assert(needle);
  316. if (!haystack || !needle)
  317. return NULL;
  318. ptr_len = strlen(haystack);
  319. needle_len = strlen(needle);
  320. while ((*ptr != '\0') && (ptr_len >= needle_len)) {
  321. int res = faux_str_casecmpn(ptr, needle, needle_len);
  322. if (0 == res)
  323. return (char *)ptr;
  324. ptr++;
  325. ptr_len--;
  326. }
  327. return NULL; // Not found
  328. }
  329. /** Prepare string for embedding to C-code (make escaping).
  330. *
  331. * @warning The returned pointer must be freed by faux_str_free().
  332. * @param [in] src String for escaping.
  333. * @return Escaped string or NULL on error.
  334. */
  335. char *faux_str_c_esc(const char *src) {
  336. const char *src_ptr = src;
  337. char *dst = NULL;
  338. char *dst_ptr = NULL;
  339. char *escaped = NULL;
  340. size_t src_len = 0;
  341. size_t dst_len = 0;
  342. assert(src);
  343. if (!src)
  344. return NULL;
  345. src_len = strlen(src);
  346. // Calculate max destination string size.
  347. // The worst case is when each src character will be replaced by
  348. // something like '\xff'. So it's 4 dst chars for 1 src one.
  349. dst_len = (src_len * 4) + 1; // one byte for '\0'
  350. dst = faux_zmalloc(dst_len);
  351. assert(dst);
  352. if (!dst)
  353. return NULL;
  354. dst_ptr = dst;
  355. while (*src_ptr != '\0') {
  356. char *esc = NULL; // escaped replacement
  357. char buf[5]; // longest 'char' (4 bytes) + '\0'
  358. size_t len = 0;
  359. switch (*src_ptr) {
  360. case '\n':
  361. esc = "\\n";
  362. break;
  363. case '\"':
  364. esc = "\\\"";
  365. break;
  366. case '\\':
  367. esc = "\\\\";
  368. break;
  369. case '\'':
  370. esc = "\\\'";
  371. break;
  372. case '\r':
  373. esc = "\\r";
  374. break;
  375. case '\t':
  376. esc = "\\t";
  377. break;
  378. default:
  379. // Check is the symbol control character. Control
  380. // characters has codes from 0x00 to 0x1f.
  381. if (((unsigned char)*src_ptr & 0xe0) == 0) { // control
  382. snprintf(buf, sizeof(buf), "\\x%02x",
  383. (unsigned char)*src_ptr);
  384. buf[4] = '\0'; // for safety
  385. } else {
  386. buf[0] = *src_ptr; // Common character
  387. buf[1] = '\0';
  388. }
  389. esc = buf;
  390. break;
  391. }
  392. len = strlen(esc);
  393. memcpy(dst_ptr, esc, len); // zmalloc() nullify the rest
  394. dst_ptr += len;
  395. src_ptr++;
  396. }
  397. escaped = faux_str_dup(dst); // Free some memory
  398. faux_str_free(dst); // 'dst' size >= 'escaped' size
  399. return escaped;
  400. }
  401. #define BYTE_CONV_LEN 4 // Length of one byte converted to string
  402. /** Prepare binary block for embedding to C-code.
  403. *
  404. * @warning The returned pointer must be freed by faux_str_free().
  405. * @param [in] src Binary block for conversion.
  406. * @return C-string or NULL on error.
  407. */
  408. char *faux_str_c_bin(const char *src, size_t n) {
  409. const char *src_ptr = src;
  410. char *dst = NULL;
  411. char *dst_ptr = NULL;
  412. size_t dst_len = 0;
  413. assert(src);
  414. if (!src)
  415. return NULL;
  416. // Calculate destination string size.
  417. // Each src character will be replaced by
  418. // something like '\xff'. So it's 4 dst chars for 1 src char.
  419. dst_len = (n * BYTE_CONV_LEN) + 1; // one byte for '\0'
  420. dst = faux_zmalloc(dst_len);
  421. assert(dst);
  422. if (!dst)
  423. return NULL;
  424. dst_ptr = dst;
  425. while (src_ptr < (src + n)) {
  426. char buf[BYTE_CONV_LEN + 1]; // longest 'char' (4 bytes) + '\0'
  427. snprintf(buf, sizeof(buf), "\\x%02x", (unsigned char)*src_ptr);
  428. memcpy(dst_ptr, buf, BYTE_CONV_LEN); // zmalloc() nullify the rest
  429. dst_ptr += BYTE_CONV_LEN;
  430. src_ptr++;
  431. }
  432. return dst;
  433. }
  434. /** @brief Search the n-th chars of string for one of the specified chars.
  435. *
  436. * The function search for any of specified characters within string.
  437. * The search is limited to first n characters of the string. If
  438. * terminating '\0' is before n-th character then search will stop on
  439. * it. Can be used with raw memory block.
  440. *
  441. * @param [in] str String (or memory block) to search in.
  442. * @param [in] chars_to_string Chars enumeration to search for.
  443. * @param [in] n Maximum number of bytes to search within.
  444. * @return Pointer to the first occurence of one of specified chars.
  445. * NULL on error.
  446. */
  447. char *faux_str_charsn(const char *str, const char *chars_to_search, size_t n) {
  448. const char *current_char = str;
  449. size_t len = n;
  450. assert(str);
  451. assert(chars_to_search);
  452. if (!str || !chars_to_search)
  453. return NULL;
  454. while ((*current_char != '\0') && (len > 0)) {
  455. if (strchr(chars_to_search, *current_char))
  456. return (char *)current_char;
  457. current_char++;
  458. len--;
  459. }
  460. return NULL;
  461. }
  462. /** @brief Search string for one of the specified chars.
  463. *
  464. * The function search for any of specified characters within string.
  465. *
  466. * @param [in] str String to search in.
  467. * @param [in] chars_to_string Chars enumeration to search for.
  468. * @return Pointer to the first occurence of one of specified chars.
  469. * NULL on error.
  470. */
  471. char *faux_str_chars(const char *str, const char *chars_to_search) {
  472. assert(str);
  473. if (!str)
  474. return NULL;
  475. return faux_str_charsn(str, chars_to_search, strlen(str));
  476. }
/* TODO: Is it needed?
  478. const char *faux_str_nextword(const char *string,
  479. size_t *len, size_t *offset, size_t *quoted)
  480. {
  481. const char *word;
  482. *quoted = 0;
  483. // Find the start of a word (not including an opening quote)
  484. while (*string && isspace(*string)) {
  485. string++;
  486. (*offset)++;
  487. }
  488. // Is this the start of a quoted string ?
  489. if (*string == '"') {
  490. *quoted = 1;
  491. string++;
  492. }
  493. word = string;
  494. *len = 0;
  495. // Find the end of the word
  496. while (*string) {
  497. if (*string == '\\') {
  498. string++;
  499. (*len)++;
  500. if (*string) {
  501. (*len)++;
  502. string++;
  503. }
  504. continue;
  505. }
  506. // End of word
  507. if (!*quoted && isspace(*string))
  508. break;
  509. if (*string == '"') {
  510. // End of a quoted string
  511. *quoted = 2;
  512. break;
  513. }
  514. (*len)++;
  515. string++;
  516. }
  517. return word;
  518. }
  519. */
  520. // TODO: Is it needed?
  521. /*
  522. char *lub_string_ndecode(const char *string, unsigned int len)
  523. {
  524. const char *s = string;
  525. char *res, *p;
  526. int esc = 0;
  527. if (!string)
  528. return NULL;
  529. p = res = faux_zmalloc(len + 1);
  530. while (*s && (s < (string +len))) {
  531. if (!esc) {
  532. if ('\\' == *s)
  533. esc = 1;
  534. else
  535. *p = *s;
  536. } else {
  537. // switch (*s) {
  538. // case 'r':
  539. // case 'n':
  540. // *p = '\n';
  541. // break;
  542. // case 't':
  543. // *p = '\t';
  544. // break;
  545. // default:
  546. // *p = *s;
  547. // break;
  548. // }
  549. // *p = *s;
  550. esc = 0;
  551. }
  552. if (!esc)
  553. p++;
  554. s++;
  555. }
  556. *p = '\0';
  557. return res;
  558. }
  559. */
  560. // TODO: Is it needed?
  561. /*
  562. inline char *lub_string_decode(const char *string)
  563. {
  564. return lub_string_ndecode(string, strlen(string));
  565. }
  566. */
  567. // TODO: Is it needed?
  568. /*----------------------------------------------------------- */
  569. /*
  570. * This needs to escape any dangerous characters within the command line
  571. * to prevent gaining access to the underlying system shell.
  572. */
  573. /*
  574. char *lub_string_encode(const char *string, const char *escape_chars)
  575. {
  576. char *result = NULL;
  577. const char *p;
  578. if (!escape_chars)
  579. return lub_string_dup(string);
  580. if (string && !(*string)) // Empty string
  581. return lub_string_dup(string);
  582. for (p = string; p && *p; p++) {
  583. // find any special characters and prefix them with '\'
  584. size_t len = strcspn(p, escape_chars);
  585. lub_string_catn(&result, p, len);
  586. p += len;
  587. if (*p) {
  588. lub_string_catn(&result, "\\", 1);
  589. lub_string_catn(&result, p, 1);
  590. } else {
  591. break;
  592. }
  593. }
  594. return result;
  595. }
  596. */
  597. // TODO: Is it needed?
  598. /*--------------------------------------------------------- */
  599. /*
  600. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  601. bool_t utf8)
  602. {
  603. unsigned int cnt = 0;
  604. if (!str1 || !str2)
  605. return cnt;
  606. while (*str1 && *str2) {
  607. if (*str1 != *str2)
  608. break;
  609. cnt++;
  610. str1++;
  611. str2++;
  612. }
  613. if (!utf8)
  614. return cnt;
  615. // UTF8 features
  616. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  617. cnt--;
  618. return cnt;
  619. }
  620. */
  621. // TODO: Is it needed?
  622. /*--------------------------------------------------------- */
  623. /*
  624. const char *lub_string_suffix(const char *string)
  625. {
  626. const char *p1, *p2;
  627. p1 = p2 = string;
  628. while (*p1) {
  629. if (faux_ctype_isspace(*p1)) {
  630. p2 = p1;
  631. p2++;
  632. }
  633. p1++;
  634. }
  635. return p2;
  636. }
  637. */
  638. // TODO: Is it needed?
  639. /*--------------------------------------------------------- */
  640. /*
  641. unsigned int lub_string_wordcount(const char *line)
  642. {
  643. const char *word;
  644. unsigned int result = 0;
  645. size_t len = 0, offset = 0;
  646. size_t quoted;
  647. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  648. *word || quoted;
  649. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  650. // account for the terminating quotation mark
  651. len += quoted ? quoted - 1 : 0;
  652. result++;
  653. }
  654. return result;
  655. }
  656. */