str.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. /** @file str.c
  2. * @brief String related functions
  3. *
  4. * This file implements some often used string functions.
  5. * Some functions are more portable versions of standard
  6. * functions but others are original ones.
  7. */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <assert.h>
  11. #include <stdio.h>
  12. #include "faux/ctype.h"
  13. #include "faux/str.h"
/* TODO: Are these variables really needed? */
  15. //const char *lub_string_esc_default = "`|$<>&()#;\\\"!";
  16. //const char *lub_string_esc_regex = "^$.*+[](){}";
  17. //const char *lub_string_esc_quoted = "\\\"";
  18. /** @brief Free the memory allocated for the string.
  19. *
  20. * Safely free the memory allocated for the string. You can use NULL
  21. * pointer with this function. POSIX's free() checks for the NULL pointer
  22. * but not all systems do so.
  23. *
  24. * @param [in] str String to free
  25. */
  26. void faux_str_free(char *str) {
  27. faux_free(str);
  28. }
  29. /** @brief Duplicates the string.
  30. *
  31. * Duplicates the string. Same as standard strdup() function. Allocates
  32. * memory with malloc(). Checks for NULL pointer.
  33. *
  34. * @warning Resulting string must be freed by faux_str_free().
  35. *
  36. * @param [in] str String to duplicate.
  37. * @return Pointer to allocated string or NULL.
  38. */
  39. char *faux_str_dup(const char *str) {
  40. if (!str)
  41. return NULL;
  42. return strdup(str);
  43. }
  44. /** @brief Duplicates the first n bytes of the string.
  45. *
  46. * Duplicates at most n bytes of the string. Allocates
  47. * memory with malloc(). Checks for NULL pointer. Function will allocate
  48. * n + 1 bytes to store string and terminating null byte.
  49. *
  50. * @warning Resulting string must be freed by faux_str_free().
  51. *
  52. * @param [in] str String to duplicate.
  53. * @param [in] n Number of bytes to copy.
  54. * @return Pointer to allocated string or NULL.
  55. */
  56. char *faux_str_dupn(const char *str, size_t n) {
  57. char *res = NULL;
  58. size_t len = 0;
  59. if (!str)
  60. return NULL;
  61. len = strlen(str);
  62. len = (len < n) ? len : n;
  63. res = faux_zmalloc(len + 1);
  64. if (!res)
  65. return NULL;
  66. strncpy(res, str, len);
  67. res[len] = '\0';
  68. return res;
  69. }
  70. /** @brief Generates lowercase copy of input string.
  71. *
  72. * Allocates the copy of input string and convert that copy to lowercase.
  73. *
  74. * @warning Resulting string must be freed by faux_str_free().
  75. *
  76. * @param [in] str String to convert.
  77. * @return Pointer to lowercase string copy or NULL.
  78. */
  79. char *faux_str_tolower(const char *str) {
  80. char *res = faux_str_dup(str);
  81. char *p = res;
  82. if (!res)
  83. return NULL;
  84. while (*p) {
  85. *p = faux_ctype_tolower(*p);
  86. p++;
  87. }
  88. return res;
  89. }
  90. /** @brief Generates uppercase copy of input string.
  91. *
  92. * Allocates the copy of input string and convert that copy to uppercase.
  93. *
  94. * @warning Resulting string must be freed by faux_str_free().
  95. *
  96. * @param [in] str String to convert.
  97. * @return Pointer to lowercase string copy or NULL.
  98. */
  99. char *faux_str_toupper(const char *str) {
  100. char *res = faux_str_dup(str);
  101. char *p = res;
  102. if (!res)
  103. return NULL;
  104. while (*p) {
  105. *p = faux_ctype_toupper(*p);
  106. p++;
  107. }
  108. return res;
  109. }
  110. /** @brief Add n bytes of text to existent string.
  111. *
  112. * Concatenate two strings. Add n bytes of second string to the end of the
  113. * first one. The first argument is address of string pointer. The pointer
  114. * can be changed due to realloc() features. The first pointer can be NULL.
  115. * In this case the memory will be malloc()-ed and stored to the first pointer.
  116. *
  117. * @param [in,out] str Address of first string pointer.
  118. * @param [in] text Text to add to the first string.
  119. * @param [in] n Number of bytes to add.
  120. * @return Pointer to resulting string or NULL.
  121. */
  122. char *faux_str_catn(char **str, const char *text, size_t n) {
  123. size_t str_len = 0;
  124. size_t text_len = 0;
  125. char *res = NULL;
  126. char *p = NULL;
  127. if (!text)
  128. return *str;
  129. str_len = (*str) ? strlen(*str) : 0;
  130. text_len = strlen(text);
  131. text_len = (text_len < n) ? text_len : n;
  132. res = realloc(*str, str_len + text_len + 1);
  133. if (!res)
  134. return NULL;
  135. p = res + str_len;
  136. strncpy(p, text, text_len);
  137. p[text_len] = '\0';
  138. *str = res;
  139. return res;
  140. }
  141. /** @brief Add some text to existent string.
  142. *
  143. * Concatenate two strings. Add second string to the end of the first one.
  144. * The first argument is address of string pointer. The pointer can be
  145. * changed due to realloc() features. The first pointer can be NULL. In this
  146. * case the memory will be malloc()-ed and stored to the first pointer.
  147. *
  148. * @param [in,out] str Address of first string pointer.
  149. * @param [in] text Text to add to the first string.
  150. * @return Pointer to resulting string or NULL.
  151. */
  152. char *faux_str_cat(char **str, const char *text) {
  153. size_t len = 0;
  154. if (!text)
  155. return *str;
  156. len = strlen(text);
  157. return faux_str_catn(str, text, len);
  158. }
  159. /** @brief Service function to compare to chars in right way.
  160. *
  161. * The problem is char type can be signed or unsigned on different
  162. * platforms. So stright comparision can return different results.
  163. *
  164. * @param [in] char1 First char
  165. * @param [in] char2 Second char
  166. * @return
  167. * < 0 if char1 < char2
  168. * = 0 if char1 = char2
  169. * > 0 if char1 > char2
  170. */
  171. static int faux_str_cmp_chars(char char1, char char2) {
  172. unsigned char ch1 = (unsigned char)char1;
  173. unsigned char ch2 = (unsigned char)char2;
  174. return (int)ch1 - (int)ch2;
  175. }
  176. /** @brief Compare n first characters of two strings ignoring case.
  177. *
  178. * The difference beetween this function an standard strncasecmp() is
  179. * faux function uses faux ctype functions. It can be important for
  180. * portability.
  181. *
  182. * @param [in] str1 First string to compare.
  183. * @param [in] str2 Second string to compare.
  184. * @param [in] n Number of characters to compare.
  185. * @return < 0, 0, > 0, see the strcasecmp().
  186. */
  187. int faux_str_casecmpn(const char *str1, const char *str2, size_t n) {
  188. const char *p1 = str1;
  189. const char *p2 = str2;
  190. size_t num = n;
  191. while (*p1 != '\0' && *p2 != '\0' && num != 0) {
  192. int res = faux_str_cmp_chars(
  193. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  194. if (res != 0)
  195. return res;
  196. p1++;
  197. p2++;
  198. num--;
  199. }
  200. if (0 == n) // It means n first characters are equal.
  201. return 0;
  202. return faux_str_cmp_chars(
  203. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  204. }
  205. /** @brief Compare two strings ignoring case.
  206. *
  207. * The difference beetween this function an standard strcasecmp() is
  208. * faux function uses faux ctype functions. It can be important for
  209. * portability.
  210. *
  211. * @param [in] str1 First string to compare.
  212. * @param [in] str2 Second string to compare.
  213. * @return < 0, 0, > 0, see the strcasecmp().
  214. */
  215. int faux_str_casecmp(const char *str1, const char *str2) {
  216. const char *p1 = str1;
  217. const char *p2 = str2;
  218. while (*p1 != '\0' && *p2 != '\0') {
  219. int res = faux_str_cmp_chars(
  220. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  221. if (res != 0)
  222. return res;
  223. p1++;
  224. p2++;
  225. }
  226. return faux_str_cmp_chars(
  227. faux_ctype_tolower(*p1), faux_ctype_tolower(*p2));
  228. }
  229. /** @brief Finds the first occurrence of the substring in the string
  230. *
  231. * Function is a faux version of strcasestr() function.
  232. *
  233. * @param [in] haystack String to find substring in it.
  234. * @param [in] needle Substring to find.
  235. * @return
  236. * Pointer to first occurence of substring in the string.
  237. * NULL on error
  238. */
  239. char *faux_str_casestr(const char *haystack, const char *needle) {
  240. const char *ptr = haystack;
  241. size_t ptr_len = 0;
  242. size_t needle_len = 0;
  243. assert(haystack);
  244. assert(needle);
  245. if (!haystack || !needle)
  246. return NULL;
  247. ptr_len = strlen(haystack);
  248. needle_len = strlen(needle);
  249. while ((*ptr != '\0') && (ptr_len >= needle_len)) {
  250. int res = faux_str_casecmpn(ptr, needle, needle_len);
  251. if (0 == res)
  252. return (char *)ptr;
  253. ptr++;
  254. ptr_len--;
  255. }
  256. return NULL; // Not found
  257. }
  258. /** Prepare string for embedding to C-code (make escaping).
  259. *
  260. * @warning The returned pointer must be freed by faux_str_free().
  261. * @param [in] src String for escaping.
  262. * @return Escaped string or NULL on error.
  263. */
  264. char *faux_str_c_esc(const char *src) {
  265. const char *src_ptr = src;
  266. char *dst = NULL;
  267. char *dst_ptr = NULL;
  268. char *escaped = NULL;
  269. size_t src_len = 0;
  270. size_t dst_len = 0;
  271. assert(src);
  272. if (!src)
  273. return NULL;
  274. src_len = strlen(src);
  275. // Calculate max destination string size.
  276. // The worst case is when each src character will be replaced by
  277. // something like '\xff'. So it's 4 dst chars for 1 src one.
  278. dst_len = (src_len * 4) + 1; // one byte for '\0'
  279. dst = faux_zmalloc(dst_len);
  280. assert(dst);
  281. if (!dst)
  282. return NULL;
  283. dst_ptr = dst;
  284. while (*src_ptr != '\0') {
  285. char *esc = NULL; // escaped replacement
  286. char buf[5]; // longest 'char' (4 bytes) + '\0'
  287. size_t len = 0;
  288. switch (*src_ptr) {
  289. case '\n':
  290. esc = "\\n";
  291. break;
  292. case '\"':
  293. esc = "\\\"";
  294. break;
  295. case '\\':
  296. esc = "\\\\";
  297. break;
  298. case '\'':
  299. esc = "\\\'";
  300. break;
  301. case '\r':
  302. esc = "\\r";
  303. break;
  304. case '\t':
  305. esc = "\\t";
  306. break;
  307. default:
  308. // Check is the symbol control character. Control
  309. // characters has codes from 0x00 to 0x1f.
  310. if (((unsigned char)*src_ptr & 0xe0) == 0) { // control
  311. snprintf(buf, sizeof(buf), "\\x%02x",
  312. (unsigned char)*src_ptr);
  313. buf[4] = '\0'; // for safety
  314. } else {
  315. buf[0] = *src_ptr; // Common character
  316. buf[1] = '\0';
  317. }
  318. esc = buf;
  319. break;
  320. }
  321. len = strlen(esc);
  322. memcpy(dst_ptr, esc, len); // zmalloc() nullify the rest
  323. dst_ptr += len;
  324. src_ptr++;
  325. }
  326. escaped = faux_str_dup(dst); // Free some memory
  327. faux_str_free(dst); // 'dst' size >= 'escaped' size
  328. return escaped;
  329. }
  330. /** @brief Search the n-th chars of string for one of the specified chars.
  331. *
  332. * The function search for any of specified characters within string.
  333. * The search is limited to first n characters of the string. If
  334. * terminating '\0' is before n-th character then search will stop on
  335. * it. Can be used with raw memory block.
  336. *
  337. * @param [in] str String (or memory block) to search in.
  338. * @param [in] chars_to_string Chars enumeration to search for.
  339. * @param [in] n Maximum number of bytes to search within.
  340. * @return Pointer to the first occurence of one of specified chars.
  341. * NULL on error.
  342. */
  343. char *faux_str_charsn(const char *str, const char *chars_to_search, size_t n) {
  344. const char *current_char = str;
  345. size_t len = n;
  346. assert(str);
  347. assert(chars_to_search);
  348. if (!str || !chars_to_search)
  349. return NULL;
  350. while ((*current_char != '\0') && (len > 0)) {
  351. if (strchr(chars_to_search, *current_char))
  352. return (char *)current_char;
  353. current_char++;
  354. len--;
  355. }
  356. return NULL;
  357. }
  358. /** @brief Search string for one of the specified chars.
  359. *
  360. * The function search for any of specified characters within string.
  361. *
  362. * @param [in] str String to search in.
  363. * @param [in] chars_to_string Chars enumeration to search for.
  364. * @return Pointer to the first occurence of one of specified chars.
  365. * NULL on error.
  366. */
  367. char *faux_str_chars(const char *str, const char *chars_to_search) {
  368. assert(str);
  369. if (!str)
  370. return NULL;
  371. return faux_str_charsn(str, chars_to_search, strlen(str));
  372. }
/* TODO: Is it needed?
  374. const char *faux_str_nextword(const char *string,
  375. size_t *len, size_t *offset, size_t *quoted)
  376. {
  377. const char *word;
  378. *quoted = 0;
  379. // Find the start of a word (not including an opening quote)
  380. while (*string && isspace(*string)) {
  381. string++;
  382. (*offset)++;
  383. }
  384. // Is this the start of a quoted string ?
  385. if (*string == '"') {
  386. *quoted = 1;
  387. string++;
  388. }
  389. word = string;
  390. *len = 0;
  391. // Find the end of the word
  392. while (*string) {
  393. if (*string == '\\') {
  394. string++;
  395. (*len)++;
  396. if (*string) {
  397. (*len)++;
  398. string++;
  399. }
  400. continue;
  401. }
  402. // End of word
  403. if (!*quoted && isspace(*string))
  404. break;
  405. if (*string == '"') {
  406. // End of a quoted string
  407. *quoted = 2;
  408. break;
  409. }
  410. (*len)++;
  411. string++;
  412. }
  413. return word;
  414. }
  415. */
  416. // TODO: Is it needed?
  417. /*
  418. char *lub_string_ndecode(const char *string, unsigned int len)
  419. {
  420. const char *s = string;
  421. char *res, *p;
  422. int esc = 0;
  423. if (!string)
  424. return NULL;
  425. p = res = faux_zmalloc(len + 1);
  426. while (*s && (s < (string +len))) {
  427. if (!esc) {
  428. if ('\\' == *s)
  429. esc = 1;
  430. else
  431. *p = *s;
  432. } else {
  433. // switch (*s) {
  434. // case 'r':
  435. // case 'n':
  436. // *p = '\n';
  437. // break;
  438. // case 't':
  439. // *p = '\t';
  440. // break;
  441. // default:
  442. // *p = *s;
  443. // break;
  444. // }
  445. // *p = *s;
  446. esc = 0;
  447. }
  448. if (!esc)
  449. p++;
  450. s++;
  451. }
  452. *p = '\0';
  453. return res;
  454. }
  455. */
  456. // TODO: Is it needed?
  457. /*
  458. inline char *lub_string_decode(const char *string)
  459. {
  460. return lub_string_ndecode(string, strlen(string));
  461. }
  462. */
  463. // TODO: Is it needed?
  464. /*----------------------------------------------------------- */
  465. /*
  466. * This needs to escape any dangerous characters within the command line
  467. * to prevent gaining access to the underlying system shell.
  468. */
  469. /*
  470. char *lub_string_encode(const char *string, const char *escape_chars)
  471. {
  472. char *result = NULL;
  473. const char *p;
  474. if (!escape_chars)
  475. return lub_string_dup(string);
  476. if (string && !(*string)) // Empty string
  477. return lub_string_dup(string);
  478. for (p = string; p && *p; p++) {
  479. // find any special characters and prefix them with '\'
  480. size_t len = strcspn(p, escape_chars);
  481. lub_string_catn(&result, p, len);
  482. p += len;
  483. if (*p) {
  484. lub_string_catn(&result, "\\", 1);
  485. lub_string_catn(&result, p, 1);
  486. } else {
  487. break;
  488. }
  489. }
  490. return result;
  491. }
  492. */
  493. // TODO: Is it needed?
  494. /*--------------------------------------------------------- */
  495. /*
  496. unsigned int lub_string_equal_part(const char *str1, const char *str2,
  497. bool_t utf8)
  498. {
  499. unsigned int cnt = 0;
  500. if (!str1 || !str2)
  501. return cnt;
  502. while (*str1 && *str2) {
  503. if (*str1 != *str2)
  504. break;
  505. cnt++;
  506. str1++;
  507. str2++;
  508. }
  509. if (!utf8)
  510. return cnt;
  511. // UTF8 features
  512. if (cnt && (UTF8_11 == (*(str1 - 1) & UTF8_MASK)))
  513. cnt--;
  514. return cnt;
  515. }
  516. */
  517. // TODO: Is it needed?
  518. /*--------------------------------------------------------- */
  519. /*
  520. const char *lub_string_suffix(const char *string)
  521. {
  522. const char *p1, *p2;
  523. p1 = p2 = string;
  524. while (*p1) {
  525. if (faux_ctype_isspace(*p1)) {
  526. p2 = p1;
  527. p2++;
  528. }
  529. p1++;
  530. }
  531. return p2;
  532. }
  533. */
  534. // TODO: Is it needed?
  535. /*--------------------------------------------------------- */
  536. /*
  537. unsigned int lub_string_wordcount(const char *line)
  538. {
  539. const char *word;
  540. unsigned int result = 0;
  541. size_t len = 0, offset = 0;
  542. size_t quoted;
  543. for (word = lub_string_nextword(line, &len, &offset, &quoted);
  544. *word || quoted;
  545. word = lub_string_nextword(word + len, &len, &offset, &quoted)) {
  546. // account for the terminating quotation mark
  547. len += quoted ? quoted - 1 : 0;
  548. result++;
  549. }
  550. return result;
  551. }
  552. */