BRL-CAD
str.h
Go to the documentation of this file.
1 /* S T R . H
2  * BRL-CAD
3  *
4  * Copyright (c) 2004-2024 United States Government as represented by
5  * the U.S. Army Research Laboratory.
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public License
9  * version 2.1 as published by the Free Software Foundation.
10  *
11  * This library is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this file; see the file named COPYING for more
18  * information.
19  */
20 
21 #ifndef BU_STR_H
22 #define BU_STR_H
23 
24 #include "common.h"
25 #include "bu/defines.h"
26 
27 __BEGIN_DECLS
28 
29 /** @addtogroup bu_str
30  *
31  * @brief
32  * Compatibility routines to various string processing functions
33  * including strlcat and strlcpy.
34  *
35  */
36 /** @{ */
37 /** @file bu/str.h */
38 
39 /**
40  * concatenate one string onto the end of another, returning the
41  * length of the dst string after the concatenation.
42  *
43  * bu_strlcat() is a macro to bu_strlcatm() so that we can report the
44  * file name and line number of any erroneous callers.
45  */
46 BU_EXPORT extern size_t bu_strlcatm(char *dst, const char *src, size_t size, const char *label);
47 #define bu_strlcat(dst, src, size) bu_strlcatm(dst, src, size, CPP_FILELINE)
48 
49 /**
50  * copies one string into another, returning the length of the dst
51  * string after the copy.
52  *
53  * bu_strlcpy() is a macro to bu_strlcpym() so that we can report the
54  * file name and line number of any erroneous callers.
55  */
56 BU_EXPORT extern size_t bu_strlcpym(char *dst, const char *src, size_t size, const char *label);
57 #define bu_strlcpy(dst, src, size) bu_strlcpym(dst, src, size, CPP_FILELINE)
58 
59 /**
60  * Given a string, allocate enough memory to hold it using
61  * bu_malloc(), duplicate the string, and return a pointer to the
62  * new string.
63  *
64  * bu_strdup() is a macro that includes the current file name and line
65  * number that can be used when bu debugging is enabled.
66  */
67 BU_EXPORT extern char *bu_strdupm(const char *cp, const char *label);
68 #define bu_strdup(s) bu_strdupm(s, CPP_FILELINE)
69 
70 /**
71  * Compares two strings for equality. It performs the comparison more
72  * robustly than the standard library's strcmp() function by defining
73  * consistent behavior for NULL and empty strings. It accepts NULL as
74  * valid input values and considers "" and NULL as equal. Returns 0
75  * if the strings match.
76  */
77 BU_EXPORT extern int bu_strcmp(const char *string1, const char *string2);
78 
79 /**
80  * Compares two strings for equality. No more than n-characters are
81  * compared. It performs the comparison more robustly than the
82  * standard library's strncmp() function by defining consistent
83  * behavior for NULL and empty strings. It accepts NULL as valid
84  * input values and considers "" and NULL as equal. Returns 0 if the
85  * strings match.
86  */
87 BU_EXPORT extern int bu_strncmp(const char *string1, const char *string2, size_t n);
88 
89 /**
90  * Compares two strings for equality without regard for the case in
91  * the string. It performs the comparison more robustly than the
92  * standard strcasecmp()/stricmp() function by defining consistent
93  * behavior for NULL and empty strings. It accepts NULL as valid
94  * input values and considers "" and NULL as equal. Returns 0 if the
95  * strings match.
96  */
97 BU_EXPORT extern int bu_strcasecmp(const char *string1, const char *string2);
98 
99 /**
100  * Compares two strings for equality without regard for the case in
101  * the string. No more than n-characters are compared. It performs
102  * the comparison more robustly than the standard
103  * strncasecmp()/strnicmp() function by defining consistent behavior
104  * for NULL and empty strings. It accepts NULL as valid input values
105  * and considers "" and NULL as equal. Returns 0 if the strings
106  * match.
107  */
108 BU_EXPORT extern int bu_strncasecmp(const char *string1, const char *string2, size_t n);
109 
110 /**
111  * BU_STR_EMPTY() is a convenience macro that tests a string for
112  * emptiness, i.e. "" or NULL.
113  */
114 #define BU_STR_EMPTY(s) (bu_strcmp((s), "") == 0)
115 
116 /**
117  * BU_STR_EQUAL() is a convenience macro for testing two
118  * null-terminated strings for equality. It is equivalent to
119  * (bu_strcmp(s1, s2) == 0) whereby NULL strings are allowed and
120  * equivalent to an empty string. Evaluates true when the strings
121  * match and false if they do not.
122  */
123 #define BU_STR_EQUAL(s1, s2) (bu_strcmp((s1), (s2)) == 0)
124 
125 /**
126  * BU_STR_EQUIV() is a convenience macro that compares two
127  * null-terminated strings for equality without regard for case. Two
128  * strings are equivalent if they are a case-insensitive match. NULL
129  * strings are allowed and equivalent to an empty string. Evaluates
130  * true if the strings are equivalent and false if they are not.
131  */
132 #define BU_STR_EQUIV(s1, s2) (bu_strcasecmp((s1), (s2)) == 0)
133 
134 
135 /**
136  * @brief
137  * These routines implement support for escaping and unescaping
138  * generalized strings that may represent filesystem paths, URLs,
139  * object lists, and more.
140  *
141  */
142 
143 /**
144  * Escapes an input string with preceding '\'s for any characters
145  * defined in the 'expression' string. The input string is written to the
146  * specified output buffer of 'size' capacity. The input and output
147  * pointers may overlap or be the same memory (assuming adequate space
148  * is available). If 'output' is NULL, then dynamic memory will be
149  * allocated and returned.
150  *
151  * The 'expression' parameter is a regular "bracket expression"
152  * commonly used in globbing and POSIX regular expression character
153  * matching. An expression can be either a matching list (default) or
154  * a non-matching list (starting with a circumflex '^' character).
155  * For example, "abc" matches any of the characters 'a', 'b', or 'c'.
156  * Specifying a non-matching list expression matches any character
157  * except for the ones listed after the circumflex. For example,
158  * "^abc" matches any character except 'a', 'b', or 'c'.
159  *
160  * Backslash escape sequences are not allowed (e.g., \\t or \\x01) as
161  * '\' will be matched literally.
162  *
163  * A range expression consists of two characters separated by a hyphen
164  * and will match any single character between the two characters.
165  * For example, "0-9a-c" is equivalent to "0123456789abc". To match a
166  * '-' dash literally, include it as the last or first (after any '^')
167  * character within the expression.
168  *
169  * The expression may also contain named character classes but only
170  * for ASCII input strings:
171  *
172  * [:alnum:] Alphanumeric characters: a-zA-Z0-9
173  * [:alpha:] Alphabetic characters: a-zA-Z
174  * [:blank:] Space and TAB characters
175  * [:cntrl:] Control characters: ASCII 0x00-0x1F and 0x7F
176  * [:digit:] Numeric characters: 0-9
177  * [:graph:] Characters that are both printable and visible: ASCII 0x21-0x7E
178  * [:lower:] Lowercase alphabetic characters: a-z
179  * [:print:] Visible and space characters (not control characters): ASCII 0x20-0x7E
180  * [:punct:] Punctuation characters (not letters, digits, control, or space): ][!"#$%&'()*+,./:;<=>?@^_`{|}~-\
181  * [:upper:] Uppercase alphabetic characters: A-Z
182  * [:xdigit:] Hexadecimal digits: a-fA-F0-9
183  * [:word:] (non-POSIX) Alphanumeric plus underscore: a-zA-Z0-9_
184  *
185  * A non-NULL output string is always returned. This allows
186  * expression chaining and embedding as function arguments but care
187  * should be taken to free the dynamic memory being returned when
188  * 'output' is NULL.
189  *
190  * If output 'size' is inadequate for holding the escaped input
191  * string, bu_bomb() is called.
192  *
193  * Example:
194  @code
195  char *result;
196  char buf[128];
197  result = bu_str_escape("my fair lady", " ", buf, 128);
198  :: result == buf == "my\ fair\ lady"
199  result = bu_str_escape(buf, "\", NULL, 0);
200  :: result == "my\\ fair\\ lady"
201  :: buf == "my\ fair\ lady"
202  bu_free(result, "bu_str_escape");
203  result = bu_str_escape(buf, "a-zA-Z", buf, 128);
204  :: result == buf == "\m\y\ \f\a\i\r\ \l\a\d\y"
205  @endcode
206  *
207  * This function should be thread safe and re-entrant if the
208  * input/output buffers are not shared (and strlen() is threadsafe).
209  */
210 BU_EXPORT extern char *bu_str_escape(const char *input, const char *expression, char *output, size_t size);
211 
212 
213 /**
214  * Removes one level of '\' escapes from an input string. The input
215  * string is written to the specified output buffer of 'size'
216  * capacity. The input and output pointers may overlap or be the same
217  * memory. If 'output' is NULL, then dynamic memory will be allocated
218  * and returned.
219  *
220  * A non-NULL output string is always returned. This allows
221  * expression chaining and embedding as function arguments but care
222  * should be taken to free the dynamic memory being returned when
223  * 'output' is NULL.
224  *
225  * If output 'size' is inadequate for holding the unescaped input
226  * string, bu_bomb() is called.
227  *
228  * Example:
229  @code
230  char *result;
231  char buf[128];
232  result = bu_str_unescape("\m\y\\ \f\a\i\r\\ \l\a\d\y", buf, 128);
233  :: result == buf == "my\ fair\ lady"
234  result = bu_str_unescape(buf, NULL, 0);
235  :: result == "my fair lady"
236  :: buf == "my\ fair\ lady"
237  bu_free(result, "bu_str_unescape");
238  @endcode
239  *
240  * This function should be thread safe and re-entrant if the
241  * input/output buffers are not shared (and strlen() is threadsafe).
242  */
243 BU_EXPORT extern char *bu_str_unescape(const char *input, char *output, size_t size);
244 
245 
246 /**
247  * Determine if given C-string consists solely of printable chars.
248  *
249  * Returns 0 if any non-printable chars are encountered (not including
250  * the terminating nul character), or non-zero otherwise.
251  */
252 BU_EXPORT extern int bu_str_isprint(const char *cp);
253 
254 
255 /** @brief routines for parsing boolean values from strings */
256 
257 /**
258  * Returns truthfully if a given input string represents an
259  * "affirmative string".
260  *
261  * Input values that are case-insensitively NULL, empty (i.e., ""
262  * after disregarding whitespace), "n", "no", "false", "off",
263  * "(null)", or are 0-valued return as false. Any other input value
264  * returns true.
265  *
266  * Strings strongly indicating true such as "y", "yes", "true", "on",
267  * or that are 1-valued return 1. Other non-empty strings still
268  * return true, but the value may be greater than 1.
269  */
270 BU_EXPORT extern int bu_str_true(const char *str);
271 
272 
273 /**
274  * Returns truthfully if a given input string represents a
275  * "negative string".
276  *
277  * Input values that are case-insensitively NULL, empty (i.e., ""
278  * after disregarding whitespace), "n", "no", "false", "off",
279  * "(null)", or are 0-valued return as true. Any other input value
280  * returns as false.
281  */
282 BU_EXPORT extern int bu_str_false(const char *str);
283 
284 
285 /** @brief Functions related to argv processing. */
286 
287 /**
288  * Build argv[] array from input buffer, by splitting whitespace
289  * separated "words" into null terminated strings.
290  *
291  * 'lim' indicates the maximum number of elements that can be stored
292  * in the argv[] array not including a terminating NULL.
293  *
294  * The 'lp' input buffer is altered by this process. The argv[] array
295  * points into the input buffer.
296  *
297  * The argv[] array needs to have at least lim+1 pointers allocated
298  * for lim items plus a terminating pointer to NULL. The input buffer
299  * should not be freed until argv has been freed or passes out of
300  * scope.
301  *
302  * Returns -
303  * 0 no words in input
304  * argc number of words of input, now in argv[]
305  */
306 BU_EXPORT extern size_t bu_argv_from_string(char *argv[],
307  size_t lim,
308  char *lp);
309 
310 
311 /**
312  * libbu replacement for Tcl's ascii list to argc/argv functionality
313  * (Note: function signature duplicates that of Tcl_SplitList)
314  *
315  * Caller is responsible for freeing output argv array with bu_free(),
316  * but NOT the strings within argv.
317  *
318  * @param list_str input string from caller
319  * @param[out] argc pointer to variable that will hold number of entries in argv
320  * @param[out] argv pointer to the parsed array of list items
321  *
322  * Returns 0 if parsing was successful
323  */
324 BU_EXPORT extern int bu_argv_from_tcl_list(const char *list_str,
325  int *argc,
326  const char ***argv);
327 
328 /**
329  * Deallocate all strings in a given argv array and the array itself
330  *
331  * This call presumes the array has been allocated with bu_argv_dup()
332  * or bu_path_to_argv().
333  */
334 BU_EXPORT extern void bu_argv_free(size_t argc, char *argv[]);
335 
336 /**
337  * free up to argc elements of memory allocated to an array without
338  * free'ing the array itself.
339  */
340 BU_EXPORT extern void bu_free_args(size_t argc, char *argv[], const char *str);
341 
342 /**
343  * Dynamically duplicate an argv array and all elements in the array
344  *
345  * Duplicate an argv array by duplicating all strings and the array
346  * itself. It is the caller's responsibility to free the array
347  * returned including all elements in the array by calling bu_free()
348  * or bu_argv_free().
349  */
350 BU_EXPORT extern char **bu_argv_dup(size_t argc, const char *argv[]);
351 
352 /**
353  * Combine two argv arrays into one new (duplicated) argv array.
354  *
355  * If insert is negative, the insertArgv array elements will be
356  * prepended into the new argv array. If insert is greater than or
357  * equal to argc, the insertArgv array elements will be appended after
358  * all duplicated elements in the specified argv array. Otherwise,
359  * the insert argument is the position where the insertArgv array
360  * elements will be merged with the specified argv array.
361  */
362 BU_EXPORT extern char **bu_argv_dupinsert(int insert, size_t insertArgc, const char *insertArgv[], size_t argc, const char *argv[]);
363 
364 /**
365  * Calculate the Damerau-Levenshtein edit distance between two strings.
366  * The calculation proceeds up to an internally defined calculation
367  * limit. Note that this signature has no max_dist parameter, so callers
368  * cannot request early termination at a chosen distance. */
369 BU_EXPORT size_t
370 bu_editdist(const char *s1, const char *s2);
371 
372 __END_DECLS
373 
374 /** @} */
375 
376 #endif /* BU_STR_H */
377 
378 /*
379  * Local Variables:
380  * mode: C
381  * tab-width: 8
382  * indent-tabs-mode: t
383  * c-file-style: "stroustrup"
384  * End:
385  * ex: shiftwidth=4 tabstop=8
386  */
Header file for the BRL-CAD common definitions.
void bu_argv_free(size_t argc, char *argv[])
size_t bu_strlcatm(char *dst, const char *src, size_t size, const char *label)
char ** bu_argv_dup(size_t argc, const char *argv[])
char * bu_str_unescape(const char *input, char *output, size_t size)
char * bu_str_escape(const char *input, const char *expression, char *output, size_t size)
These routines implement support for escaping and unescaping generalized strings that may represent f...
int bu_strcasecmp(const char *string1, const char *string2)
size_t bu_editdist(const char *s1, const char *s2)
char ** bu_argv_dupinsert(int insert, size_t insertArgc, const char *insertArgv[], size_t argc, const char *argv[])
void bu_free_args(size_t argc, char *argv[], const char *str)
int bu_strncasecmp(const char *string1, const char *string2, size_t n)
size_t bu_strlcpym(char *dst, const char *src, size_t size, const char *label)
int bu_argv_from_tcl_list(const char *list_str, int *argc, const char ***argv)
size_t bu_argv_from_string(char *argv[], size_t lim, char *lp)
Functions related to argv processing.
char * bu_strdupm(const char *cp, const char *label)
int bu_str_true(const char *str)
routines for parsing boolean values from strings
int bu_strncmp(const char *string1, const char *string2, size_t n)
int bu_str_isprint(const char *cp)
int bu_strcmp(const char *string1, const char *string2)
int bu_str_false(const char *str)
void float float int * n
Definition: tig.h:74
void float float int int int int float * size
Definition: tig.h:132
void float * input
Definition: tig.h:163