/* Extracted from gl/string.h (blob e5bcef5549a34d11304230ccf2ea49dfa6754fc0).  */
/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
/* A GNU-like <string.h>.

   Copyright (C) 1995-1996, 2001-2008 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifndef _GL_STRING_H

/* The include_next requires a split double-inclusion guard.  */
#include_next <string.h>

#ifndef _GL_STRING_H
#define _GL_STRING_H


#ifndef __attribute__
/* gcc understands __attribute__ from version 2.5 on.  For anything older,
   or when __GNUC__ is not defined at all, make it expand to nothing.  */
# if !(__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5))
#  define __attribute__(Attrs) /* empty */
# endif
/* The __pure__ attribute only exists since gcc 2.96; hide it from older
   compilers.  */
# if !(__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
#  define __pure__ /* empty */
# endif
#endif


/* The definition of GL_LINK_WARNING is copied here.  */
/* GL_LINK_WARNING("literal string") arranges to emit the literal string as
   a linker warning on most glibc systems.
   We use a linker warning rather than a preprocessor warning, because
   #warning cannot be used inside macros.  */
#ifndef GL_LINK_WARNING
  /* This works on platforms with GNU ld and ELF object format.
     Testing __GLIBC__ is sufficient for asserting that GNU ld is in use.
     Testing __ELF__ guarantees the ELF object format.
     Testing __GNUC__ is necessary for the compound expression syntax.  */
# if defined __GLIBC__ && defined __ELF__ && defined __GNUC__
#  define GL_LINK_WARNING(message) \
     GL_LINK_WARNING1 (__FILE__, __LINE__, message)
#  define GL_LINK_WARNING1(file, line, message) \
     GL_LINK_WARNING2 (file, line, message)  /* macroexpand file and line */
#  define GL_LINK_WARNING2(file, line, message) \
     GL_LINK_WARNING3 (file ":" #line ": warning: " message)
#  define GL_LINK_WARNING3(message) \
     ({ static const char warning[sizeof (message)]		\
          __attribute__ ((__unused__,				\
                          __section__ (".gnu.warning"),		\
                          __aligned__ (1)))			\
          = message "\n";					\
        (void)0;						\
     })
# else
#  define GL_LINK_WARNING(message) ((void) 0)
# endif
#endif


#ifdef __cplusplus
extern "C" {
#endif


/* memmem: find the first occurrence of NEEDLE within HAYSTACK.  */
#if 0
# if 0
#  define memmem rpl_memmem
# endif
# if ! 1 || 0
extern void *memmem (void const *__haystack,
                     size_t __haystack_len,
                     void const *__needle,
                     size_t __needle_len) __attribute__ ((__pure__));
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap memmem so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef memmem
# define memmem(hay, hay_len, pin, pin_len) \
    (GL_LINK_WARNING ("memmem is unportable and often quadratic - " \
                      "use gnulib module memmem-simple for portability, " \
                      "and module memmem for speed" ), \
     memmem (hay, hay_len, pin, pin_len))
#endif

/* mempcpy: copy N bytes from SRC to DEST and return a pointer to the byte
   just past the last one written.  */
#if 0
# if ! 1
extern void *mempcpy (void *restrict __dest,
                      void const *restrict __src,
                      size_t __n);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap mempcpy so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef mempcpy
# define mempcpy(dest, src, len) \
    (GL_LINK_WARNING ("mempcpy is unportable - " \
                      "use gnulib module mempcpy for portability"), \
     mempcpy (dest, src, len))
#endif

/* memrchr: scan a memory block backwards for a byte, which is passed as
   an int.  */
#if 0
# if ! 1
extern void *memrchr (void const *__s, int __c, size_t __n)
  __attribute__ ((__pure__));
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap memrchr so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef memrchr
# define memrchr(block, byte, len) \
    (GL_LINK_WARNING ("memrchr is unportable - " \
                      "use gnulib module memrchr for portability"), \
     memrchr (block, byte, len))
#endif

/* stpcpy: copy SRC to DST; the result is the address of the terminating
   '\0' in DST.  */
#if 0
# if ! 1
extern char *stpcpy (char *restrict __dst,
                     char const *restrict __src);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap stpcpy so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef stpcpy
# define stpcpy(dst, src) \
    (GL_LINK_WARNING ("stpcpy is unportable - " \
                      "use gnulib module stpcpy for portability"), \
     stpcpy (dst, src))
#endif

/* stpncpy: copy at most N bytes of SRC to DST; the result points just past
   the last non-NUL byte written into DST.  */
#if 0
# if ! 1
#  define stpncpy gnu_stpncpy
extern char *stpncpy (char *restrict __dst,
                      char const *restrict __src,
                      size_t __n);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap stpncpy so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef stpncpy
# define stpncpy(dst, src, len) \
    (GL_LINK_WARNING ("stpncpy is unportable - " \
                      "use gnulib module stpncpy for portability"), \
     stpncpy (dst, src, len))
#endif

#if defined (GNULIB_POSIXCHECK)
/* With the GB18030 locale encoding, strchr() fails on multibyte strings
   when the character being searched for is a digit.  Warn at link time,
   then forward the call.  */
# undef strchr
# define strchr(str, ch) \
    (GL_LINK_WARNING ("strchr cannot work correctly on character strings " \
                      "in some multibyte locales - " \
                      "use mbschr if you care about internationalization"), \
     strchr (str, ch))
#endif

/* strchrnul: locate the first occurrence of C in S, or the final NUL byte
   of S.  */
#if 0
# if ! 1
extern char *strchrnul (char const *__s, int __c_in) __attribute__ ((__pure__));
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strchrnul so that each use emits a linker warning before the call
   is forwarded to the underlying function.  */
# undef strchrnul
# define strchrnul(str, ch) \
    (GL_LINK_WARNING ("strchrnul is unportable - " \
                      "use gnulib module strchrnul for portability"), \
     strchrnul (str, ch))
#endif

/* strdup: return a malloc'd string identical to S.  */
#if 1
# if ! 1 && ! defined (strdup)
extern char *strdup (char const *__s);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strdup so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strdup
# define strdup(str) \
    (GL_LINK_WARNING ("strdup is unportable - " \
                      "use gnulib module strdup for portability"), \
     strdup (str))
#endif

/* strndup: return a newly allocated copy of at most N bytes of STRING.  */
#if 1
# if ! 1
#  undef strndup
#  define strndup rpl_strndup
# endif
# if ! 1 || ! 1
extern char *strndup (char const *__string,
                      size_t __n);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strndup so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strndup
# define strndup(str, len) \
    (GL_LINK_WARNING ("strndup is unportable - " \
                      "use gnulib module strndup for portability"), \
     strndup (str, len))
#endif

/* strnlen: return the length of STRING in bytes, examining at most MAXLEN
   bytes.  When no '\0' terminator shows up within that many bytes, the
   result is MAXLEN.  */
#if 1
# if ! 1
extern size_t strnlen (char const *__string, size_t __maxlen) __attribute__ ((__pure__));
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strnlen so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strnlen
# define strnlen(str, limit) \
    (GL_LINK_WARNING ("strnlen is unportable - " \
                      "use gnulib module strnlen for portability"), \
     strnlen (str, limit))
#endif

#if defined (GNULIB_POSIXCHECK)
/* strcspn() treats its second argument as a list of single-byte
   characters.  Even then it fails on multibyte strings when the locale
   encoding is GB18030 and one of the characters searched for is a digit.
   Warn at link time, then forward the call.  */
# undef strcspn
# define strcspn(str, reject_set) \
    (GL_LINK_WARNING ("strcspn cannot work correctly on character strings " \
                      "in multibyte locales - " \
                      "use mbscspn if you care about internationalization"), \
     strcspn (str, reject_set))
#endif

/* strpbrk: locate the first occurrence in S of any character from
   ACCEPT.  */
#if 0
# if ! 1
extern char *strpbrk (char const *__s, char const *__accept) __attribute__ ((__pure__));
# endif
# if defined (GNULIB_POSIXCHECK)
/* strpbrk() treats its second argument as a list of single-byte
   characters.  Even then it fails on multibyte strings when the locale
   encoding is GB18030 and one of the characters searched for is a digit.
   Warn at link time, then forward the call.  */
#  undef strpbrk
#  define strpbrk(str, accept_set) \
     (GL_LINK_WARNING ("strpbrk cannot work correctly on character strings " \
                       "in multibyte locales - " \
                       "use mbspbrk if you care about internationalization"), \
      strpbrk (str, accept_set))
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strpbrk so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strpbrk
# define strpbrk(str, accept_set) \
    (GL_LINK_WARNING ("strpbrk is unportable - " \
                      "use gnulib module strpbrk for portability"), \
     strpbrk (str, accept_set))
#endif

#if defined (GNULIB_POSIXCHECK)
/* strspn() treats its second argument as a list of single-byte characters,
   and even in that simple case it cannot work with multibyte strings.
   Warn at link time, then forward the call.  */
# undef strspn
# define strspn(str, accept_set) \
    (GL_LINK_WARNING ("strspn cannot work correctly on character strings " \
                      "in multibyte locales - " \
                      "use mbsspn if you care about internationalization"), \
     strspn (str, accept_set))
#endif

#if defined (GNULIB_POSIXCHECK)
/* With the GB18030 locale encoding, strrchr() fails on multibyte strings
   when the character being searched for is a digit.  Warn at link time,
   then forward the call.  */
# undef strrchr
# define strrchr(str, ch) \
    (GL_LINK_WARNING ("strrchr cannot work correctly on character strings " \
                      "in some multibyte locales - " \
                      "use mbsrchr if you care about internationalization"), \
     strrchr (str, ch))
#endif

/* strsep: look for the next delimiter (a char listed in DELIM), starting
   at *STRINGP.
   When one is found it is overwritten with a NUL and *STRINGP is advanced
   to the char that follows it; otherwise *STRINGP is set to NULL.
   Nothing happens if *STRINGP was already NULL.
   The result is the old value of *STRINGP.

   This is a variant of strtok() that is multithread-safe and supports
   empty fields.

   Caveat: It modifies the original string.
   Caveat: These functions cannot be used on constant strings.
   Caveat: The identity of the delimiting character is lost.
   Caveat: It doesn't work with multibyte strings unless all of the delimiter
           characters are ASCII characters < 0x30.

   See also strtok_r().  */
#if 0
# if ! 1
extern char *strsep (char **restrict __stringp,
                     char const *restrict __delim);
# endif
# if defined (GNULIB_POSIXCHECK)
/* Warn at link time about the multibyte limitation, then forward the
   call.  */
#  undef strsep
#  define strsep(stringp, delims) \
     (GL_LINK_WARNING ("strsep cannot work correctly on character strings " \
                       "in multibyte locales - " \
                       "use mbssep if you care about internationalization"), \
      strsep (stringp, delims))
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strsep so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strsep
# define strsep(stringp, delims) \
    (GL_LINK_WARNING ("strsep is unportable - " \
                      "use gnulib module strsep for portability"), \
     strsep (stringp, delims))
#endif

/* strstr: find the first occurrence of NEEDLE in HAYSTACK.  */
#if 0
# if 0
#  define strstr rpl_strstr
char *strstr (const char *haystack,
              const char *needle) __attribute__ ((__pure__));
# endif
#elif defined (GNULIB_POSIXCHECK)
/* strstr() fails on multibyte strings when the locale encoding differs
   from UTF-8: POSIX specifies that it operates on "strings", and POSIX
   defines a "string" as a sequence of bytes rather than of characters.
   Warn at link time, then forward the call.  */
# undef strstr
# define strstr(hay, pin) \
    (GL_LINK_WARNING ("strstr is quadratic on many systems, and cannot " \
                      "work correctly on character strings in most "    \
                      "multibyte locales - " \
                      "use mbsstr if you care about internationalization, " \
                      "or use strstr if you care about speed"), \
     strstr (hay, pin))
#endif

/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
   comparison.  */
#if 0
# if 0
#  define strcasestr rpl_strcasestr
# endif
# if ! 1 || 0
extern char *strcasestr (const char *haystack, const char *needle)
  __attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
/* strcasestr() does not work with multibyte strings:
   It is a glibc extension, and glibc implements it only for unibyte
   locales.
   Each use emits the linker warning below and then calls the underlying
   function.  The warning text must say "does not work": the earlier
   wording "does work" stated the opposite of the limitation.  */
# undef strcasestr
# define strcasestr(a,b) \
    (GL_LINK_WARNING ("strcasestr does not work correctly on character " \
                      "strings in multibyte locales - " \
                      "use mbscasestr if you care about " \
                      "internationalization, or use c-strcasestr if you want " \
                      "a locale independent function"), \
     strcasestr (a, b))
#endif

/* strtok_r: split S into tokens separated by characters in DELIM.
   When S is NULL, the pointer saved in SAVE_PTR is used as the next
   starting point.  For example:
	char s[] = "-abc-=-def";
	char *sp;
	x = strtok_r(s, "-", &sp);	// x = "abc", sp = "=-def"
	x = strtok_r(NULL, "-=", &sp);	// x = "def", sp = NULL
	x = strtok_r(NULL, "=", &sp);	// x = NULL
		// s = "abc\0-def\0"

   This is a variant of strtok() that is multithread-safe.

   For the POSIX documentation for this function, see:
   http://www.opengroup.org/susv3xsh/strtok.html

   Caveat: It modifies the original string.
   Caveat: These functions cannot be used on constant strings.
   Caveat: The identity of the delimiting character is lost.
   Caveat: It doesn't work with multibyte strings unless all of the delimiter
           characters are ASCII characters < 0x30.

   See also strsep().  */
#if 0
# if ! 1
extern char *strtok_r (char *restrict s,
                       char const *restrict delim,
                       char **restrict save_ptr);
# endif
# if defined (GNULIB_POSIXCHECK)
/* Warn at link time about the multibyte limitation, then forward the
   call.  */
#  undef strtok_r
#  define strtok_r(str, delims, saveptr) \
     (GL_LINK_WARNING ("strtok_r cannot work correctly on character strings " \
                       "in multibyte locales - " \
                       "use mbstok_r if you care about internationalization"), \
      strtok_r (str, delims, saveptr))
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strtok_r so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strtok_r
# define strtok_r(str, delims, saveptr) \
    (GL_LINK_WARNING ("strtok_r is unportable - " \
                      "use gnulib module strtok_r for portability"), \
     strtok_r (str, delims, saveptr))
#endif


/* The following functions are not specified by POSIX.  They are gnulib
   extensions.  */

#if 0
/* mbslen: return how many multibyte characters the character string STRING
   contains.  Unlike strlen, which counts bytes, this takes multibyte
   characters into account.  */
extern size_t mbslen (char const *string);
#endif

#if 0
/* mbsnlen: return how many multibyte characters lie in the character
   string that starts at STRING and ends at STRING + LEN.  */
extern size_t mbsnlen (char const *string, size_t len);
#endif

#if 0
/* mbschr: find the first single-byte character C in the character string
   STRING and return a pointer to it, or NULL when C does not occur in
   STRING.
   Unlike strchr(), this function works correctly in multibyte locales with
   encodings such as GB18030.  */
# define mbschr rpl_mbschr /* avoid collision with HP-UX function */
extern char *mbschr (char const *string, int c);
#endif

#if 0
/* mbsrchr: find the last single-byte character C in the character string
   STRING and return a pointer to it, or NULL when C does not occur in
   STRING.
   Unlike strrchr(), this function works correctly in multibyte locales with
   encodings such as GB18030.  */
# define mbsrchr rpl_mbsrchr /* avoid collision with HP-UX function */
extern char *mbsrchr (char const *string, int c);
#endif

#if 0
/* mbsstr: find the first occurrence of the character string NEEDLE inside
   the character string HAYSTACK; the result is NULL when NEEDLE does not
   occur in HAYSTACK.
   Unlike strstr(), this function works correctly in multibyte locales with
   encodings different from UTF-8.  */
extern char *mbsstr (char const *haystack, char const *needle);
#endif

#if 0
/* mbscasecmp: compare the character strings S1 and S2 without regard to
   case.  The result is less than, equal to or greater than zero when S1 is
   lexicographically less than, equal to or greater than S2.
   Note: This function may, in multibyte locales, return 0 for strings of
   different lengths!
   Unlike strcasecmp(), this function works correctly in multibyte locales.  */
extern int mbscasecmp (char const *s1, char const *s2);
#endif

#if 0
/* mbsncasecmp: compare, without regard to case, the initial segment of the
   character string S1 made of at most N characters with the initial
   segment of the character string S2 made of at most N characters.  The
   result is less than, equal to or greater than zero when the initial
   segment of S1 is lexicographically less than, equal to or greater than
   the initial segment of S2.
   Note: This function may, in multibyte locales, return 0 for initial segments
   of different lengths!
   Unlike strncasecmp(), this function works correctly in multibyte locales.
   But beware that N is not a byte count but a character count!  */
extern int mbsncasecmp (char const *s1, char const *s2, size_t n);
#endif

#if 0
/* Compare the initial segment of the character string STRING consisting of
   at most mbslen (PREFIX) characters with the character string PREFIX,
   ignoring case.
   NOTE(review): this comment used to describe the result as "less than,
   equal to or greater than zero", but the declared return type is char *,
   so the result cannot be such an integer.  Presumably the function
   returns a pointer into STRING just past the matched prefix, or NULL when
   STRING does not start with PREFIX -- confirm against the implementation
   of mbspcasecmp.
   Note: This function may, in multibyte locales, report a match even if
   STRING is of smaller length than PREFIX!
   Unlike strncasecmp(), this function works correctly in multibyte
   locales.  */
extern char * mbspcasecmp (const char *string, const char *prefix);
#endif

#if 0
/* mbscasestr: find the first occurrence of the character string NEEDLE
   inside the character string HAYSTACK, comparing without regard to case.
   Note: This function may, in multibyte locales, return success even if
   strlen (haystack) < strlen (needle) !
   Unlike strcasestr(), this function works correctly in multibyte locales.  */
extern char *mbscasestr (char const *haystack, char const *needle);
#endif

#if 0
/* mbscspn: look in the character string STRING for the first occurrence of
   any character of the character string ACCEPT.  The result is the number
   of bytes from the beginning of STRING to that occurrence, or to the end
   of STRING when there is none.
   Unlike strcspn(), this function works correctly in multibyte locales.  */
extern size_t mbscspn (char const *string, char const *accept);
#endif

#if 0
/* mbspbrk: look in the character string STRING for the first occurrence of
   any character of the character string ACCEPT.  The result is a pointer
   to that occurrence, or NULL when there is none.
   Unlike strpbrk(), this function works correctly in multibyte locales.  */
# define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */
extern char *mbspbrk (char const *string, char const *accept);
#endif

#if 0
/* mbsspn: look in the character string STRING for the first occurrence of
   any character that is not in the character string REJECT.  The result is
   the number of bytes from the beginning of STRING to that occurrence, or
   to the end of STRING when there is none.
   Unlike strspn(), this function works correctly in multibyte locales.  */
extern size_t mbsspn (char const *string, char const *reject);
#endif

#if 0
/* mbssep: look for the next delimiter (a multibyte character listed in the
   character string DELIM), starting at the character string *STRINGP.
   When one is found it is overwritten with a NUL and *STRINGP is advanced
   to the multibyte character that follows it; otherwise *STRINGP is set to
   NULL.
   Nothing happens if *STRINGP was already NULL.
   The result is the old value of *STRINGP.

   This is a variant of mbstok_r() that supports empty fields.

   Caveat: It modifies the original string.
   Caveat: These functions cannot be used on constant strings.
   Caveat: The identity of the delimiting character is lost.

   See also mbstok_r().  */
extern char *mbssep (char **stringp, char const *delim);
#endif

#if 0
/* mbstok_r: split the character string STRING into tokens separated by
   characters of the character string DELIM.
   When STRING is NULL, the pointer saved in SAVE_PTR is used as the next
   starting point.  For example:
	char s[] = "-abc-=-def";
	char *sp;
	x = mbstok_r(s, "-", &sp);	// x = "abc", sp = "=-def"
	x = mbstok_r(NULL, "-=", &sp);	// x = "def", sp = NULL
	x = mbstok_r(NULL, "=", &sp);	// x = NULL
		// s = "abc\0-def\0"

   Caveat: It modifies the original string.
   Caveat: These functions cannot be used on constant strings.
   Caveat: The identity of the delimiting character is lost.

   See also mbssep().  */
extern char *mbstok_r (char *string, char const *delim, char **save_ptr);
#endif

/* strerror: turn any int, typically one taken from errno, into an error
   message.  */
#if 1
# if 0
#  undef strerror
#  define strerror rpl_strerror
extern char *strerror (int);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strerror so that each use emits a linker warning before the call is
   forwarded to the underlying function.  */
# undef strerror
# define strerror(errnum) \
    (GL_LINK_WARNING ("strerror is unportable - " \
                      "use gnulib module strerror to guarantee non-NULL result"), \
     strerror (errnum))
#endif

/* strsignal: declaration and portability check for strsignal, which takes
   a signal number and returns a char *.  */
#if 0
# if 0
#  define strsignal rpl_strsignal
# endif
# if ! 1 || 0
extern char *strsignal (int __sig);
# endif
#elif defined (GNULIB_POSIXCHECK)
/* Wrap strsignal so that each use emits a linker warning before the call
   is forwarded to the underlying function.  */
# undef strsignal
# define strsignal(sig) \
    (GL_LINK_WARNING ("strsignal is unportable - " \
                      "use gnulib module strsignal for portability"), \
     strsignal (sig))
#endif


#ifdef __cplusplus
}
#endif

#endif /* _GL_STRING_H */
#endif /* _GL_STRING_H */