// Copyright (C) 2004-2022 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

#ifndef MUPDF_FITZ_STRING_H
#define MUPDF_FITZ_STRING_H

#include "mupdf/fitz/system.h"
#include "mupdf/fitz/context.h"

/* The Unicode character used to replace an incoming character whose
 * value is unknown or unrepresentable. */
#define FZ_REPLACEMENT_CHARACTER 0xFFFD

/**
	Safe string functions
*/

/**
	Return strlen(s), if that is less than maxlen, or maxlen if
	there is no null byte ('\0') among the first maxlen bytes.
*/
size_t fz_strnlen(const char *s, size_t maxlen);

/**
	Given a pointer to a C string (or a pointer to NULL) break
	it at the first occurrence of a delimiter char (from a given
	set).

	stringp: Pointer to a C string pointer (or NULL). Updated on
	exit to point to the first char of the string after the
	delimiter that was found. The string pointed to by stringp will
	be corrupted by this call (as the found delimiter will be
	overwritten by 0).

	delim: A C string of acceptable delimiter characters.

	Returns a pointer to a C string containing the chars of stringp
	up to the first delimiter char (or the end of the string), or
	NULL.
*/
char *fz_strsep(char **stringp, const char *delim);

/**
	Copy at most n-1 chars of a string into a destination
	buffer with null termination, returning the real length of the
	initial string (excluding terminator).

	dst: Destination buffer, at least n bytes long.

	src: C string (non-NULL).

	n: Size of dst buffer in bytes.

	Returns the length (excluding terminator) of src.
*/
size_t fz_strlcpy(char *dst, const char *src, size_t n);

/**
	Concatenate 2 strings, with a maximum length.

	dst: pointer to first string in a buffer of n bytes.

	src: pointer to string to concatenate.

	n: Size (in bytes) of buffer that dst is in.

	Returns the real length that a concatenated dst + src would have
	been (not including terminator).
*/
size_t fz_strlcat(char *dst, const char *src, size_t n);

/**
	Find the start of the first occurrence of the substring needle
	in haystack.
*/
void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);

/**
	Extract the directory component from a path.
*/
void fz_dirname(char *dir, const char *path, size_t dirsize);

/**
	Find the filename component in a path.
*/
const char *fz_basename(const char *path);

/**
	Like fz_decode_uri_component but in-place.
*/
char *fz_urldecode(char *url);

/**
 * Return a new string representing the unencoded version of the given URI.
 * This decodes all escape sequences except those that would result in one
 * of the reserved characters that are part of the URI syntax
 * (; / ? : @ & = + $ , #).
 */
char *fz_decode_uri(fz_context *ctx, const char *s);

/**
 * Return a new string representing the unencoded version of the given URI component.
 * This decodes all escape sequences!
 */
char *fz_decode_uri_component(fz_context *ctx, const char *s);

/**
 * Return a new string representing the provided string encoded as a URI.
 */
char *fz_encode_uri(fz_context *ctx, const char *s);

/**
 * Return a new string representing the provided string encoded as a URI component.
 * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
 */
char *fz_encode_uri_component(fz_context *ctx, const char *s);

/**
 * Return a new string representing the provided string encoded as a URI path name.
 * This also encodes the special reserved characters except /.
 */
char *fz_encode_uri_pathname(fz_context *ctx, const char *s);

/**
	Create output file name using a template.

	If the path contains %[0-9]*d, the first such pattern will be
	replaced with the page number. If the template does not contain
	such a pattern, the page number will be inserted before the
	filename extension. If the template does not have a filename
	extension, the page number will be added to the end.
*/
void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);

/**
	Rewrite path to the shortest string that names the same path.

	Eliminates multiple and trailing slashes, interprets "." and
	"..". Overwrites the string in place.
*/
char *fz_cleanname(char *name);

/**
	Resolve a path to an absolute file name.
	The resolved path buffer must be of at least PATH_MAX size.
*/
char *fz_realpath(const char *path, char *resolved_path);

/**
	Case insensitive (ASCII only) string comparison.

	NOTE(review): fz_strncasecmp takes an extra length n; presumably
	it limits the comparison to the first n chars, by analogy with
	strncasecmp -- confirm against the implementation.
*/
int fz_strcasecmp(const char *a, const char *b);
int fz_strncasecmp(const char *a, const char *b, size_t n);

/**
	FZ_UTFMAX: Maximum number of bytes used by the UTF-8 encoding
	of a single rune (maximum length returned by fz_chartorune).
*/
enum { FZ_UTFMAX = 4 };

/**
	UTF8 decode a single rune from a sequence of chars.

	rune: Pointer to an int to assign the decoded 'rune' to.

	str: Pointer to a UTF8 encoded string.

	Returns the number of bytes consumed.
*/
int fz_chartorune(int *rune, const char *str);

/**
	UTF8 encode a rune to a sequence of chars.

	str: Pointer to a place to put the UTF8 encoded character.

	rune: The rune to encode (passed by value).

	Returns the number of bytes the rune took to output.
*/
int fz_runetochar(char *str, int rune);

/**
	Count how many chars are required to represent a rune.

	rune: The rune to encode.

	Returns the number of bytes required to represent this rune in
	UTF8.
*/
int fz_runelen(int rune);

/**
	Compute the index of a rune in a string.

	str: Pointer to beginning of a string.

	p: Pointer to a char in str.

	Returns the index of the rune pointed to by p in str.
*/
int fz_runeidx(const char *str, const char *p);

/**
	Obtain a pointer to the char representing the rune
	at a given index.

	str: Pointer to beginning of a string.

	idx: Index of a rune to return a char pointer to.

	Returns a pointer to the char where the desired rune starts,
	or NULL if the string ends before the index is reached.
*/
const char *fz_runeptr(const char *str, int idx);

/**
	Count how many runes the UTF-8 encoded string
	consists of.

	s: The UTF-8 encoded, NUL-terminated text string.

	Returns the number of runes in the string.
*/
int fz_utflen(const char *s);

/**
	Locale-independent decimal to binary conversion. On overflow
	return (-)INFINITY and set errno to ERANGE. On underflow return
	0 and set errno to ERANGE. Special inputs (case insensitive):
	"NAN", "INF" or "INFINITY".
*/
float fz_strtof(const char *s, char **es);

/*
	NOTE(review): fz_grisu is undocumented in this header. The name
	suggests the Grisu float-to-decimal-digits algorithm (digits
	written to s, decimal exponent to *exp) -- confirm against the
	implementation before relying on this.
*/
int fz_grisu(float f, char *s, int *exp);

/**
	Check and parse string into page ranges:
		/,?(-?\d+|N)(-(-?\d+|N))?/

	NOTE(review): the parameters of fz_parse_page_range (a, b, n) are
	not described here; presumably a and b receive the bounds of the
	parsed range and n is the page count substituted for 'N' --
	confirm against the implementation.
*/
int fz_is_page_range(fz_context *ctx, const char *s);
const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);

/**
	Unicode aware tolower and toupper functions.
*/
int fz_tolower(int c);
int fz_toupper(int c);

#endif