mirror of
https://forge.fsky.io/lda/Parsee.git
synced 2026-03-13 15:15:10 +00:00
79 lines
2.5 KiB
C
79 lines
2.5 KiB
C
#ifndef PARSEE_UNISTRING_H
|
|
#define PARSEE_UNISTRING_H
|
|
|
|
/*-* <p>A basic data structure to handle Unicode strings easily.</p>
|
|
* <p>Mainly used because dealing with UTF-8 directly may be an
|
|
* annoyance, and it may be used as a base for Cytoplasm's own
|
|
* string management</p>
|
|
* --------
|
|
* Written-By: LDA
|
|
* License: CC0 */
|
|
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
|
|
/* An opaque structure for a Unistring */
|
|
typedef struct Unistr Unistr;
|
|
|
|
/** Decodes a UTF-8 string into a separate Unistr.
|
|
* -------
|
|
* Returns: a valid Unistr[HEAP] | NULL
|
|
* Thrasher: UnistrFree */
|
|
extern Unistr * UnistrCreate(char *src);
|
|
|
|
/** Returns the length of a unistring.
|
|
* ----------
|
|
* Returns: the unistring's length | 0 */
|
|
extern size_t UnistrSize(Unistr *unistr);
|
|
|
|
/** Returns the character of a unistring at a location,
|
|
* or 0 if it is inaccessible.
|
|
* ----------
|
|
* Returns: The Unicode codepoint of a specific 0-index | 0 */
|
|
extern uint32_t UnistrGetch(Unistr *unistr, size_t i);
|
|
|
|
/** Adds a singular codepoint to a unistring(IFF not 0 and valid).
|
|
* -------------
|
|
* Returns: NOTHING
|
|
* Modifies: unistr */
|
|
extern void UnistrAddch(Unistr *unistr, uint32_t u);
|
|
|
|
/** Concats N unistrings into a new, separate unistring.
|
|
* ---------------------
|
|
* Returns: a new unistring[HEAP]
|
|
* Modifies: NOTHING
|
|
* Thrasher: UnistrFree */
|
|
extern Unistr * UnistrConcat(size_t n, ...);
|
|
|
|
/** Encodes a unistring into a C UTF-8 string
|
|
* --------------
|
|
* Returns: a valid NULL-terminated string[HEAP] | NULL
|
|
* Thrasher: Free */
|
|
extern char * UnistrC(Unistr *unistr);
|
|
|
|
/** Destroys all memory associated with a unistring.
|
|
* ----------
|
|
* Returns: NOTHING
|
|
* Thrashes: {unistr} */
|
|
extern void UnistrFree(Unistr *unistr);
|
|
|
|
/** Returns true IFF the character is within the unicode BMP and
|
|
* not 0x0000
|
|
* ------------------------------------------------------------
|
|
* Returns: whether the character is within the BMP */
|
|
extern bool UnistrIsBMP(uint32_t u);
|
|
|
|
typedef bool (*UnistrFilterFunc)(uint32_t u);
|
|
/** "Filters" characters in a Unistring by codepoint, removing
|
|
* those for which the callback returns false, into a new unistring.
|
|
* --------------------
|
|
* Returns: a new unistring with filtered characters removed */
|
|
extern Unistr * UnistrFilter(Unistr *str, UnistrFilterFunc filter);
|
|
|
|
/** Finds the offset of the first line not starting with a specific
|
|
* character, in terms of Unicode codepoints.
|
|
* --------
|
|
* Returns: an offset of the first line to not start with {sep} | 0 */
|
|
extern size_t UnistrGetOffset(Unistr *str, uint32_t sep);
|
|
#endif
|