#ifndef PARSEE_UNISTRING_H
#define PARSEE_UNISTRING_H

/*-* A basic datastructure to handle Unicode strings easily.
 * Mainly used because dealing with UTF-8 directly may be an
 * annoyance, and it may be used as a base for Cytoplasm's own
 * string management.
 * --------
 * Written-By: LDA
 * License: CC0 */

#include <stdbool.h> /* bool */
#include <stddef.h>  /* size_t */
#include <stdint.h>  /* uint32_t */

/* An opaque structure for a Unistring */
typedef struct Unistr Unistr;

/** Decodes a UTF-8 string into a separate Unistr.
 * -------
 * Returns: a valid Unistr[HEAP] | NULL
 * Thrasher: UnistrFree */
extern Unistr * UnistrCreate(char *src);

/** Returns the length of a unistring.
 * ----------
 * Returns: the unistring's length | 0 */
extern size_t UnistrSize(Unistr *unistr);

/** Returns the character of a unistring at a location,
 * or 0 if it is inaccessible.
 * ----------
 * Returns: The Unicode codepoint at a specific 0-based index | 0 */
extern uint32_t UnistrGetch(Unistr *unistr, size_t i);

/** Adds a single codepoint to a unistring (IFF not 0 and valid).
 * -------------
 * Returns: NOTHING
 * Modifies: unistr */
extern void UnistrAddch(Unistr *unistr, uint32_t u);

/** Concatenates N unistrings into a new, separate unistring.
 * {n} is the count of unistrings passed as variadic arguments.
 * ---------------------
 * Returns: a new unistring[HEAP]
 * Modifies: NOTHING
 * Thrasher: UnistrFree */
extern Unistr * UnistrConcat(size_t n, ...);

/** Encodes a unistring into a C UTF-8 string.
 * --------------
 * Returns: a valid NUL-terminated string[HEAP] | NULL
 * Thrasher: Free */
extern char * UnistrC(Unistr *unistr);

/** Destroys all memory associated with a unistring.
 * ----------
 * Returns: NOTHING
 * Thrashes: {unistr} */
extern void UnistrFree(Unistr *unistr);

/** Returns true IFF the character is within the Unicode BMP and
 * not 0x0000.
 * ------------------------------------------------------------
 * Returns: whether the character is within the BMP */
extern bool UnistrIsBMP(uint32_t u);

/* Predicate called with a codepoint by UnistrFilter; returning
 * false drops that codepoint from the filtered result. */
typedef bool (*UnistrFilterFunc)(uint32_t u);

/** "Filters" characters in a unistring by codepoint, copying into
 * a new unistring only those for which {filter} does not return
 * false.
 * --------------------
 * Returns: a new unistring with filtered characters removed */
extern Unistr * UnistrFilter(Unistr *str, UnistrFilterFunc filter);

/** Finds the offset of the first line not starting with a specific
 * character, in terms of Unicode codepoints.
 * --------
 * Returns: an offset of the first line to not start with {sep} | 0 */
extern size_t UnistrGetOffset(Unistr *str, uint32_t sep);

#endif