From 6167732e83ef23ddadb4d4e554c984c59c872d74 Mon Sep 17 00:00:00 2001 From: LDA Date: Sat, 21 Sep 2024 13:18:59 +0200 Subject: [PATCH] [FIX] BMP for MUC nicks, dip toes in vCard avatars Can, your, Bifrost, Do, That. -lh --- README.MD | 25 ++-- src/Main.c | 13 +++ src/MatrixEventHandler.c | 10 +- src/Unistr.c | 225 ++++++++++++++++++++++++++++++++++++ src/XMPPThread/Stanzas/IQ.c | 55 +++++++-- src/include/Unistring.h | 67 +++++++++++ 6 files changed, 378 insertions(+), 17 deletions(-) create mode 100644 src/Unistr.c create mode 100644 src/include/Unistring.h diff --git a/README.MD b/README.MD index 51cc590..0467280 100644 --- a/README.MD +++ b/README.MD @@ -1,22 +1,25 @@ # Parsee - the jealous XMPP<=>Matrix bridge Parsee is a Matrix<=>XMPP bridge written in C99, with Cytoplasm, similar to Bifrost, but it is NOT a drop-in replacment. -Currently, it is *alpha* stage, which means that I wouldn't recommend using this in production, as I can change anything, at any time. +Currently, it is *alpha* stage, which means that I wouldn't recommend using this in production, +as I can change anything, at any time, and it may behave strangely at times. ## Why? ### Naming The name 'Parsee' is actually a reference to [Parsee Mizuhashi](https://en.touhouwiki.net/wiki/Parsee_Mizuhashi), -a "*bridge* princess". +a "*bridge* princess". The other name you actually can sometimes see explains itself, so I won't +be talking about it. ### Reasoning (personal to LDA) I hate Bifrost. I also wanted to dip my toes in XMPP, XML, and bridges a bit. Also, as a sister project to KappaChat, this means that I can integrate Parsee with KappaChat however I wish it to be, which allows me to mess around with a codebase I'm already familiar with. -A more "up-to-date" reason may be to have a small, 'Just Werks' bridging solution *that is good*. 
+A more "up-to-date" reason may be to have a small, 'Just Werks' bridging solution *that is good*, +and maybe as a testing ground for Cytoplasm features I sometimes add. -Well, I'm *trying* to do that, at least. +(Well, I'm *trying* to do that, at least. Please scream at me if that fails(or just doesn't run on a overclocked Raspberry -Pi 4B, which, by the way, is literally where Parsee+XMPP is running for now.) +Pi 4B, which, by the way, is literally where Parsee+XMPP is running for now.)) ### "Why not just use Matrix lol" ### "Why not just use XMPP lol" @@ -26,7 +29,7 @@ a bridge may be a good way to start. ## BUILDING ```sh -$ cc configure.c -o configure +$ cc configure.c -o configure # that or use tcc -run to consolidate these two steps. $ ./configure # use -s if you want static Parsee+MbedTLS, use -s -l if LMDB is needed $ make $ make [PREFIX=...] install # run as root if on a protected dir like /usr @@ -65,17 +68,21 @@ returns with a landing page, then this side works. You can read it for some more ## DOCS Currently, the main sources of documentation are the Ayadocs(for headers) and the manpages -(see `etc/man`) +(see `etc/man`). ## TODOS before 1.0 rolls around -- PROPER FUCKING AVATARS +- Make Parsee go *vroooooooooommmmmmm*, by NOT asking the server constantly +about what is available and what is not, as that is a source of latency, and +thus slowdowns. +- PROPER FUCKING VCARD AVATARS XMPP->Matrix is decent, Matrix->XMPP is effectively a WIP - Add [libomemo](https://github.com/gkdr/libomemo) or something as an optional dependency. - It depends on more stuff anyways, and I don't want to weigh down the dependency list of Parsee for that. - Matrix's libolm is deprecated. They replaced it with a Rust version that pulls in *way too many* dependencies, and that lacks a C binding. We may - put in the work of either forking off libolm or making a binding to KappaChat. 
+ ~~put in the work of either forking off libolm or~~ be making a binding with + KappaChat(when I get around to remaking UI :p). - Josh did infact tell me that maybe C bindings may happen. I'd be willing to help out, but IDK. In any case, this will at best be an extension packagers may integrate properly. diff --git a/src/Main.c b/src/Main.c index fe976bf..b0ed711 100644 --- a/src/Main.c +++ b/src/Main.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -71,6 +72,18 @@ Main(Array *args, HashMap *env) start = UtilTsMillis(); + { + Unistr *s = UnistrCreate("Array 日本語🌋"); + size_t i; + for (i = 0; i < UnistrSize(s); i++) + { + uint32_t cp = UnistrGetch(s, i); + Log(LOG_INFO, "%X", cp); + } + + UnistrFree(s); + } + memset(&conf, 0, sizeof(conf)); Log(LOG_INFO, "%s - v%s[%s] (Cytoplasm %s)", diff --git a/src/MatrixEventHandler.c b/src/MatrixEventHandler.c index 3835fe8..73feda5 100644 --- a/src/MatrixEventHandler.c +++ b/src/MatrixEventHandler.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -21,10 +22,17 @@ JoinMUC(ParseeData *data, HashMap *event, char *jid, char *muc, char *name) { char *sender = GrabString(event, 1, "sender"); - char *nick = StrDuplicate(name); + Unistr *uninick = UnistrCreate(name); + Unistr *filtered = UnistrFilter(uninick, UnistrIsBMP); + char *nick = UnistrC(filtered); char *rev = StrConcat(3, muc, "/", nick); int nonce = 0; + Log(LOG_DEBUG, "MUCJOINER: filtered '%s' to '%s'", name, nick); + + UnistrFree(uninick); + UnistrFree(filtered); + while (!XMPPJoinMUC(data->jabber, jid, rev, true) && nonce < 32) { char *nonce_str = StrInt(nonce); diff --git a/src/Unistr.c b/src/Unistr.c new file mode 100644 index 0000000..b9439c8 --- /dev/null +++ b/src/Unistr.c @@ -0,0 +1,225 @@ +#include + +#include +#include +#include + +#include + +struct Unistr { + size_t length; + uint32_t *codepoints; +}; + +void +UnistrAddch(Unistr *unistr, uint32_t u) +{ + if (!unistr || !u) + { + return; + } + unistr->length++; + 
unistr->codepoints = Realloc( + unistr->codepoints, + unistr->length * sizeof(*unistr->codepoints) + ); + + unistr->codepoints[unistr->length - 1] = u; +} + +static bool +UTFIsN(char *off, size_t available, int n, uint8_t pc) +{ + int i; + uint8_t *offu = (uint8_t *) off; + if ((available < n) || ((*offu >> (8-n-1)) != pc)) + { + return false; + } + + for (i = 0; i < n - 1; i++) + { + if ((offu[i+1] >> 6) != 0b10) + { + return false; + } + } + return true; +} + +Unistr * +UnistrCreate(char *src) +{ + size_t len, i; + Unistr *str; + char *start; + if (!src) + { + return NULL; + } + + len = strlen(src); + str = Malloc(sizeof(*str)); + str->length = 0; + str->codepoints = NULL; + + /* We can't just set the length to {len}. */ + for (i = 0; i < len; i++) + { + char byte = src[i]; + size_t available = len - i; + if ((byte & 0x80) == 0) + { + /* This is a regular codepoint */ + UnistrAddch(str, byte & 0x7F); + continue; + } + else if (UTFIsN(&src[i], available, 2, 0b110)) + { + char a = src[i+0] & 0b00011111; + char b = src[i+1] & 0b00111111; + uint32_t u = (a << (6 * 1)) | b; + + /* Overlongs are errors. */ + if (u < 0x0080 || u > 0x07FF) + { + UnistrFree(str); + return NULL; + } + + UnistrAddch(str, u); + i += 2 - 1; + continue; + } + else if (UTFIsN(&src[i], available, 3, 0b1110)) + { + char a = src[i+0] & 0b00001111; + char b = src[i+1] & 0b00111111; + char c = src[i+2] & 0b00111111; + uint32_t u = + (a << (6 * 2)) | + (b << (6 * 1)) | + (c << (6 * 0)) ; + + /* Overlongs are errors. */ + if (u < 0x0800 || u > 0xFFFF) + { + UnistrFree(str); + return NULL; + } + + UnistrAddch(str, u); + i += 3 - 1; + continue; + } + else if (UTFIsN(&src[i], available, 4, 0b11110)) + { + char a = src[i+0] & 0b00000111; + char b = src[i+1] & 0b00111111; + char c = src[i+2] & 0b00111111; + char d = src[i+3] & 0b00111111; + uint32_t u = + (a << (6 * 3)) | + (b << (6 * 2)) | + (c << (6 * 1)) | + (d << (6 * 0)) ; + + /* Overlongs are errors. 
*/ + if (u < 0x10000 || u > 0x10FFFF) + { + UnistrFree(str); + return NULL; + } + + UnistrAddch(str, u); + i += 4 - 1; + continue; + + } + } + + return str; +} +void +UnistrFree(Unistr *unistr) +{ + if (!unistr) + { + return; + } + + Free(unistr->codepoints); + Free(unistr); +} +char * +UnistrC(Unistr *unistr) +{ + char *ret, *tmp, *utf; + size_t i; + if (!unistr) + { + return NULL; + } + + ret = NULL; + for (i = 0; i < unistr->length; i++) + { + uint32_t code = unistr->codepoints[i]; + utf = StrUtf8Encode(code); + + tmp = ret; + ret = StrConcat(2, ret, utf); + Free(tmp); + Free(utf); + } + + return ret; +} +size_t +UnistrSize(Unistr *unistr) +{ + return unistr ? unistr->length : 0; +} +uint32_t +UnistrGetch(Unistr *unistr, size_t i) +{ + if (!unistr) + { + return 0; + } + + return i < unistr->length ? unistr->codepoints[i] : 0; +} +bool +UnistrIsBMP(uint32_t u) +{ + if (u == 0) + { + return NULL; + } + + return u <= 0xFFFF; +} +Unistr * +UnistrFilter(Unistr *str, UnistrFilterFunc filter) +{ + Unistr *unistr; + size_t i; + if (!str || !filter) + { + return NULL; + } + + unistr = UnistrCreate(""); + for (i = 0; i < UnistrSize(str); i++) + { + uint32_t code = UnistrGetch(str, i); + if (!filter(code)) + { + continue; + } + UnistrAddch(unistr, code); + } + + return unistr; +} diff --git a/src/XMPPThread/Stanzas/IQ.c b/src/XMPPThread/Stanzas/IQ.c index 6058ba4..c6e929f 100644 --- a/src/XMPPThread/Stanzas/IQ.c +++ b/src/XMPPThread/Stanzas/IQ.c @@ -356,14 +356,14 @@ IQGet(ParseeData *args, XMLElement *stanza, XMPPThread *thr) } else if (XMLookForTKV(stanza, "vCard", "xmlns", "vcard-temp")) { - Log(LOG_INFO, "vCard information GET for %s", to); + char *to_matrix = ParseeGetBridgedUser(args, stanza); + char *name = ASGetName(args->config, NULL, to_matrix); + XMLElement *iqVCard; + Log(LOG_DEBUG, "vCard information GET for %s", to); - /* TODO: "a compliant server MUST respond on behalf of the - * requestor and not forward the IQ to the requestee's - * connected resource". 
*/ if (!strncmp(to, "parsee@", 7)) { - XMLElement *iqVCard = XMLCreateTag("iq"); + iqVCard = XMLCreateTag("iq"); XMLAddAttr(iqVCard, "from", to); XMLAddAttr(iqVCard, "to", from); XMLAddAttr(iqVCard, "id", id); @@ -394,7 +394,49 @@ IQGet(ParseeData *args, XMLElement *stanza, XMPPThread *thr) StreamFlush(jabber->stream); pthread_mutex_unlock(&jabber->write_lock); XMLFreeElement(iqVCard); + Free(to_matrix); + Free(name); + return; } + + iqVCard = XMLCreateTag("iq"); + XMLAddAttr(iqVCard, "from", to); + XMLAddAttr(iqVCard, "to", from); + XMLAddAttr(iqVCard, "id", id); + XMLAddAttr(iqVCard, "type", "result"); + { + XMLElement *vCard = XMLCreateTag("vCard"); + char *mto_link = ParseeGenerateMTO(to_matrix); + XMLAddAttr(vCard, "xmlns", "vcard-temp"); + { + XMLElement *fn = CreateTagWithText( + "FN", name ? name : to_matrix + ); + XMLElement *nick = CreateTagWithText( + "NICKNAME", to_matrix + ); + XMLElement *url = CreateTagWithText( + "URL", mto_link + ); + + /* TODO: Maybe abstract the vCard code. */ + /* TODO: Make a function to just get a user's avatar + * automatically. 
*/ + XMLAddChild(vCard, nick); + XMLAddChild(vCard, url); + XMLAddChild(vCard, fn); + + Free(mto_link); + } + XMLAddChild(iqVCard, vCard); + } + + pthread_mutex_lock(&jabber->write_lock); + XMLEncode(jabber->stream, iqVCard); + StreamFlush(jabber->stream); + pthread_mutex_unlock(&jabber->write_lock); + Free(to_matrix); + Free(name); } #define PS "http://jabber.org/protocol/pubsub" else if ((pubsub = XMLookForTKV(stanza, "pubsub", "xmlns", PS))) @@ -417,8 +459,7 @@ IQGet(ParseeData *args, XMLElement *stanza, XMPPThread *thr) b64 = Base64Encode(buf, len); Free(buf); - Log(LOG_INFO, "FM=%s", to_matrix); - Log(LOG_INFO, "B=%s (%dB)", b64, (int) len); + Log(LOG_DEBUG, "IQ-GET: PUBSUB AVATAR OF=%s", to_matrix); /* Strike back with a response */ reply = XMLCreateTag("iq"); XMLAddAttr(reply, "type", "result"); diff --git a/src/include/Unistring.h b/src/include/Unistring.h new file mode 100644 index 0000000..f1c9aee --- /dev/null +++ b/src/include/Unistring.h @@ -0,0 +1,67 @@ +#ifndef PARSEE_UNISTRING_H +#define PARSEE_UNISTRING_H + +/*-*

A basic data structure to handle Unicode strings easily.

+ *

Mainly used because dealing with UTF-8 directly can be an + * annoyance; it may also serve as a base for Cytoplasm's own + * string management.

+ * -------- + * Written-By: LDA + * License: CC0 */ + +#include +#include +#include + +/* An opaque structure for a Unistring */ +typedef struct Unistr Unistr; + +/** Decodes a UTF-8 string into a separate Unistr. + * ------- + * Returns: a valid Unistr[HEAP] | NULL (NULL source, or a sequence failing the decoder's range check) + * Thrasher: UnistrFree */ +extern Unistr * UnistrCreate(char *src); + +/** Returns the length(in codepoints) of an unistring. + * ---------- + * Returns: the unistring's length | 0 if the unistring is NULL */ +extern size_t UnistrSize(Unistr *unistr); + +/** Returns the character of an unistring at a location, + * or 0 if it is inaccessible(NULL unistring or index out of range). + * ---------- + * Returns: The Unicode codepoint of a specific 0-index | 0 */ +extern uint32_t UnistrGetch(Unistr *unistr, size_t i); + +/** Appends a singular codepoint to a unistring(IFF the codepoint is not 0 and the unistring is not NULL). + * ------------- + * Returns: NOTHING + * Modifies: unistr */ +extern void UnistrAddch(Unistr *unistr, uint32_t u); + +/** Encodes a unistring into a C UTF-8 string + * -------------- + * Returns: a valid NUL-terminated string[HEAP] | NULL + * Thrasher: Free */ +extern char * UnistrC(Unistr *unistr); + +/** Destroys all memory associated with a unistring(no-op on NULL). + * ---------- + * Returns: NOTHING + * Thrashes: {unistr} */ +extern void UnistrFree(Unistr *unistr); + +/** Returns true IFF the character is within the unicode BMP and + * not 0x0000 + * ------------------------------------------------------------ + * Returns: whether the character is within the BMP(0x0001 to 0xFFFF) */ +extern bool UnistrIsBMP(uint32_t u); + +typedef bool (*UnistrFilterFunc)(uint32_t u); +/** "Filters" characters in a Unistring by codepoint, copying only + * those for which the callback returns true into a new unistring. + * -------------------- + * Returns: a new unistring with filtered characters removed | NULL if either argument is NULL */ +extern Unistr * UnistrFilter(Unistr *str, UnistrFilterFunc filter); + +#endif