[FIX] BMP for MUC nicks, dip toes in vCard avatars

Can, your, Bifrost, Do, That. -lh
This commit is contained in:
LDA 2024-09-21 13:18:59 +02:00
commit 6167732e83
6 changed files with 378 additions and 17 deletions

225
src/Unistr.c Normal file
View file

@ -0,0 +1,225 @@
#include <Unistring.h>
#include <Cytoplasm/Memory.h>
#include <Cytoplasm/Str.h>
#include <Cytoplasm/Log.h>
#include <string.h>
/*
 * A string held as an array of decoded Unicode codepoints instead of
 * UTF-8 bytes.
 */
struct Unistr {
/* Number of entries currently stored in {codepoints}. */
size_t length;
/* Heap array of {length} codepoints; NULL while the string is empty. */
uint32_t *codepoints;
};
/*
 * Appends the codepoint {u} to the end of {unistr}.
 *
 * A NULL {unistr} and the zero codepoint are both ignored. If the backing
 * array cannot be grown, the string is left exactly as it was: the length
 * and the array are only updated once the reallocation has succeeded, so a
 * failure neither loses the existing codepoints nor writes through NULL.
 */
void
UnistrAddch(Unistr *unistr, uint32_t u)
{
    uint32_t *grown;

    if (!unistr || !u)
    {
        return;
    }

    /* Grow through a temporary so the old array survives a failure.
     * NOTE(review): assumes Realloc() returns NULL on failure and leaves
     * the original allocation valid -- confirm against Cytoplasm. */
    grown = Realloc(
        unistr->codepoints,
        (unistr->length + 1) * sizeof(*unistr->codepoints)
    );
    if (!grown)
    {
        return;
    }

    unistr->codepoints = grown;
    unistr->codepoints[unistr->length] = u;
    unistr->length++;
}
/*
 * Tells whether the bytes at {off} look like an {n}-byte UTF-8 sequence.
 *
 * {available} is how many bytes may be read from {off}. {pc} is the
 * expected value of the lead byte once it has been shifted right by
 * (8 - n - 1) bits, i.e. its length marker (110, 1110 or 11110 in binary
 * for n = 2, 3, 4). Every one of the {n} - 1 following bytes must carry
 * the 10xxxxxx continuation marker.
 */
static bool
UTFIsN(char *off, size_t available, int n, uint8_t pc)
{
    const uint8_t *bytes = (const uint8_t *) off;
    int k;

    /* Not enough input left to hold the whole sequence. */
    if (available < n)
    {
        return false;
    }

    /* The lead byte has to announce exactly this sequence length. */
    if ((bytes[0] >> (8 - n - 1)) != pc)
    {
        return false;
    }

    /* Each trailing byte must be a continuation byte (top bits 10). */
    for (k = 1; k < n; k++)
    {
        if ((bytes[k] >> 6) != 0x02)
        {
            return false;
        }
    }

    return true;
}
/*
 * Decodes the NUL-terminated UTF-8 string {src} into a newly allocated
 * Unistr holding one entry per decoded codepoint.
 *
 * Returns NULL when {src} is NULL, when the Unistr itself cannot be
 * allocated, or when a well-formed sequence decodes to something that is
 * not a legal scalar value: an overlong encoding, a UTF-16 surrogate
 * (U+D800..U+DFFF) or a value above U+10FFFF.
 *
 * Bytes that do not start a well-formed sequence (stray continuation
 * bytes, invalid lead bytes, sequences cut short by the end of the string)
 * are skipped one at a time instead of being treated as errors.
 *
 * The result is released with UnistrFree().
 */
Unistr *
UnistrCreate(char *src)
{
    size_t len, i;
    Unistr *str;

    if (!src)
    {
        return NULL;
    }

    len = strlen(src);
    str = Malloc(sizeof(*str));
    if (!str)
    {
        return NULL;
    }
    str->length = 0;
    str->codepoints = NULL;

    /* We can't just set the length to {len}: one codepoint may span
     * several bytes. */
    for (i = 0; i < len; i++)
    {
        /* Work on unsigned bytes so masking and shifting never see a
         * negative value, whatever the signedness of plain char. */
        const uint8_t *b = (const uint8_t *) &src[i];
        size_t available = len - i;
        size_t seqlen;
        uint32_t u, min;

        if ((b[0] & 0x80) == 0)
        {
            /* This is a regular (ASCII) codepoint */
            UnistrAddch(str, b[0]);
            continue;
        }

        if (UTFIsN(&src[i], available, 2, 0x06))
        {
            seqlen = 2;
            min = 0x0080;
            u = ((uint32_t) (b[0] & 0x1F) << (6 * 1)) |
                ((uint32_t) (b[1] & 0x3F) << (6 * 0));
        }
        else if (UTFIsN(&src[i], available, 3, 0x0E))
        {
            seqlen = 3;
            min = 0x0800;
            u = ((uint32_t) (b[0] & 0x0F) << (6 * 2)) |
                ((uint32_t) (b[1] & 0x3F) << (6 * 1)) |
                ((uint32_t) (b[2] & 0x3F) << (6 * 0));
        }
        else if (UTFIsN(&src[i], available, 4, 0x1E))
        {
            seqlen = 4;
            min = 0x10000;
            u = ((uint32_t) (b[0] & 0x07) << (6 * 3)) |
                ((uint32_t) (b[1] & 0x3F) << (6 * 2)) |
                ((uint32_t) (b[2] & 0x3F) << (6 * 1)) |
                ((uint32_t) (b[3] & 0x3F) << (6 * 0));
        }
        else
        {
            /* Not the start of a well-formed sequence: drop this byte. */
            continue;
        }

        /* Overlongs (value below the minimum for this length), surrogates
         * and anything past U+10FFFF are errors. */
        if (u < min || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF))
        {
            UnistrFree(str);
            return NULL;
        }

        UnistrAddch(str, u);

        /* The loop's own i++ accounts for the lead byte. */
        i += seqlen - 1;
    }

    return str;
}
/*
 * Releases {unistr} together with its codepoint array.
 * Passing NULL is allowed and does nothing.
 */
void
UnistrFree(Unistr *unistr)
{
    if (unistr)
    {
        /* The array goes first: it is reached through the struct. */
        Free(unistr->codepoints);
        Free(unistr);
    }
}
/*
 * Builds a C string from {unistr} by encoding each codepoint with
 * StrUtf8Encode() and appending it to the result with StrConcat().
 *
 * Returns NULL when {unistr} is NULL or holds no codepoints. Otherwise the
 * returned string comes from StrConcat() -- presumably the caller is
 * expected to release it with Free(); confirm against Cytoplasm.
 */
char *
UnistrC(Unistr *unistr)
{
    char *out = NULL;
    size_t idx = 0;

    if (!unistr)
    {
        return NULL;
    }

    while (idx < unistr->length)
    {
        char *encoded = StrUtf8Encode(unistr->codepoints[idx]);
        char *joined = StrConcat(2, out, encoded);

        /* Both inputs were copied into {joined}; drop them. */
        Free(out);
        Free(encoded);

        out = joined;
        idx++;
    }

    return out;
}
/*
 * Returns the number of codepoints stored in {unistr}, or 0 when
 * {unistr} is NULL.
 */
size_t
UnistrSize(Unistr *unistr)
{
    if (!unistr)
    {
        return 0;
    }

    return unistr->length;
}
/*
 * Returns the codepoint stored at index {i} of {unistr}.
 * Yields 0 when {unistr} is NULL or {i} is past the end of the string.
 */
uint32_t
UnistrGetch(Unistr *unistr, size_t i)
{
    if (!unistr || i >= unistr->length)
    {
        return 0;
    }

    return unistr->codepoints[i];
}
/*
 * Tells whether the codepoint {u} lies in the Basic Multilingual Plane,
 * i.e. is no greater than U+FFFF. The zero codepoint is reported as not
 * being in the BMP.
 */
bool
UnistrIsBMP(uint32_t u)
{
    /* Return a real boolean for the zero case rather than the pointer
     * constant NULL. */
    return u != 0 && u <= 0xFFFF;
}
/*
 * Creates a new Unistr containing, in their original order, only the
 * codepoints of {str} for which {filter} returns a true value.
 *
 * Returns NULL when either {str} or {filter} is NULL. {str} itself is not
 * modified by this function.
 */
Unistr *
UnistrFilter(Unistr *str, UnistrFilterFunc filter)
{
    Unistr *kept;
    size_t pos = 0;

    if (!str || !filter)
    {
        return NULL;
    }

    /* Start from an empty string and append the accepted codepoints. */
    kept = UnistrCreate("");

    while (pos < UnistrSize(str))
    {
        uint32_t code = UnistrGetch(str, pos);

        if (filter(code))
        {
            UnistrAddch(kept, code);
        }
        pos++;
    }

    return kept;
}