[MOD/WIP] Mess a bit with the XEP-0393 parser

It took a comical amount of time for me to do that LMAO
This commit is contained in:
LDA 2024-08-01 10:31:59 +02:00
commit cb0e77e7a4
5 changed files with 345 additions and 148 deletions

View file

@ -85,7 +85,7 @@ ParseeCleanup(void *datp)
#define CleanupField(field, timeout, threshold) do \ #define CleanupField(field, timeout, threshold) do \
{ \ { \
size_t fields = 0, cleaned = 0; \ size_t cleaned = 0; \
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \ field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
to_delete = ArrayCreate(); \ to_delete = ArrayCreate(); \
while (HashMapIterate(field##s, &field, (void **) &val)) \ while (HashMapIterate(field##s, &field, (void **) &val)) \
@ -93,7 +93,6 @@ ParseeCleanup(void *datp)
HashMap *obj = JsonValueAsObject(val); \ HashMap *obj = JsonValueAsObject(val); \
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \ uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
uint64_t dur = ts - age; \ uint64_t dur = ts - age; \
fields++; \
\ \
if ((dur > (timeout))) \ if ((dur > (timeout))) \
{ \ { \
@ -140,7 +139,7 @@ ParseeCleanup(void *datp)
#define CleanupField(field, timeout, threshold) do \ #define CleanupField(field, timeout, threshold) do \
{ \ { \
size_t fields = 0, cleaned = 0; \ size_t cleaned = 0; \
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \ field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
to_delete = ArrayCreate(); \ to_delete = ArrayCreate(); \
while (HashMapIterate(field##s, &field, (void **) &val)) \ while (HashMapIterate(field##s, &field, (void **) &val)) \
@ -148,7 +147,6 @@ ParseeCleanup(void *datp)
HashMap *obj = JsonValueAsObject(val); \ HashMap *obj = JsonValueAsObject(val); \
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \ uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
uint64_t dur = ts - age; \ uint64_t dur = ts - age; \
fields++; \
\ \
if ((dur > (timeout))) \ if ((dur > (timeout))) \
{ \ { \

View file

@ -1,9 +1,13 @@
#include <StringSplit.h> #include <StringSplit.h>
#include <Cytoplasm/Memory.h> #include <Cytoplasm/Memory.h>
#include <Cytoplasm/Str.h>
#include <Cytoplasm/Log.h>
#include <stdbool.h> #include <stdbool.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <ctype.h>
char ** char **
StrSplitLines(char *text) StrSplitLines(char *text)
@ -109,3 +113,131 @@ StrFullRect(char **split)
.source_lines = split .source_lines = split
}); });
} }
/**
 * Fetches a single character out of a string rectangle.
 *
 * {line} and {col} are offsets relative to the rectangle's start_line
 * and start_char. The column is validated against the length of the
 * underlying source line only; it is NOT clipped to the rectangle's
 * end_char.
 * --------------
 * Returns: the character at the resolved position | '\0' when the
 * rectangle has no source lines, an offset resolves to a negative
 * index, the line lies past end_line, the line pointer is NULL, or the
 * column lies past the end of the line
 * Modifies: NOTHING */
char
StrGet(StringRect rect, int line, int col)
{
    int actual_line, actual_col;
    char *linep;

    if (!rect.source_lines)
    {
        return '\0';
    }

    actual_line = rect.start_line + line;
    actual_col = rect.start_char + col;

    /* A negative index would read before the start of the line array
     * (or of the line itself): treat it as out of bounds instead. */
    if (actual_line < 0 || actual_col < 0)
    {
        return '\0';
    }
    if (actual_line > rect.end_line)
    {
        return '\0';
    }
    if (!(linep = rect.source_lines[actual_line]))
    {
        return '\0';
    }

    /* actual_col is non-negative here, so the cast cannot wrap. An index
     * equal to the length simply yields the terminating NUL. */
    if ((size_t) actual_col > strlen(linep))
    {
        return '\0';
    }

    return linep[actual_col];
}
/**
 * Reports the column span of a string rectangle at a given line.
 *
 * The span is taken from the rectangle's own column bounds
 * (end_char - start_char); the length of the underlying source line is
 * not consulted.
 * --------------
 * Returns: end_char - start_char | 0 when the rectangle has no source
 * lines, {line} resolves to a negative index or past end_line, the line
 * pointer is NULL, or the column bounds are inverted
 * Modifies: NOTHING */
size_t
StrViewChars(StringRect rect, int line)
{
    int actual_line;

    if (!rect.source_lines)
    {
        return 0;
    }

    actual_line = rect.start_line + line;

    /* Reject indices on either side of the rectangle; a negative one
     * would read before the start of the line array. */
    if (actual_line < 0 || actual_line > rect.end_line)
    {
        return 0;
    }
    if (!rect.source_lines[actual_line])
    {
        return 0;
    }

    /* Inverted bounds would otherwise wrap around to a huge size_t. */
    if (rect.end_char < rect.start_char)
    {
        return 0;
    }

    return rect.end_char - rect.start_char;
}
/**
 * Narrows a string rectangle so that it begins at the {line}-th line
 * (relative to its current start_line).
 *
 * With {extend} set, the result keeps the original end_line and thus
 * covers everything from the chosen line onwards; otherwise the result
 * covers that one line only. Column bounds are left as they were.
 * --------------
 * Returns: the narrowed rectangle | StrFullRect(NULL) when the
 * rectangle has no source lines or the line lies past end_line
 * Modifies: NOTHING */
StringRect
StrGetl(StringRect rect, int line, bool extend)
{
    int target;

    if (rect.source_lines)
    {
        target = rect.start_line + line;

        if (!(target > rect.end_line))
        {
            rect.start_line = target;
            if (!extend)
            {
                rect.end_line = target;
            }
            return rect;
        }
    }

    return StrFullRect(NULL);
}
/**
 * Moves the starting column of a string rectangle forward by {n}
 * characters, never letting it pass the rectangle's end_char.
 * --------------
 * Returns: a copy of the rectangle with the adjusted start_char
 * Modifies: NOTHING */
StringRect
StrShift(StringRect rect, int n)
{
    int moved = rect.start_char + n;

    /* Clamp to the right-hand column bound. */
    rect.start_char = (moved > rect.end_char) ? (int) rect.end_char : moved;

    return rect;
}
/**
 * Counts the lines spanned by a string rectangle, treating both
 * start_line and end_line as part of the span.
 * --------------
 * Returns: end_line - start_line + 1 | 0 when start_line lies past
 * end_line
 * Modifies: NOTHING */
size_t
StrViewLines(StringRect view)
{
    size_t count = 0;

    if (view.start_line <= view.end_line)
    {
        count = view.end_line - view.start_line + 1;
    }

    return count;
}
/**
 * Debugging aid: writes every line of a string rectangle to the log at
 * LOG_INFO level, one log entry per line.
 *
 * Each line is rebuilt character by character through StrGet, stopping
 * at the first NUL or once the column index exceeds StrViewChars.
 * --------------
 * Returns: NOTHING
 * Modifies: NOTHING (only emits log entries) */
void
PrintRect(StringRect rect)
{
    size_t i, lines;

    if (!rect.source_lines)
    {
        return;
    }

    /* The rectangle is not modified below, so query its height once. */
    lines = StrViewLines(rect);
    for (i = 0; i < lines; i++)
    {
        char *line = NULL, *tmp;
        char cbuf[2] = { 0, '\0' };
        size_t chi = 0;
        size_t width = StrViewChars(rect, i);

        while ((*cbuf = StrGet(rect, i, chi)) != '\0' &&
               chi++ <= width)
        {
            tmp = line;
            line = StrConcat(2, line, cbuf);
            Free(tmp);
        }

        /* The text comes from the message being parsed: never use it as
         * the format string itself. An empty line leaves `line` NULL, so
         * log an empty string rather than passing a NULL pointer. */
        Log(LOG_INFO, "%s", line ? line : "");
        Free(line);
    }
}

View file

@ -1,7 +1,10 @@
#include <XEP393.h> #include <XEP393.h>
#include <StringSplit.h>
#include <Cytoplasm/Memory.h> #include <Cytoplasm/Memory.h>
#include <Cytoplasm/Array.h> #include <Cytoplasm/Array.h>
#include <Cytoplasm/Str.h>
#include <Cytoplasm/Log.h> #include <Cytoplasm/Log.h>
#include <string.h> #include <string.h>
@ -62,190 +65,223 @@ XEP393FreeElement(XEP393Element *element)
XEP393FreeElementBase(element, false); XEP393FreeElementBase(element, false);
} }
typedef struct StrView { static StringRect
char *start; DecodeQuote(StringRect rect, size_t *skip)
char *end;
bool heap_free;
} StrView;
#define ViewLength(v) ((size_t) ((v.end) - (v.start)))
static char *
StringifyView(StrView v)
{ {
char *r; StringRect ret = StrFullRect(NULL);
size_t len; int lines = 0;
if (!v.start || v.start > v.end)
/* C abuse of chaining operations */
while ((StrGet(rect, lines, 0) == '>') && ++lines)
{ {
return NULL; if (!ret.source_lines)
{
int shift_by = 1, ch;
ret = rect;
ret.end_line = 0;
while ((ch = StrGet(rect, lines - 1, shift_by)) && isspace(ch))
{
shift_by++;
} }
len = ViewLength(v); if (ch)
r = Malloc(len + 1); {
memcpy(r, v.start, len); ret = StrShift(ret, shift_by);
r[len] = '\0'; }
continue;
}
ret.end_line++;
}
return r; if (!lines)
{
return StrFullRect(NULL);
}
if (skip)
{
*skip = lines;
}
return ret;
} }
static StrView static StringRect
CreateStaticView(char *str) DecodeSpan(StringRect rect, char del, size_t *skip)
{ {
StrView view = { StringRect ret = StrFullRect(NULL);
.start = str, int chars = 0;
.end = str + strlen(str), char c;
.heap_free = false
};
return view; if (StrGet(rect, 0, 0) != del)
}
static bool
IdentifySpans(char span_tag, char end_tag, StrView in, StrView *view)
{
size_t length;
bool found = false, equal, flag;
char prev = '\0';
if (in.start >= in.end)
{ {
return false; return ret;
}
if (ViewLength(in) < 2)
{
return false;
} }
equal = span_tag == end_tag; rect = StrShift(rect, 1);
flag = equal && isspace(*(in.start + 1)); /* C abuse of chaining operations */
if (*in.start != span_tag || flag) while (((c = StrGet(rect, 0, chars)) != del) && ++chars)
{ {
/* The opening styling directive MUST NOT be followed if (!c)
* by a whitespace character */ {
return false; return StrFullRect(NULL);
}
if (!ret.source_lines && isspace(c))
{
return StrFullRect(NULL);
} }
view->start = in.start + 1;
in.start += 1;
for (length = 0; ViewLength(in) > 0; length++, in.start++) if (!ret.source_lines)
{ {
if (*in.start == end_tag) ret = rect;
ret.end_char = ret.start_char;
continue;
}
ret.end_char++;
}
ret.end_char++;
if (!chars)
{
return StrFullRect(NULL);
}
{
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
size_t i;
for (i = 0; i < StrViewChars(ret, 0); i++)
{
*chara = StrGet(ret, 0, i);
if (!*chara)
{ {
found = true;
break; break;
} }
prev = *in.start; temp = gen;
} gen = StrConcat(2, gen, chara);
if (!found || !length || (prev && equal && isspace(prev))) Free(temp);
{
/* the closing styling directive MUST NOT be preceeded
* by a whitespace character. */
return false;
} }
view->end = in.start;
return true; Free(gen);
}
if (skip)
{
*skip = chars;
}
return ret;
} }
#define IdentifySpan(span_tag, in, view) IdentifySpans(span_tag, span_tag, in, view)
#define BLOCK_QUOTE (1 << 0)
#define BLOCK_CODES (1 << 1)
static void static void
XEP393Decode(StrView view, XEP393Element *root) ParseLine(XEP393Element *elem, StringRect line)
{ {
StrView subview = view; XEP393Element *span_item, *line_item;
StrView textview = view; StringRect shifted;
XEP393Element *text, *span; size_t ch_idx, chars = StrViewChars(line, 0);
bool managed = false; size_t text_start = 0;
char prev = '\0', curr = '\0'; size_t i;
textview.end = subview.start; for (ch_idx = 0; ch_idx < chars; ch_idx++)
for (; subview.start < subview.end; subview.start++)
{ {
bool sol = false; char curr = StrGet(line, 0, ch_idx);
StrView span_view; StringRect span;
managed = false; shifted = line;
curr = *subview.start; shifted.start_char += ch_idx;
if (prev == '\0' || prev == '\n')
{ #define HandleSpan(del, sym) \
/* TODO: Start of line, start parsing blocks. */ if (curr == del && \
sol = true; (span = DecodeSpan(shifted, del, NULL)).source_lines) \
} { \
#define Spanify(xep_symbol) \ size_t text_end = ch_idx; \
managed = true; \
textview.end = subview.start; \
text = CreateElementVessel( \
root, XEP393_TEXT \
); \
text->text_data = StringifyView(textview); \
\ \
/* Found a span. */ \ { \
span = CreateElementVessel( \ char *temp, *gen = NULL, chara[2] = { 0, '\0' }; \
root, xep_symbol \ for (i = text_start; i < text_end; i++) \
); \ { \
*chara = StrGet(line, 0, i); \
\ \
XEP393Decode(span_view, span); \ temp = gen; \
gen = StrConcat(2, gen, chara); \
Free(temp); \
} \
line_item = CreateElementVessel(elem, XEP393_TEXT); \
line_item->text_data = gen; \
} \
\ \
/* Update subview */ \ span_item = CreateElementVessel(elem, sym); \
subview.start = span_view.end + 1; \ ParseLine(span_item, span); \
\ text_start = span.end_char - line.start_char + 1; \
/* Update textview */ \ ch_idx = span.end_char; \
textview.start = subview.start; \ continue; \
textview.end = subview.start
if (IdentifySpan('_', subview, &span_view))
{
Spanify(XEP393_ITALIC);
}
else if (IdentifySpan('*', subview, &span_view))
{
Spanify(XEP393_EMPH);
}
else if (IdentifySpan('`', subview, &span_view))
{
Spanify(XEP393_MONO);
}
else if (curr == '\n')
{
/* TODO: Remove this */
span_view.start = subview.start;
span_view.end = subview.start;
Spanify(XEP393_NL);
}
else if (sol && IdentifySpans('>', '\n', subview, &span_view))
{
/* TODO: This doesnt work with more than one line of quotes. */
Spanify(XEP393_QUOT);
}
else
{
/* Text character: update end */
textview.end = subview.start;
} }
prev = curr; HandleSpan('*', XEP393_EMPH);
HandleSpan('_', XEP393_ITALIC);
HandleSpan('~', XEP393_SRKE);
HandleSpan('`', XEP393_MONO);
}
{
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
for (i = text_start; i < chars; i++)
{
*chara = StrGet(line, 0, i);
temp = gen;
gen = StrConcat(2, gen, chara);
Free(temp);
}
line_item = CreateElementVessel(elem, XEP393_TEXT);
line_item->text_data = gen;
}
}
static void
XEP393Parse(XEP393Element *root, StringRect region, int flags)
{
size_t i, lines = StrViewLines(region);
for (i = 0; i < lines; i++)
{
StringRect extend_line = StrGetl(region, i, true);
StringRect single_line = StrGetl(region, i, false);
size_t jump_by = 0;
XEP393Element *sub;
if ((flags & BLOCK_QUOTE) && (StrGet(single_line, 0, 0) == '>'))
{
StringRect quote = DecodeQuote(extend_line, &jump_by);
sub = CreateElementVessel(root, XEP393_QUOT);
XEP393Parse(sub, quote, flags);
i += jump_by - 1;
continue;
} }
if (!managed) /* TODO: Parse the single line properly. */
if (!(flags & BLOCK_CODES))
{ {
textview.end = subview.start; sub = CreateElementVessel(root, XEP393_LINE);
text = CreateElementVessel( ParseLine(sub, single_line);
root, XEP393_TEXT continue;
); }
text->text_data = StringifyView(textview);
} }
} }
XEP393Element * XEP393Element *
XEP393(char *message) XEP393(char *message)
{ {
StrView view = CreateStaticView(message); char **lines = StrSplitLines(message);
StringRect view = StrFullRect(lines);
XEP393Element *root = CreateElementVessel(NULL, XEP393_ROOT); XEP393Element *root = CreateElementVessel(NULL, XEP393_ROOT);
/* TODO: Parse blocks first, *then* spans. Considering the XEP393Parse(root, view, BLOCK_QUOTE);
* current architecture, this shouldn't be too hard to integrate,
* given how string views already manage boundaries, and elements StrFreeLines(lines);
* can already be used to contain blocks I think.
*
* Actually, nevermind, these would be pure pain. Nested blocks,
* unterminated ones, QUOTES. Just hell. I hate parsing this shit. */
XEP393Decode(view, root);
return root; return root;
} }
@ -267,8 +303,13 @@ ShoveXML(XEP393Element *element, XMLElement *xmlparent)
head = XMLCreateTag("i"); head = XMLCreateTag("i");
XMLAddChild(xmlparent, head); XMLAddChild(xmlparent, head);
break; break;
case XEP393_NL: case XEP393_LINE:
XMLAddChild(xmlparent, XMLCreateTag("br")); head = XMLCreateTag("p");
XMLAddChild(xmlparent, head);
break;
case XEP393_SRKE:
head = XMLCreateTag("s");
XMLAddChild(xmlparent, head);
break; break;
case XEP393_QUOT: case XEP393_QUOT:
head = XMLCreateTag("blockquote"); head = XMLCreateTag("blockquote");

View file

@ -1,6 +1,7 @@
#ifndef PARSEE_STRINGSPLIT_H #ifndef PARSEE_STRINGSPLIT_H
#define PARSEE_STRINGSPLIT_H #define PARSEE_STRINGSPLIT_H
#include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>
/* Represents a boundary in a linesplit string */ /* Represents a boundary in a linesplit string */
@ -32,6 +33,29 @@ extern size_t StrLines(char **split);
/* Creates a full zone covering every part of the split */ /* Creates a full zone covering every part of the split */
extern StringRect StrFullRect(char **split); extern StringRect StrFullRect(char **split);
extern char Str /**
* Retrieves a character from a string rectview, or \0 if out of bounds.
* --------------
* Returns: A character at {line}, {col} in C-indices | '\0'
* Modifies: NOTHING */
extern char StrGet(StringRect rect, int line, int col);
/**
* Retrieves a line from a string rectview, or a NULL one
* --------------
* Returns: A stringview that lives along the original
* Modifies: NOTHING */
extern StringRect StrGetl(StringRect rect, int line, bool extend);
/**
* Returns a new stringrect, shifted by N chars.
* --------------
* Returns: A stringview that lives along the original
* Modifies: NOTHING */
extern StringRect StrShift(StringRect rect, int n);
/**
 * Counts the lines covered by a string rectview.
 * --------------
 * Returns: end_line - start_line + 1 | 0 if the view starts past its end
 * Modifies: NOTHING */
extern size_t StrViewLines(StringRect);
/**
 * Retrieves the column span of a string rectview at a given line.
 * --------------
 * Returns: The view's end_char - start_char | 0 if {line} is unavailable
 * Modifies: NOTHING */
extern size_t StrViewChars(StringRect, int line);
/**
 * Logs the contents of a string rectview at LOG_INFO level, with one
 * log entry per line of the view.
 * --------------
 * Returns: NOTHING
 * Modifies: NOTHING */
extern void PrintRect(StringRect rect);
#endif #endif

View file

@ -7,9 +7,11 @@ typedef enum XEP393Type {
XEP393_ROOT, XEP393_ROOT,
XEP393_ITALIC, XEP393_ITALIC,
XEP393_EMPH, XEP393_EMPH,
XEP393_SRKE,
XEP393_MONO, XEP393_MONO,
XEP393_TEXT, XEP393_TEXT,
XEP393_QUOT, XEP393_QUOT,
XEP393_LINE,
XEP393_NL XEP393_NL
} XEP393Type; } XEP393Type;
typedef struct XEP393Element { typedef struct XEP393Element {