[MOD/WIP] Mess a bit with the XEP-0393 parser

It took a comical amount of time for me to do that LMAO
This commit is contained in:
LDA 2024-08-01 10:31:59 +02:00
commit cb0e77e7a4
5 changed files with 345 additions and 148 deletions

View file

@ -85,7 +85,7 @@ ParseeCleanup(void *datp)
#define CleanupField(field, timeout, threshold) do \
{ \
size_t fields = 0, cleaned = 0; \
size_t cleaned = 0; \
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
to_delete = ArrayCreate(); \
while (HashMapIterate(field##s, &field, (void **) &val)) \
@ -93,7 +93,6 @@ ParseeCleanup(void *datp)
HashMap *obj = JsonValueAsObject(val); \
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
uint64_t dur = ts - age; \
fields++; \
\
if ((dur > (timeout))) \
{ \
@ -140,7 +139,7 @@ ParseeCleanup(void *datp)
#define CleanupField(field, timeout, threshold) do \
{ \
size_t fields = 0, cleaned = 0; \
size_t cleaned = 0; \
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
to_delete = ArrayCreate(); \
while (HashMapIterate(field##s, &field, (void **) &val)) \
@ -148,7 +147,6 @@ ParseeCleanup(void *datp)
HashMap *obj = JsonValueAsObject(val); \
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
uint64_t dur = ts - age; \
fields++; \
\
if ((dur > (timeout))) \
{ \

View file

@ -1,9 +1,13 @@
#include <StringSplit.h>
#include <Cytoplasm/Memory.h>
#include <Cytoplasm/Str.h>
#include <Cytoplasm/Log.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
char **
StrSplitLines(char *text)
@ -109,3 +113,131 @@ StrFullRect(char **split)
.source_lines = split
});
}
/* Fetches one character out of a rect view.
 *
 * {line} and {col} are relative to the view: they are added to
 * rect.start_line and rect.start_char to find the effective position
 * in rect.source_lines.
 *
 * Returns '\0' when:
 *  - the view has no source lines,
 *  - the effective line or column is negative,
 *  - the effective line is past rect.end_line,
 *  - the entry for that line in source_lines is NULL,
 *  - the effective column is past the end of the underlying string.
 *
 * Note that rect.end_char is NOT consulted here: only the length of
 * the underlying line bounds the column. */
char
StrGet(StringRect rect, int line, int col)
{
    int actual_line, actual_col;
    char *linep;

    if (!rect.source_lines)
    {
        return '\0';
    }

    actual_line = rect.start_line + line;
    actual_col = rect.start_char + col;

    /* A negative effective index would read before the start of the
     * line array (or of the line itself), so reject it explicitly
     * instead of relying on implicit unsigned conversions. */
    if (actual_line < 0 || actual_col < 0)
    {
        return '\0';
    }
    if (actual_line > rect.end_line)
    {
        return '\0';
    }
    if (!(linep = rect.source_lines[actual_line]))
    {
        return '\0';
    }

    /* actual_col == strlen(linep) is allowed on purpose: it yields the
     * terminating NUL, which is also the "out of bounds" value. */
    if ((size_t) actual_col > strlen(linep))
    {
        return '\0';
    }
    return linep[actual_col];
}
/* Reports the width, in characters, of a rect view on a given line.
 *
 * {line} is relative to the view (it is added to rect.start_line).
 * The width is rect.end_char - rect.start_char; the actual length of
 * the underlying string is not taken into account.
 *
 * Returns 0 when:
 *  - the view has no source lines,
 *  - the effective line is negative or past rect.end_line,
 *  - the entry for that line in source_lines is NULL,
 *  - the view is empty or inverted (end_char <= start_char). */
size_t
StrViewChars(StringRect rect, int line)
{
    int actual_line;

    if (!rect.source_lines)
    {
        return 0;
    }

    actual_line = rect.start_line + line;
    if (actual_line < 0 || actual_line > rect.end_line)
    {
        return 0;
    }
    if (!rect.source_lines[actual_line])
    {
        return 0;
    }

    /* Without this check an inverted view would wrap around to a huge
     * size_t once the difference is converted to the return type. */
    if (rect.end_char <= rect.start_char)
    {
        return 0;
    }
    return rect.end_char - rect.start_char;
}
/* Narrows a rect view so that it starts at one of its lines.
 *
 * {line} is relative to the view (it is added to rect.start_line).
 * When {extend} is false the result covers that single line only;
 * when it is true the result keeps the original rect.end_line, so it
 * runs from the selected line to the end of the view. The character
 * bounds and source_lines are copied unchanged.
 *
 * Returns StrFullRect(NULL) when the view has no source lines, or when
 * the effective line is negative or past rect.end_line. */
StringRect
StrGetl(StringRect rect, int line, bool extend)
{
    int actual_line;

    if (!rect.source_lines)
    {
        return StrFullRect(NULL);
    }

    actual_line = rect.start_line + line;

    /* A negative effective line would otherwise be stored as the new
     * start_line and later be used to index source_lines. */
    if (actual_line < 0 || actual_line > rect.end_line)
    {
        return StrFullRect(NULL);
    }

    rect.start_line = actual_line;
    if (!extend)
    {
        rect.end_line = actual_line;
    }
    return rect;
}
/* Moves the left edge of a rect view by {n} characters.
 *
 * The new start is rect.start_char + n, capped at rect.end_char so the
 * left edge never moves past the right one. Everything else in the
 * view is returned as it came in. */
StringRect
StrShift(StringRect rect, int n)
{
    int moved = rect.start_char + n;

    if (!(moved > rect.end_char))
    {
        /* Still inside the view: take the shifted position as-is */
        rect.start_char = moved;
        return rect;
    }

    /* Overshot the right edge: collapse onto it */
    moved = rect.end_char;
    rect.start_char = moved;
    return rect;
}
/* Counts the lines spanned by a rect view.
 *
 * Both start_line and end_line are part of the view, hence the +1.
 * A view whose start_line is past its end_line spans no line at all. */
size_t
StrViewLines(StringRect view)
{
    size_t count = 0;

    if (view.start_line <= view.end_line)
    {
        count = view.end_line - view.start_line + 1;
    }
    return count;
}
/* Debugging helper: logs every line of a rect view at LOG_INFO.
 *
 * For each line of the view, the characters are fetched one by one
 * with StrGet and glued together, stopping at the view's width (as
 * reported by StrViewChars) or at the first '\0', whichever comes
 * first. Does nothing when the view has no source lines. */
void
PrintRect(StringRect rect)
{
    size_t i, lines;

    if (!rect.source_lines)
    {
        return;
    }

    lines = StrViewLines(rect);
    for (i = 0; i < lines; i++)
    {
        char *line = NULL, *tmp;
        char cbuf[2] = { 0, '\0' };
        size_t chi;
        /* The width does not change while we print: compute it once */
        size_t chars = StrViewChars(rect, (int) i);

        /* Strictly below the width: the right edge is exclusive, so
         * index {chars} already lies outside of the view. */
        for (chi = 0; chi < chars; chi++)
        {
            *cbuf = StrGet(rect, (int) i, (int) chi);
            if (*cbuf == '\0')
            {
                break;
            }

            tmp = line;
            line = StrConcat(2, line, cbuf);
            Free(tmp);
        }

        /* The text comes from the message being parsed, so it must
         * never be used as the format string itself. An empty line
         * leaves {line} NULL, which is logged as an empty string. */
        Log(LOG_INFO, "%s", line ? line : "");
        Free(line);
    }
}

View file

@ -1,7 +1,10 @@
#include <XEP393.h>
#include <StringSplit.h>
#include <Cytoplasm/Memory.h>
#include <Cytoplasm/Array.h>
#include <Cytoplasm/Str.h>
#include <Cytoplasm/Log.h>
#include <string.h>
@ -62,190 +65,223 @@ XEP393FreeElement(XEP393Element *element)
XEP393FreeElementBase(element, false);
}
typedef struct StrView {
char *start;
char *end;
bool heap_free;
} StrView;
#define ViewLength(v) ((size_t) ((v.end) - (v.start)))
static char *
StringifyView(StrView v)
static StringRect
DecodeQuote(StringRect rect, size_t *skip)
{
char *r;
size_t len;
if (!v.start || v.start > v.end)
StringRect ret = StrFullRect(NULL);
int lines = 0;
/* C abuse of chaining operations */
while ((StrGet(rect, lines, 0) == '>') && ++lines)
{
return NULL;
if (!ret.source_lines)
{
int shift_by = 1, ch;
ret = rect;
ret.end_line = 0;
while ((ch = StrGet(rect, lines - 1, shift_by)) && isspace(ch))
{
shift_by++;
}
len = ViewLength(v);
r = Malloc(len + 1);
memcpy(r, v.start, len);
r[len] = '\0';
if (ch)
{
ret = StrShift(ret, shift_by);
}
continue;
}
ret.end_line++;
}
return r;
if (!lines)
{
return StrFullRect(NULL);
}
if (skip)
{
*skip = lines;
}
return ret;
}
static StrView
CreateStaticView(char *str)
static StringRect
DecodeSpan(StringRect rect, char del, size_t *skip)
{
StrView view = {
.start = str,
.end = str + strlen(str),
.heap_free = false
};
StringRect ret = StrFullRect(NULL);
int chars = 0;
char c;
return view;
}
static bool
IdentifySpans(char span_tag, char end_tag, StrView in, StrView *view)
{
size_t length;
bool found = false, equal, flag;
char prev = '\0';
if (in.start >= in.end)
if (StrGet(rect, 0, 0) != del)
{
return false;
}
if (ViewLength(in) < 2)
{
return false;
return ret;
}
equal = span_tag == end_tag;
rect = StrShift(rect, 1);
flag = equal && isspace(*(in.start + 1));
if (*in.start != span_tag || flag)
/* C abuse of chaining operations */
while (((c = StrGet(rect, 0, chars)) != del) && ++chars)
{
/* The opening styling directive MUST NOT be followed
* by a whitespace character */
return false;
if (!c)
{
return StrFullRect(NULL);
}
if (!ret.source_lines && isspace(c))
{
return StrFullRect(NULL);
}
view->start = in.start + 1;
in.start += 1;
for (length = 0; ViewLength(in) > 0; length++, in.start++)
if (!ret.source_lines)
{
if (*in.start == end_tag)
ret = rect;
ret.end_char = ret.start_char;
continue;
}
ret.end_char++;
}
ret.end_char++;
if (!chars)
{
return StrFullRect(NULL);
}
{
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
size_t i;
for (i = 0; i < StrViewChars(ret, 0); i++)
{
*chara = StrGet(ret, 0, i);
if (!*chara)
{
found = true;
break;
}
prev = *in.start;
}
if (!found || !length || (prev && equal && isspace(prev)))
{
/* the closing styling directive MUST NOT be preceeded
* by a whitespace character. */
return false;
temp = gen;
gen = StrConcat(2, gen, chara);
Free(temp);
}
view->end = in.start;
return true;
Free(gen);
}
if (skip)
{
*skip = chars;
}
return ret;
}
#define IdentifySpan(span_tag, in, view) IdentifySpans(span_tag, span_tag, in, view)
#define BLOCK_QUOTE (1 << 0)
#define BLOCK_CODES (1 << 1)
static void
XEP393Decode(StrView view, XEP393Element *root)
ParseLine(XEP393Element *elem, StringRect line)
{
StrView subview = view;
StrView textview = view;
XEP393Element *text, *span;
bool managed = false;
char prev = '\0', curr = '\0';
XEP393Element *span_item, *line_item;
StringRect shifted;
size_t ch_idx, chars = StrViewChars(line, 0);
size_t text_start = 0;
size_t i;
textview.end = subview.start;
for (; subview.start < subview.end; subview.start++)
for (ch_idx = 0; ch_idx < chars; ch_idx++)
{
bool sol = false;
StrView span_view;
managed = false;
curr = *subview.start;
if (prev == '\0' || prev == '\n')
{
/* TODO: Start of line, start parsing blocks. */
sol = true;
}
#define Spanify(xep_symbol) \
managed = true; \
textview.end = subview.start; \
text = CreateElementVessel( \
root, XEP393_TEXT \
); \
text->text_data = StringifyView(textview); \
char curr = StrGet(line, 0, ch_idx);
StringRect span;
shifted = line;
shifted.start_char += ch_idx;
#define HandleSpan(del, sym) \
if (curr == del && \
(span = DecodeSpan(shifted, del, NULL)).source_lines) \
{ \
size_t text_end = ch_idx; \
\
/* Found a span. */ \
span = CreateElementVessel( \
root, xep_symbol \
); \
{ \
char *temp, *gen = NULL, chara[2] = { 0, '\0' }; \
for (i = text_start; i < text_end; i++) \
{ \
*chara = StrGet(line, 0, i); \
\
XEP393Decode(span_view, span); \
temp = gen; \
gen = StrConcat(2, gen, chara); \
Free(temp); \
} \
line_item = CreateElementVessel(elem, XEP393_TEXT); \
line_item->text_data = gen; \
} \
\
/* Update subview */ \
subview.start = span_view.end + 1; \
\
/* Update textview */ \
textview.start = subview.start; \
textview.end = subview.start
if (IdentifySpan('_', subview, &span_view))
{
Spanify(XEP393_ITALIC);
}
else if (IdentifySpan('*', subview, &span_view))
{
Spanify(XEP393_EMPH);
}
else if (IdentifySpan('`', subview, &span_view))
{
Spanify(XEP393_MONO);
}
else if (curr == '\n')
{
/* TODO: Remove this */
span_view.start = subview.start;
span_view.end = subview.start;
Spanify(XEP393_NL);
}
else if (sol && IdentifySpans('>', '\n', subview, &span_view))
{
/* TODO: This doesnt work with more than one line of quotes. */
Spanify(XEP393_QUOT);
}
else
{
/* Text character: update end */
textview.end = subview.start;
span_item = CreateElementVessel(elem, sym); \
ParseLine(span_item, span); \
text_start = span.end_char - line.start_char + 1; \
ch_idx = span.end_char; \
continue; \
}
prev = curr;
HandleSpan('*', XEP393_EMPH);
HandleSpan('_', XEP393_ITALIC);
HandleSpan('~', XEP393_SRKE);
HandleSpan('`', XEP393_MONO);
}
{
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
for (i = text_start; i < chars; i++)
{
*chara = StrGet(line, 0, i);
temp = gen;
gen = StrConcat(2, gen, chara);
Free(temp);
}
line_item = CreateElementVessel(elem, XEP393_TEXT);
line_item->text_data = gen;
}
}
static void
XEP393Parse(XEP393Element *root, StringRect region, int flags)
{
size_t i, lines = StrViewLines(region);
for (i = 0; i < lines; i++)
{
StringRect extend_line = StrGetl(region, i, true);
StringRect single_line = StrGetl(region, i, false);
size_t jump_by = 0;
XEP393Element *sub;
if ((flags & BLOCK_QUOTE) && (StrGet(single_line, 0, 0) == '>'))
{
StringRect quote = DecodeQuote(extend_line, &jump_by);
sub = CreateElementVessel(root, XEP393_QUOT);
XEP393Parse(sub, quote, flags);
i += jump_by - 1;
continue;
}
if (!managed)
/* TODO: Parse the single line properly. */
if (!(flags & BLOCK_CODES))
{
textview.end = subview.start;
text = CreateElementVessel(
root, XEP393_TEXT
);
text->text_data = StringifyView(textview);
sub = CreateElementVessel(root, XEP393_LINE);
ParseLine(sub, single_line);
continue;
}
}
}
XEP393Element *
XEP393(char *message)
{
StrView view = CreateStaticView(message);
char **lines = StrSplitLines(message);
StringRect view = StrFullRect(lines);
XEP393Element *root = CreateElementVessel(NULL, XEP393_ROOT);
/* TODO: Parse blocks first, *then* spans. Considering the
* current architecture, this shouldn't be too hard to integrate,
* given how string views already manage boundaries, and elements
* can already be used to contain blocks I think.
*
* Actually, nevermind, these would be pure pain. Nested blocks,
* unterminated ones, QUOTES. Just hell. I hate parsing this shit. */
XEP393Decode(view, root);
XEP393Parse(root, view, BLOCK_QUOTE);
StrFreeLines(lines);
return root;
}
@ -267,8 +303,13 @@ ShoveXML(XEP393Element *element, XMLElement *xmlparent)
head = XMLCreateTag("i");
XMLAddChild(xmlparent, head);
break;
case XEP393_NL:
XMLAddChild(xmlparent, XMLCreateTag("br"));
case XEP393_LINE:
head = XMLCreateTag("p");
XMLAddChild(xmlparent, head);
break;
case XEP393_SRKE:
head = XMLCreateTag("s");
XMLAddChild(xmlparent, head);
break;
case XEP393_QUOT:
head = XMLCreateTag("blockquote");

View file

@ -1,6 +1,7 @@
#ifndef PARSEE_STRINGSPLIT_H
#define PARSEE_STRINGSPLIT_H
#include <stdbool.h>
#include <stdlib.h>
/* Represents a boundary in a linesplit string */
@ -32,6 +33,29 @@ extern size_t StrLines(char **split);
/* Creates a full zone covering every part of the split */
extern StringRect StrFullRect(char **split);
/**
 * Retrieves a character from a string rectview, or \0 if out of bounds.
 * --------------
 * Returns: A character at {line}, {col} in C-indices | '\0'
 * Modifies: NOTHING */
extern char StrGet(StringRect rect, int line, int col);

/**
 * Retrieves a line from a string rectview, or a NULL one if {line}
 * is out of the view. If {extend} is false, the returned view only
 * covers that line; otherwise it keeps running until the end of the
 * original view.
 * --------------
 * Returns: A stringview that lives along the original
 * Modifies: NOTHING */
extern StringRect StrGetl(StringRect rect, int line, bool extend);

/**
 * Returns a new stringrect, with its start shifted by N chars (and
 * capped to the end of the view).
 * --------------
 * Returns: A stringview that lives along the original
 * Modifies: NOTHING */
extern StringRect StrShift(StringRect rect, int n);

/**
 * Counts the lines covered by a string rectview, both its first and
 * last line included.
 * --------------
 * Returns: The number of lines | 0 if the view starts past its end
 * Modifies: NOTHING */
extern size_t StrViewLines(StringRect view);

/**
 * Computes the width of a string rectview on a given line, that is,
 * the distance between its start and end characters.
 * --------------
 * Returns: The width in chars | 0 if {line} is not available
 * Modifies: NOTHING */
extern size_t StrViewChars(StringRect rect, int line);

/**
 * Logs the contents of a string rectview, one log entry per line.
 * Meant as a debugging aid.
 * --------------
 * Returns: NOTHING
 * Modifies: NOTHING */
extern void PrintRect(StringRect rect);
#endif

View file

@ -7,9 +7,11 @@ typedef enum XEP393Type {
XEP393_ROOT,
XEP393_ITALIC,
XEP393_EMPH,
XEP393_SRKE,
XEP393_MONO,
XEP393_TEXT,
XEP393_QUOT,
XEP393_LINE,
XEP393_NL
} XEP393Type;
typedef struct XEP393Element {