mirror of
https://forge.fsky.io/lda/Parsee.git
synced 2026-03-13 17:05:11 +00:00
[MOD/WIP] Mess a bit with the XEP-0393 parser
It took a comical amount of time for me to do that LMAO
This commit is contained in:
parent
8edd929c45
commit
cb0e77e7a4
5 changed files with 345 additions and 148 deletions
|
|
@ -85,7 +85,7 @@ ParseeCleanup(void *datp)
|
|||
|
||||
#define CleanupField(field, timeout, threshold) do \
|
||||
{ \
|
||||
size_t fields = 0, cleaned = 0; \
|
||||
size_t cleaned = 0; \
|
||||
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
|
||||
to_delete = ArrayCreate(); \
|
||||
while (HashMapIterate(field##s, &field, (void **) &val)) \
|
||||
|
|
@ -93,7 +93,6 @@ ParseeCleanup(void *datp)
|
|||
HashMap *obj = JsonValueAsObject(val); \
|
||||
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
|
||||
uint64_t dur = ts - age; \
|
||||
fields++; \
|
||||
\
|
||||
if ((dur > (timeout))) \
|
||||
{ \
|
||||
|
|
@ -140,7 +139,7 @@ ParseeCleanup(void *datp)
|
|||
|
||||
#define CleanupField(field, timeout, threshold) do \
|
||||
{ \
|
||||
size_t fields = 0, cleaned = 0; \
|
||||
size_t cleaned = 0; \
|
||||
field##s = JsonValueAsObject(HashMapGet(json, #field"s")); \
|
||||
to_delete = ArrayCreate(); \
|
||||
while (HashMapIterate(field##s, &field, (void **) &val)) \
|
||||
|
|
@ -148,7 +147,6 @@ ParseeCleanup(void *datp)
|
|||
HashMap *obj = JsonValueAsObject(val); \
|
||||
uint64_t age = JsonValueAsInteger(HashMapGet(obj, "age")); \
|
||||
uint64_t dur = ts - age; \
|
||||
fields++; \
|
||||
\
|
||||
if ((dur > (timeout))) \
|
||||
{ \
|
||||
|
|
|
|||
132
src/StrSplit.c
132
src/StrSplit.c
|
|
@ -1,9 +1,13 @@
|
|||
#include <StringSplit.h>
|
||||
|
||||
#include <Cytoplasm/Memory.h>
|
||||
#include <Cytoplasm/Str.h>
|
||||
#include <Cytoplasm/Log.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
char **
|
||||
StrSplitLines(char *text)
|
||||
|
|
@ -109,3 +113,131 @@ StrFullRect(char **split)
|
|||
.source_lines = split
|
||||
});
|
||||
}
|
||||
|
||||
char
|
||||
StrGet(StringRect rect, int line, int col)
|
||||
{
|
||||
int actual_line, actual_col;
|
||||
char *linep;
|
||||
if (!rect.source_lines)
|
||||
{
|
||||
return '\0';
|
||||
}
|
||||
|
||||
actual_line = rect.start_line + line;
|
||||
actual_col = rect.start_char + col;
|
||||
|
||||
if (actual_line > rect.end_line)
|
||||
{
|
||||
return '\0';
|
||||
}
|
||||
|
||||
if (!(linep = rect.source_lines[actual_line]))
|
||||
{
|
||||
return '\0';
|
||||
}
|
||||
if (actual_col > strlen(linep))
|
||||
{
|
||||
return '\0';
|
||||
}
|
||||
|
||||
return linep[actual_col];
|
||||
}
|
||||
size_t
|
||||
StrViewChars(StringRect rect, int line)
|
||||
{
|
||||
int actual_line;
|
||||
char *linep;
|
||||
if (!rect.source_lines)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
actual_line = rect.start_line + line;
|
||||
|
||||
if (actual_line > rect.end_line)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(linep = rect.source_lines[actual_line]))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
return rect.end_char - rect.start_char;
|
||||
}
|
||||
|
||||
StringRect
|
||||
StrGetl(StringRect rect, int line, bool extend)
|
||||
{
|
||||
int actual_line;
|
||||
if (!rect.source_lines)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
actual_line = rect.start_line + line;
|
||||
|
||||
if (actual_line > rect.end_line)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
rect.start_line = actual_line;
|
||||
if (!extend)
|
||||
{
|
||||
rect.end_line = actual_line;
|
||||
}
|
||||
return rect;
|
||||
}
|
||||
StringRect
|
||||
StrShift(StringRect rect, int n)
|
||||
{
|
||||
int new = rect.start_char + n;
|
||||
if (new > rect.end_char)
|
||||
{
|
||||
new = rect.end_char;
|
||||
}
|
||||
rect.start_char = new;
|
||||
|
||||
return rect;
|
||||
}
|
||||
|
||||
size_t
|
||||
StrViewLines(StringRect view)
|
||||
{
|
||||
if (view.start_line > view.end_line)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
return view.end_line - view.start_line + 1;
|
||||
}
|
||||
|
||||
void
|
||||
PrintRect(StringRect rect)
|
||||
{
|
||||
size_t i;
|
||||
if (!rect.source_lines)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < StrViewLines(rect); i++)
|
||||
{
|
||||
char *line = NULL, *tmp;
|
||||
char cbuf[2] = { 0, '\0' };
|
||||
size_t chi = 0;
|
||||
|
||||
while ((*cbuf = StrGet(rect, i, chi)) != '\0' &&
|
||||
chi++ <= StrViewChars(rect, i))
|
||||
{
|
||||
tmp = line;
|
||||
line = StrConcat(2, line, cbuf);
|
||||
Free(tmp);
|
||||
}
|
||||
|
||||
Log(LOG_INFO, line);
|
||||
Free(line);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
325
src/XEP-0393.c
325
src/XEP-0393.c
|
|
@ -1,7 +1,10 @@
|
|||
#include <XEP393.h>
|
||||
|
||||
#include <StringSplit.h>
|
||||
|
||||
#include <Cytoplasm/Memory.h>
|
||||
#include <Cytoplasm/Array.h>
|
||||
#include <Cytoplasm/Str.h>
|
||||
#include <Cytoplasm/Log.h>
|
||||
|
||||
#include <string.h>
|
||||
|
|
@ -62,190 +65,223 @@ XEP393FreeElement(XEP393Element *element)
|
|||
XEP393FreeElementBase(element, false);
|
||||
}
|
||||
|
||||
typedef struct StrView {
|
||||
char *start;
|
||||
char *end;
|
||||
|
||||
bool heap_free;
|
||||
} StrView;
|
||||
#define ViewLength(v) ((size_t) ((v.end) - (v.start)))
|
||||
static char *
|
||||
StringifyView(StrView v)
|
||||
static StringRect
|
||||
DecodeQuote(StringRect rect, size_t *skip)
|
||||
{
|
||||
char *r;
|
||||
size_t len;
|
||||
if (!v.start || v.start > v.end)
|
||||
StringRect ret = StrFullRect(NULL);
|
||||
int lines = 0;
|
||||
|
||||
/* C abuse of chaining operations */
|
||||
while ((StrGet(rect, lines, 0) == '>') && ++lines)
|
||||
{
|
||||
return NULL;
|
||||
if (!ret.source_lines)
|
||||
{
|
||||
int shift_by = 1, ch;
|
||||
ret = rect;
|
||||
ret.end_line = 0;
|
||||
|
||||
while ((ch = StrGet(rect, lines - 1, shift_by)) && isspace(ch))
|
||||
{
|
||||
shift_by++;
|
||||
}
|
||||
|
||||
len = ViewLength(v);
|
||||
r = Malloc(len + 1);
|
||||
memcpy(r, v.start, len);
|
||||
r[len] = '\0';
|
||||
if (ch)
|
||||
{
|
||||
ret = StrShift(ret, shift_by);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ret.end_line++;
|
||||
}
|
||||
|
||||
return r;
|
||||
if (!lines)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
if (skip)
|
||||
{
|
||||
*skip = lines;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
static StrView
|
||||
CreateStaticView(char *str)
|
||||
static StringRect
|
||||
DecodeSpan(StringRect rect, char del, size_t *skip)
|
||||
{
|
||||
StrView view = {
|
||||
.start = str,
|
||||
.end = str + strlen(str),
|
||||
.heap_free = false
|
||||
};
|
||||
StringRect ret = StrFullRect(NULL);
|
||||
int chars = 0;
|
||||
char c;
|
||||
|
||||
return view;
|
||||
}
|
||||
static bool
|
||||
IdentifySpans(char span_tag, char end_tag, StrView in, StrView *view)
|
||||
{
|
||||
size_t length;
|
||||
bool found = false, equal, flag;
|
||||
char prev = '\0';
|
||||
if (in.start >= in.end)
|
||||
if (StrGet(rect, 0, 0) != del)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (ViewLength(in) < 2)
|
||||
{
|
||||
return false;
|
||||
return ret;
|
||||
}
|
||||
|
||||
equal = span_tag == end_tag;
|
||||
rect = StrShift(rect, 1);
|
||||
|
||||
flag = equal && isspace(*(in.start + 1));
|
||||
if (*in.start != span_tag || flag)
|
||||
/* C abuse of chaining operations */
|
||||
while (((c = StrGet(rect, 0, chars)) != del) && ++chars)
|
||||
{
|
||||
/* The opening styling directive MUST NOT be followed
|
||||
* by a whitespace character */
|
||||
return false;
|
||||
if (!c)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
if (!ret.source_lines && isspace(c))
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
view->start = in.start + 1;
|
||||
in.start += 1;
|
||||
|
||||
for (length = 0; ViewLength(in) > 0; length++, in.start++)
|
||||
if (!ret.source_lines)
|
||||
{
|
||||
if (*in.start == end_tag)
|
||||
ret = rect;
|
||||
ret.end_char = ret.start_char;
|
||||
continue;
|
||||
}
|
||||
|
||||
ret.end_char++;
|
||||
}
|
||||
ret.end_char++;
|
||||
|
||||
if (!chars)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
{
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
|
||||
size_t i;
|
||||
for (i = 0; i < StrViewChars(ret, 0); i++)
|
||||
{
|
||||
*chara = StrGet(ret, 0, i);
|
||||
if (!*chara)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
prev = *in.start;
|
||||
}
|
||||
if (!found || !length || (prev && equal && isspace(prev)))
|
||||
{
|
||||
/* the closing styling directive MUST NOT be preceded
|
||||
* by a whitespace character. */
|
||||
return false;
|
||||
temp = gen;
|
||||
gen = StrConcat(2, gen, chara);
|
||||
Free(temp);
|
||||
}
|
||||
|
||||
view->end = in.start;
|
||||
return true;
|
||||
|
||||
Free(gen);
|
||||
}
|
||||
|
||||
if (skip)
|
||||
{
|
||||
*skip = chars;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#define IdentifySpan(span_tag, in, view) IdentifySpans(span_tag, span_tag, in, view)
|
||||
|
||||
|
||||
#define BLOCK_QUOTE (1 << 0)
|
||||
#define BLOCK_CODES (1 << 1)
|
||||
static void
|
||||
XEP393Decode(StrView view, XEP393Element *root)
|
||||
ParseLine(XEP393Element *elem, StringRect line)
|
||||
{
|
||||
StrView subview = view;
|
||||
StrView textview = view;
|
||||
XEP393Element *text, *span;
|
||||
bool managed = false;
|
||||
char prev = '\0', curr = '\0';
|
||||
XEP393Element *span_item, *line_item;
|
||||
StringRect shifted;
|
||||
size_t ch_idx, chars = StrViewChars(line, 0);
|
||||
size_t text_start = 0;
|
||||
size_t i;
|
||||
|
||||
textview.end = subview.start;
|
||||
for (; subview.start < subview.end; subview.start++)
|
||||
for (ch_idx = 0; ch_idx < chars; ch_idx++)
|
||||
{
|
||||
bool sol = false;
|
||||
StrView span_view;
|
||||
managed = false;
|
||||
curr = *subview.start;
|
||||
if (prev == '\0' || prev == '\n')
|
||||
{
|
||||
/* TODO: Start of line, start parsing blocks. */
|
||||
sol = true;
|
||||
}
|
||||
#define Spanify(xep_symbol) \
|
||||
managed = true; \
|
||||
textview.end = subview.start; \
|
||||
text = CreateElementVessel( \
|
||||
root, XEP393_TEXT \
|
||||
); \
|
||||
text->text_data = StringifyView(textview); \
|
||||
char curr = StrGet(line, 0, ch_idx);
|
||||
StringRect span;
|
||||
shifted = line;
|
||||
shifted.start_char += ch_idx;
|
||||
|
||||
#define HandleSpan(del, sym) \
|
||||
if (curr == del && \
|
||||
(span = DecodeSpan(shifted, del, NULL)).source_lines) \
|
||||
{ \
|
||||
size_t text_end = ch_idx; \
|
||||
\
|
||||
/* Found a span. */ \
|
||||
span = CreateElementVessel( \
|
||||
root, xep_symbol \
|
||||
); \
|
||||
{ \
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' }; \
|
||||
for (i = text_start; i < text_end; i++) \
|
||||
{ \
|
||||
*chara = StrGet(line, 0, i); \
|
||||
\
|
||||
XEP393Decode(span_view, span); \
|
||||
temp = gen; \
|
||||
gen = StrConcat(2, gen, chara); \
|
||||
Free(temp); \
|
||||
} \
|
||||
line_item = CreateElementVessel(elem, XEP393_TEXT); \
|
||||
line_item->text_data = gen; \
|
||||
} \
|
||||
\
|
||||
/* Update subview */ \
|
||||
subview.start = span_view.end + 1; \
|
||||
\
|
||||
/* Update textview */ \
|
||||
textview.start = subview.start; \
|
||||
textview.end = subview.start
|
||||
if (IdentifySpan('_', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_ITALIC);
|
||||
}
|
||||
else if (IdentifySpan('*', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_EMPH);
|
||||
}
|
||||
else if (IdentifySpan('`', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_MONO);
|
||||
}
|
||||
else if (curr == '\n')
|
||||
{
|
||||
/* TODO: Remove this */
|
||||
span_view.start = subview.start;
|
||||
span_view.end = subview.start;
|
||||
Spanify(XEP393_NL);
|
||||
}
|
||||
else if (sol && IdentifySpans('>', '\n', subview, &span_view))
|
||||
{
|
||||
/* TODO: This doesn't work with more than one line of quotes. */
|
||||
Spanify(XEP393_QUOT);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Text character: update end */
|
||||
textview.end = subview.start;
|
||||
span_item = CreateElementVessel(elem, sym); \
|
||||
ParseLine(span_item, span); \
|
||||
text_start = span.end_char - line.start_char + 1; \
|
||||
ch_idx = span.end_char; \
|
||||
continue; \
|
||||
}
|
||||
|
||||
prev = curr;
|
||||
HandleSpan('*', XEP393_EMPH);
|
||||
HandleSpan('_', XEP393_ITALIC);
|
||||
HandleSpan('~', XEP393_SRKE);
|
||||
HandleSpan('`', XEP393_MONO);
|
||||
}
|
||||
{
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
|
||||
for (i = text_start; i < chars; i++)
|
||||
{
|
||||
*chara = StrGet(line, 0, i);
|
||||
|
||||
temp = gen;
|
||||
gen = StrConcat(2, gen, chara);
|
||||
Free(temp);
|
||||
}
|
||||
line_item = CreateElementVessel(elem, XEP393_TEXT);
|
||||
line_item->text_data = gen;
|
||||
}
|
||||
}
|
||||
static void
|
||||
XEP393Parse(XEP393Element *root, StringRect region, int flags)
|
||||
{
|
||||
size_t i, lines = StrViewLines(region);
|
||||
|
||||
for (i = 0; i < lines; i++)
|
||||
{
|
||||
StringRect extend_line = StrGetl(region, i, true);
|
||||
StringRect single_line = StrGetl(region, i, false);
|
||||
size_t jump_by = 0;
|
||||
XEP393Element *sub;
|
||||
|
||||
if ((flags & BLOCK_QUOTE) && (StrGet(single_line, 0, 0) == '>'))
|
||||
{
|
||||
StringRect quote = DecodeQuote(extend_line, &jump_by);
|
||||
sub = CreateElementVessel(root, XEP393_QUOT);
|
||||
XEP393Parse(sub, quote, flags);
|
||||
|
||||
i += jump_by - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!managed)
|
||||
/* TODO: Parse the single line properly. */
|
||||
if (!(flags & BLOCK_CODES))
|
||||
{
|
||||
textview.end = subview.start;
|
||||
text = CreateElementVessel(
|
||||
root, XEP393_TEXT
|
||||
);
|
||||
text->text_data = StringifyView(textview);
|
||||
sub = CreateElementVessel(root, XEP393_LINE);
|
||||
ParseLine(sub, single_line);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
XEP393Element *
|
||||
XEP393(char *message)
|
||||
{
|
||||
StrView view = CreateStaticView(message);
|
||||
char **lines = StrSplitLines(message);
|
||||
StringRect view = StrFullRect(lines);
|
||||
XEP393Element *root = CreateElementVessel(NULL, XEP393_ROOT);
|
||||
|
||||
/* TODO: Parse blocks first, *then* spans. Considering the
|
||||
* current architecture, this shouldn't be too hard to integrate,
|
||||
* given how string views already manage boundaries, and elements
|
||||
* can already be used to contain blocks I think.
|
||||
*
|
||||
* Actually, nevermind, these would be pure pain. Nested blocks,
|
||||
* unterminated ones, QUOTES. Just hell. I hate parsing this shit. */
|
||||
XEP393Decode(view, root);
|
||||
XEP393Parse(root, view, BLOCK_QUOTE);
|
||||
|
||||
StrFreeLines(lines);
|
||||
return root;
|
||||
}
|
||||
|
||||
|
|
@ -267,8 +303,13 @@ ShoveXML(XEP393Element *element, XMLElement *xmlparent)
|
|||
head = XMLCreateTag("i");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_NL:
|
||||
XMLAddChild(xmlparent, XMLCreateTag("br"));
|
||||
case XEP393_LINE:
|
||||
head = XMLCreateTag("p");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_SRKE:
|
||||
head = XMLCreateTag("s");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_QUOT:
|
||||
head = XMLCreateTag("blockquote");
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef PARSEE_STRINGSPLIT_H
|
||||
#define PARSEE_STRINGSPLIT_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Represents a boundary in a linesplit string */
|
||||
|
|
@ -32,6 +33,29 @@ extern size_t StrLines(char **split);
|
|||
/* Creates a full zone covering every part of the split */
|
||||
extern StringRect StrFullRect(char **split);
|
||||
|
||||
extern char Str
|
||||
/**
|
||||
* Retrieves a character from a string rectview, or \0 if out of bounds.
|
||||
* --------------
|
||||
* Returns: A character at {line}, {col} in C-indices | '\0'
|
||||
* Modifies: NOTHING */
|
||||
extern char StrGet(StringRect rect, int line, int col);
|
||||
|
||||
/**
|
||||
* Retrieves a line from a string rectview, or a NULL one
|
||||
* --------------
|
||||
* Returns: A stringview that lives along the original
|
||||
* Modifies: NOTHING */
|
||||
extern StringRect StrGetl(StringRect rect, int line, bool extend);
|
||||
|
||||
/**
|
||||
* Returns a new stringrect, shifted by N chars.
|
||||
* --------------
|
||||
* Returns: A stringview that lives along the original
|
||||
* Modifies: NOTHING */
|
||||
extern StringRect StrShift(StringRect rect, int n);
|
||||
|
||||
/**
 * Counts the lines covered by a string rectview.
 * --------------
 * Returns: end_line - start_line + 1 | 0 if start_line is past end_line
 * Modifies: NOTHING */
extern size_t StrViewLines(StringRect);
|
||||
/**
 * Computes the width in characters (end_char - start_char) of a
 * string rectview at a given line offset.
 * --------------
 * Returns: The width | 0 if the rect has no source lines, or the
 *          line is past the rect's end line or missing
 * Modifies: NOTHING */
extern size_t StrViewChars(StringRect, int line);
|
||||
|
||||
/**
 * Logs the content of a string rectview at LOG_INFO, one entry per
 * line of the view.
 * --------------
 * Returns: NOTHING
 * Modifies: NOTHING */
extern void PrintRect(StringRect rect);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -7,9 +7,11 @@ typedef enum XEP393Type {
|
|||
XEP393_ROOT,
|
||||
XEP393_ITALIC,
|
||||
XEP393_EMPH,
|
||||
XEP393_SRKE,
|
||||
XEP393_MONO,
|
||||
XEP393_TEXT,
|
||||
XEP393_QUOT,
|
||||
XEP393_LINE,
|
||||
XEP393_NL
|
||||
} XEP393Type;
|
||||
typedef struct XEP393Element {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue