mirror of
https://forge.fsky.io/lda/Parsee.git
synced 2026-03-13 21:15:11 +00:00
[MOD/WIP] Mess a bit with the XEP-0393 parser
It took a comical amount of time for me to do that LMAO
This commit is contained in:
parent
8edd929c45
commit
cb0e77e7a4
5 changed files with 345 additions and 148 deletions
359
src/XEP-0393.c
359
src/XEP-0393.c
|
|
@ -1,7 +1,10 @@
|
|||
#include <XEP393.h>
|
||||
|
||||
#include <StringSplit.h>
|
||||
|
||||
#include <Cytoplasm/Memory.h>
|
||||
#include <Cytoplasm/Array.h>
|
||||
#include <Cytoplasm/Str.h>
|
||||
#include <Cytoplasm/Log.h>
|
||||
|
||||
#include <string.h>
|
||||
|
|
@ -62,190 +65,223 @@ XEP393FreeElement(XEP393Element *element)
|
|||
XEP393FreeElementBase(element, false);
|
||||
}
|
||||
|
||||
typedef struct StrView {
|
||||
char *start;
|
||||
char *end;
|
||||
|
||||
bool heap_free;
|
||||
} StrView;
|
||||
#define ViewLength(v) ((size_t) ((v.end) - (v.start)))
|
||||
static char *
|
||||
StringifyView(StrView v)
|
||||
static StringRect
|
||||
DecodeQuote(StringRect rect, size_t *skip)
|
||||
{
|
||||
char *r;
|
||||
size_t len;
|
||||
if (!v.start || v.start > v.end)
|
||||
StringRect ret = StrFullRect(NULL);
|
||||
int lines = 0;
|
||||
|
||||
/* C abuse of chaining operations */
|
||||
while ((StrGet(rect, lines, 0) == '>') && ++lines)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
len = ViewLength(v);
|
||||
r = Malloc(len + 1);
|
||||
memcpy(r, v.start, len);
|
||||
r[len] = '\0';
|
||||
|
||||
return r;
|
||||
}
|
||||
static StrView
|
||||
CreateStaticView(char *str)
|
||||
{
|
||||
StrView view = {
|
||||
.start = str,
|
||||
.end = str + strlen(str),
|
||||
.heap_free = false
|
||||
};
|
||||
|
||||
return view;
|
||||
}
|
||||
static bool
|
||||
IdentifySpans(char span_tag, char end_tag, StrView in, StrView *view)
|
||||
{
|
||||
size_t length;
|
||||
bool found = false, equal, flag;
|
||||
char prev = '\0';
|
||||
if (in.start >= in.end)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (ViewLength(in) < 2)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
equal = span_tag == end_tag;
|
||||
|
||||
flag = equal && isspace(*(in.start + 1));
|
||||
if (*in.start != span_tag || flag)
|
||||
{
|
||||
/* The opening styling directive MUST NOT be followed
|
||||
* by a whitespace character */
|
||||
return false;
|
||||
}
|
||||
view->start = in.start + 1;
|
||||
in.start += 1;
|
||||
|
||||
for (length = 0; ViewLength(in) > 0; length++, in.start++)
|
||||
{
|
||||
if (*in.start == end_tag)
|
||||
if (!ret.source_lines)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
int shift_by = 1, ch;
|
||||
ret = rect;
|
||||
ret.end_line = 0;
|
||||
|
||||
while ((ch = StrGet(rect, lines - 1, shift_by)) && isspace(ch))
|
||||
{
|
||||
shift_by++;
|
||||
}
|
||||
|
||||
if (ch)
|
||||
{
|
||||
ret = StrShift(ret, shift_by);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
ret.end_line++;
|
||||
}
|
||||
|
||||
if (!lines)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
if (skip)
|
||||
{
|
||||
*skip = lines;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
static StringRect
|
||||
DecodeSpan(StringRect rect, char del, size_t *skip)
|
||||
{
|
||||
StringRect ret = StrFullRect(NULL);
|
||||
int chars = 0;
|
||||
char c;
|
||||
|
||||
if (StrGet(rect, 0, 0) != del)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
rect = StrShift(rect, 1);
|
||||
|
||||
/* C abuse of chaining operations */
|
||||
while (((c = StrGet(rect, 0, chars)) != del) && ++chars)
|
||||
{
|
||||
if (!c)
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
if (!ret.source_lines && isspace(c))
|
||||
{
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
prev = *in.start;
|
||||
if (!ret.source_lines)
|
||||
{
|
||||
ret = rect;
|
||||
ret.end_char = ret.start_char;
|
||||
continue;
|
||||
}
|
||||
|
||||
ret.end_char++;
|
||||
}
|
||||
if (!found || !length || (prev && equal && isspace(prev)))
|
||||
ret.end_char++;
|
||||
|
||||
if (!chars)
|
||||
{
|
||||
/* the closing styling directive MUST NOT be preceded
|
||||
* by a whitespace character. */
|
||||
return false;
|
||||
return StrFullRect(NULL);
|
||||
}
|
||||
|
||||
view->end = in.start;
|
||||
return true;
|
||||
{
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
|
||||
size_t i;
|
||||
for (i = 0; i < StrViewChars(ret, 0); i++)
|
||||
{
|
||||
*chara = StrGet(ret, 0, i);
|
||||
if (!*chara)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
temp = gen;
|
||||
gen = StrConcat(2, gen, chara);
|
||||
Free(temp);
|
||||
}
|
||||
|
||||
|
||||
Free(gen);
|
||||
}
|
||||
|
||||
if (skip)
|
||||
{
|
||||
*skip = chars;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#define IdentifySpan(span_tag, in, view) IdentifySpans(span_tag, span_tag, in, view)
|
||||
|
||||
|
||||
#define BLOCK_QUOTE (1 << 0)
|
||||
#define BLOCK_CODES (1 << 1)
|
||||
static void
|
||||
XEP393Decode(StrView view, XEP393Element *root)
|
||||
ParseLine(XEP393Element *elem, StringRect line)
|
||||
{
|
||||
StrView subview = view;
|
||||
StrView textview = view;
|
||||
XEP393Element *text, *span;
|
||||
bool managed = false;
|
||||
char prev = '\0', curr = '\0';
|
||||
XEP393Element *span_item, *line_item;
|
||||
StringRect shifted;
|
||||
size_t ch_idx, chars = StrViewChars(line, 0);
|
||||
size_t text_start = 0;
|
||||
size_t i;
|
||||
|
||||
textview.end = subview.start;
|
||||
for (; subview.start < subview.end; subview.start++)
|
||||
for (ch_idx = 0; ch_idx < chars; ch_idx++)
|
||||
{
|
||||
bool sol = false;
|
||||
StrView span_view;
|
||||
managed = false;
|
||||
curr = *subview.start;
|
||||
if (prev == '\0' || prev == '\n')
|
||||
{
|
||||
/* TODO: Start of line, start parsing blocks. */
|
||||
sol = true;
|
||||
}
|
||||
#define Spanify(xep_symbol) \
|
||||
managed = true; \
|
||||
textview.end = subview.start; \
|
||||
text = CreateElementVessel( \
|
||||
root, XEP393_TEXT \
|
||||
); \
|
||||
text->text_data = StringifyView(textview); \
|
||||
\
|
||||
/* Found a span. */ \
|
||||
span = CreateElementVessel( \
|
||||
root, xep_symbol \
|
||||
); \
|
||||
\
|
||||
XEP393Decode(span_view, span); \
|
||||
\
|
||||
/* Update subview */ \
|
||||
subview.start = span_view.end + 1; \
|
||||
\
|
||||
/* Update textview */ \
|
||||
textview.start = subview.start; \
|
||||
textview.end = subview.start
|
||||
if (IdentifySpan('_', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_ITALIC);
|
||||
}
|
||||
else if (IdentifySpan('*', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_EMPH);
|
||||
}
|
||||
else if (IdentifySpan('`', subview, &span_view))
|
||||
{
|
||||
Spanify(XEP393_MONO);
|
||||
}
|
||||
else if (curr == '\n')
|
||||
{
|
||||
/* TODO: Remove this */
|
||||
span_view.start = subview.start;
|
||||
span_view.end = subview.start;
|
||||
Spanify(XEP393_NL);
|
||||
}
|
||||
else if (sol && IdentifySpans('>', '\n', subview, &span_view))
|
||||
{
|
||||
/* TODO: This doesn't work with more than one line of quotes. */
|
||||
Spanify(XEP393_QUOT);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Text character: update end */
|
||||
textview.end = subview.start;
|
||||
char curr = StrGet(line, 0, ch_idx);
|
||||
StringRect span;
|
||||
shifted = line;
|
||||
shifted.start_char += ch_idx;
|
||||
|
||||
#define HandleSpan(del, sym) \
|
||||
if (curr == del && \
|
||||
(span = DecodeSpan(shifted, del, NULL)).source_lines) \
|
||||
{ \
|
||||
size_t text_end = ch_idx; \
|
||||
\
|
||||
{ \
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' }; \
|
||||
for (i = text_start; i < text_end; i++) \
|
||||
{ \
|
||||
*chara = StrGet(line, 0, i); \
|
||||
\
|
||||
temp = gen; \
|
||||
gen = StrConcat(2, gen, chara); \
|
||||
Free(temp); \
|
||||
} \
|
||||
line_item = CreateElementVessel(elem, XEP393_TEXT); \
|
||||
line_item->text_data = gen; \
|
||||
} \
|
||||
\
|
||||
span_item = CreateElementVessel(elem, sym); \
|
||||
ParseLine(span_item, span); \
|
||||
text_start = span.end_char - line.start_char + 1; \
|
||||
ch_idx = span.end_char; \
|
||||
continue; \
|
||||
}
|
||||
|
||||
prev = curr;
|
||||
HandleSpan('*', XEP393_EMPH);
|
||||
HandleSpan('_', XEP393_ITALIC);
|
||||
HandleSpan('~', XEP393_SRKE);
|
||||
HandleSpan('`', XEP393_MONO);
|
||||
}
|
||||
|
||||
if (!managed)
|
||||
{
|
||||
textview.end = subview.start;
|
||||
text = CreateElementVessel(
|
||||
root, XEP393_TEXT
|
||||
);
|
||||
text->text_data = StringifyView(textview);
|
||||
char *temp, *gen = NULL, chara[2] = { 0, '\0' };
|
||||
for (i = text_start; i < chars; i++)
|
||||
{
|
||||
*chara = StrGet(line, 0, i);
|
||||
|
||||
temp = gen;
|
||||
gen = StrConcat(2, gen, chara);
|
||||
Free(temp);
|
||||
}
|
||||
line_item = CreateElementVessel(elem, XEP393_TEXT);
|
||||
line_item->text_data = gen;
|
||||
}
|
||||
}
|
||||
/* Walks `region` line by line and appends block-level children to `root`.
 *
 * For each line index, two rects are fetched with StrGetl(): one with the
 * third argument set to true (`extend_line`) and one with it set to false
 * (`single_line`).
 *
 * - When `flags` has BLOCK_QUOTE set and the first character of
 *   `single_line` is '>', the rect returned by DecodeQuote() is parsed
 *   recursively (with the same `flags`) into a new XEP393_QUOT child, and
 *   the loop index is advanced by the line count DecodeQuote() reported.
 * - Otherwise, when BLOCK_CODES is not set in `flags`, the line is handed
 *   to ParseLine() under a new XEP393_LINE child.
 * - A line that matches neither case produces no element. */
static void
|
||||
XEP393Parse(XEP393Element *root, StringRect region, int flags)
|
||||
{
|
||||
size_t i, lines = StrViewLines(region);
|
||||
|
||||
for (i = 0; i < lines; i++)
|
||||
{
|
||||
StringRect extend_line = StrGetl(region, i, true);
|
||||
|
||||
StringRect single_line = StrGetl(region, i, false);
|
||||
|
||||
size_t jump_by = 0;
|
||||
|
||||
XEP393Element *sub;
|
||||
|
||||
|
||||
if ((flags & BLOCK_QUOTE) && (StrGet(single_line, 0, 0) == '>'))
|
||||
{
|
||||
|
||||
StringRect quote = DecodeQuote(extend_line, &jump_by);
|
||||
|
||||
sub = CreateElementVessel(root, XEP393_QUOT);
|
||||
|
||||
XEP393Parse(sub, quote, flags);
|
||||
|
||||
|
||||
/* The loop's own i++ supplies the remaining +1, hence the -1 here.
 * NOTE(review): jump_by is a size_t initialised to 0; if DecodeQuote()
 * ever returns without writing it, `jump_by - 1` wraps and the loop
 * revisits the same line -- confirm DecodeQuote() always reports >= 1
 * on this path. */
i += jump_by - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* TODO: Parse the single line properly. */
|
||||
if (!(flags & BLOCK_CODES))
|
||||
{
|
||||
sub = CreateElementVessel(root, XEP393_LINE);
|
||||
ParseLine(sub, single_line);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
XEP393Element *
|
||||
XEP393(char *message)
|
||||
{
|
||||
StrView view = CreateStaticView(message);
|
||||
char **lines = StrSplitLines(message);
|
||||
StringRect view = StrFullRect(lines);
|
||||
XEP393Element *root = CreateElementVessel(NULL, XEP393_ROOT);
|
||||
|
||||
/* TODO: Parse blocks first, *then* spans. Considering the
|
||||
* current architecture, this shouldn't be too hard to integrate,
|
||||
* given how string views already manage boundaries, and elements
|
||||
* can already be used to contain blocks I think.
|
||||
*
|
||||
* Actually, nevermind, these would be pure pain. Nested blocks,
|
||||
* unterminated ones, QUOTES. Just hell. I hate parsing this shit. */
|
||||
XEP393Decode(view, root);
|
||||
XEP393Parse(root, view, BLOCK_QUOTE);
|
||||
|
||||
StrFreeLines(lines);
|
||||
return root;
|
||||
}
|
||||
|
||||
|
|
@ -267,8 +303,13 @@ ShoveXML(XEP393Element *element, XMLElement *xmlparent)
|
|||
head = XMLCreateTag("i");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_NL:
|
||||
XMLAddChild(xmlparent, XMLCreateTag("br"));
|
||||
case XEP393_LINE:
|
||||
head = XMLCreateTag("p");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_SRKE:
|
||||
head = XMLCreateTag("s");
|
||||
XMLAddChild(xmlparent, head);
|
||||
break;
|
||||
case XEP393_QUOT:
|
||||
head = XMLCreateTag("blockquote");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue