2025-02-13 03:25:43 +08:00
|
|
|
/* Filtering of objects based on simple expressions.
 * This powers the FILTER option of Vector Sets, but it is otherwise
 * general code to be used when we want to tell if a given object (with fields)
 * passes or fails a given test for scalars, strings, ...
 *
 * Copyright(C) 2024-2025 Salvatore Sanfilippo. All Rights Reserved.
 */
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
|
|
/* Token types, stored in exprtoken.token_type. */
#define EXPR_TOKEN_EOF      0
#define EXPR_TOKEN_NUM      1
#define EXPR_TOKEN_STR      2
#define EXPR_TOKEN_TUPLE    3
#define EXPR_TOKEN_SELECTOR 4
#define EXPR_TOKEN_OP       5

/* Operator codes, stored in exprtoken.opcode when the token type is
 * EXPR_TOKEN_OP. */

/* Grouping. */
#define EXPR_OP_OPAREN 0    /* ( */
#define EXPR_OP_CPAREN 1    /* ) */

/* Logical negation. */
#define EXPR_OP_NOT    2    /* ! */

/* Arithmetic. */
#define EXPR_OP_POW    3    /* ** */
#define EXPR_OP_MULT   4    /* * */
#define EXPR_OP_DIV    5    /* / */
#define EXPR_OP_MOD    6    /* % */
#define EXPR_OP_SUM    7    /* + */
#define EXPR_OP_DIFF   8    /* - */

/* Comparison and membership. */
#define EXPR_OP_GT     9    /* > */
#define EXPR_OP_GTE    10   /* >= */
#define EXPR_OP_LT     11   /* < */
#define EXPR_OP_LTE    12   /* <= */
#define EXPR_OP_EQ     13   /* == */
#define EXPR_OP_NEQ    14   /* != */
#define EXPR_OP_IN     15   /* in */

/* Logical connectives. */
#define EXPR_OP_AND    16   /* and */
#define EXPR_OP_OR     17   /* or */
|
|
|
|
|
|
|
|
|
|
/* One token of an expression. A token is either a literal (4, "foo"),
 * an operator ("+", "-", "and", ...) or a JSON selector, which starts
 * with a dot: ".age", ".properties.somearray[1]".
 *
 * 'token_type' is one of the EXPR_TOKEN_* values and tells which member
 * of the anonymous union is meaningful. */
typedef struct exprtoken {
    int token_type;                 /* Type of the parsed token. */
    int offset;                     /* Chars offset in the expression. */
    union {
        double num;                 /* EXPR_TOKEN_NUM: numerical value. */
        struct {
            char *start;            /* EXPR_TOKEN_STR / SELECTOR: first char. */
            size_t len;             /* EXPR_TOKEN_STR / SELECTOR: length. */
        } str;
        int opcode;                 /* EXPR_TOKEN_OP: one of EXPR_OP_*. */
        struct {
            struct exprtoken *ele;  /* Tuple elements. */
            size_t len;             /* Number of elements. */
        } tuple;                    /* Tuples are like [1, 2, 3], used by the
                                     * "in" operator. */
    };
} exprtoken;
|
|
|
|
|
|
|
|
|
|
/* Simple stack of expr tokens. This is used both to represent the stack
|
|
|
|
|
* of values and the stack of operands during VM execution. */
|
|
|
|
|
typedef struct exprstack {
|
|
|
|
|
exprtoken **items;
|
|
|
|
|
int numitems;
|
|
|
|
|
int allocsize;
|
|
|
|
|
} exprstack;
|
|
|
|
|
|
|
|
|
|
typedef struct exprstate {
|
|
|
|
|
char *expr; /* Expression string to compile. Note that
|
|
|
|
|
* expression token strings point directly to this
|
|
|
|
|
* string. */
|
|
|
|
|
char *p; // Currnet position inside 'expr', while parsing.
|
|
|
|
|
exprtoken current; // Last token parsed.
|
|
|
|
|
int syntax_error; // True if a syntax error was found compiling.
|
|
|
|
|
|
|
|
|
|
// Virtual machine state.
|
|
|
|
|
exprstack values_stack;
|
|
|
|
|
exprstack ops_stack;
|
2025-02-16 16:59:10 +08:00
|
|
|
exprstack tokens; // Expression processed into a sequence of tokens.
|
|
|
|
|
exprstack program; // Expression compiled into opcodes and values.
|
2025-02-13 03:25:43 +08:00
|
|
|
int ip; // Instruction pointer inside "program".
|
|
|
|
|
} exprstate;
|
|
|
|
|
|
|
|
|
|
/* Valid operators. */
|
|
|
|
|
struct {
|
|
|
|
|
char *opname;
|
|
|
|
|
int oplen;
|
|
|
|
|
int opcode;
|
|
|
|
|
int precedence;
|
2025-02-16 16:59:10 +08:00
|
|
|
int arity;
|
2025-02-13 03:25:43 +08:00
|
|
|
} ExprOptable[] = {
|
2025-02-16 16:59:10 +08:00
|
|
|
{"(", 1, EXPR_OP_OPAREN, 7, 0},
|
|
|
|
|
{")", 1, EXPR_OP_CPAREN, 7, 0},
|
|
|
|
|
{"!", 1, EXPR_OP_NOT, 6, 1},
|
|
|
|
|
{"not", 3, EXPR_OP_NOT, 6, 1},
|
|
|
|
|
{"**", 2, EXPR_OP_POW, 5, 2},
|
|
|
|
|
{"*", 1, EXPR_OP_MULT, 4, 2},
|
|
|
|
|
{"/", 1, EXPR_OP_DIV, 4, 2},
|
|
|
|
|
{"%", 1, EXPR_OP_MOD, 4, 2},
|
|
|
|
|
{"+", 1, EXPR_OP_SUM, 3, 2},
|
|
|
|
|
{"-", 1, EXPR_OP_DIFF, 3, 2},
|
|
|
|
|
{">", 1, EXPR_OP_GT, 2, 2},
|
|
|
|
|
{">=", 2, EXPR_OP_GTE, 2, 2},
|
|
|
|
|
{"<", 1, EXPR_OP_LT, 2, 2},
|
|
|
|
|
{"<=", 2, EXPR_OP_LTE, 2, 2},
|
|
|
|
|
{"==", 2, EXPR_OP_EQ, 2, 2},
|
|
|
|
|
{"!=", 2, EXPR_OP_NEQ, 2, 2},
|
|
|
|
|
{"in", 2, EXPR_OP_IN, 2, 2},
|
|
|
|
|
{"and", 3, EXPR_OP_AND, 1, 2},
|
|
|
|
|
{"&& ", 2, EXPR_OP_AND, 1, 2},
|
|
|
|
|
{"or", 2, EXPR_OP_OR, 0, 2},
|
|
|
|
|
{"||", 2, EXPR_OP_OR, 0, 2},
|
|
|
|
|
{NULL, 0, 0, 0, 0} // Terminator.
|
2025-02-13 03:25:43 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* ============================== Stack handling ============================ */
|
|
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
#define EXPR_STACK_INITIAL_SIZE 16
|
|
|
|
|
|
|
|
|
|
/* Initialize a new expression stack. */
|
|
|
|
|
void exprStackInit(exprstack *stack) {
|
|
|
|
|
stack->items = malloc(sizeof(exprtoken*) * EXPR_STACK_INITIAL_SIZE);
|
|
|
|
|
stack->numitems = 0;
|
|
|
|
|
stack->allocsize = EXPR_STACK_INITIAL_SIZE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Push a token pointer onto the stack. Return 0 on out of memory
|
|
|
|
|
* (leaving the stack as it is), 1 on success. */
|
|
|
|
|
int exprStackPush(exprstack *stack, exprtoken *token) {
|
|
|
|
|
/* Check if we need to grow the stack. */
|
|
|
|
|
if (stack->numitems == stack->allocsize) {
|
|
|
|
|
size_t newsize = stack->allocsize * 2;
|
|
|
|
|
exprtoken **newitems =
|
|
|
|
|
realloc(stack->items, sizeof(exprtoken*) * newsize);
|
|
|
|
|
if (newitems == NULL) return 0;
|
|
|
|
|
stack->items = newitems;
|
|
|
|
|
stack->allocsize = newsize;
|
|
|
|
|
}
|
|
|
|
|
stack->items[stack->numitems] = token;
|
|
|
|
|
stack->numitems++;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Pop a token pointer from the stack. Return NULL if the stack is
|
|
|
|
|
* empty. */
|
|
|
|
|
exprtoken *exprStackPop(exprstack *stack) {
|
|
|
|
|
if (stack->numitems == 0) return NULL;
|
|
|
|
|
stack->numitems--;
|
|
|
|
|
return stack->items[stack->numitems];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Just return the last element pushed, without consuming it. */
|
|
|
|
|
exprtoken *exprStackPeek(exprstack *stack) {
|
|
|
|
|
if (stack->numitems == 0) return NULL;
|
|
|
|
|
return stack->items[stack->numitems-1];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Free the stack structure state, including the items it contains, that are
|
|
|
|
|
* assumed to be heap allocated. The passed pointer itself is not freed. */
|
|
|
|
|
void exprStackFree(exprstack *stack) {
|
|
|
|
|
for (int j = 0; j < stack->numitems; j++)
|
|
|
|
|
free(stack->items[j]);
|
|
|
|
|
free(stack->items);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* =========================== Expression compilation ======================= */
|
|
|
|
|
|
|
|
|
|
/* Advance the parsing position 'es->p' past any whitespace. The position
 * is left on the first non-space character, or on the string terminator. */
void exprConsumeSpaces(exprstate *es) {
    /* The cast is needed because passing a negative 'char' to a <ctype.h>
     * function is undefined behavior. isspace() is false for the nul
     * terminator, so the loop always stops at the end of the string. */
    while (isspace((unsigned char)es->p[0])) es->p++;
}
|
|
|
|
|
|
|
|
|
|
#define EXPR_OP_SPECIALCHARS "+-*%/!()<>="
|
|
|
|
|
void exprParseOperatorOrSelector(exprstate *es) {
|
|
|
|
|
es->current.token_type = es->p[0] == '.' ? EXPR_TOKEN_SELECTOR : EXPR_TOKEN_OP;
|
|
|
|
|
char *start = es->p;
|
|
|
|
|
while(es->p[0] &&
|
|
|
|
|
(isalpha(es->p[0]) ||
|
|
|
|
|
strchr(EXPR_OP_SPECIALCHARS,es->p[0]) != NULL))
|
|
|
|
|
{
|
|
|
|
|
es->p++;
|
|
|
|
|
}
|
|
|
|
|
size_t matchlen = es->p - start;
|
|
|
|
|
|
|
|
|
|
/* If this is not a selector for an attribute to retrive, then
|
|
|
|
|
* it must be one of the valid operators. */
|
|
|
|
|
size_t bestlen = 0;
|
|
|
|
|
if (es->current.token_type == EXPR_TOKEN_OP) {
|
|
|
|
|
int j;
|
|
|
|
|
printf("maxlen: %d\n", (int) matchlen);
|
|
|
|
|
for (j = 0; ExprOptable[j].opname != NULL; j++) {
|
|
|
|
|
if (ExprOptable[j].oplen > matchlen) continue;
|
|
|
|
|
if (memcmp(ExprOptable[j].opname, start, ExprOptable[j].oplen) != 0)
|
|
|
|
|
{
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
printf("%s\n", ExprOptable[j].opname);
|
|
|
|
|
if (ExprOptable[j].oplen > bestlen) {
|
|
|
|
|
es->current.opcode = ExprOptable[j].opcode;
|
|
|
|
|
bestlen = ExprOptable[j].oplen;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (bestlen == 0) {
|
|
|
|
|
printf("HERE %s len:%d\n", start, (int)matchlen);
|
|
|
|
|
es->syntax_error++;
|
|
|
|
|
} else {
|
|
|
|
|
es->p = start + bestlen;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
es->current.str.start = start;
|
|
|
|
|
es->current.str.len = matchlen;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse the numerical literal at 'es->p' into 'es->current' as an
 * EXPR_TOKEN_NUM token. The accepted form is:
 *
 *   [-] digits [ . digits ] [ (e|E) [+|-] digits ]
 *
 * The fractional part and the exponent are consumed only when they are
 * followed by at least one digit, so in "5.age" or "2e" only the integer
 * part is taken. The text is converted with strtod(), so the decimal
 * separator is subject to the current C locale.
 *
 * On success 'es->p' is moved after the literal. If the literal is too
 * long or can't be converted, 'es->syntax_error' is incremented and
 * 'es->p' is left at the start of the literal. */
void exprParseNumber(exprstate *es) {
    es->current.token_type = EXPR_TOKEN_NUM;

    /* Delimit the literal. The unsigned char casts are required since
     * passing a negative 'char' to isdigit() is undefined behavior. */
    char *start = es->p;
    char *p = start;
    if (p[0] == '-') p++;
    while (isdigit((unsigned char)p[0])) p++;
    if (p[0] == '.' && isdigit((unsigned char)p[1])) {
        p++;
        while (isdigit((unsigned char)p[0])) p++;
    }
    if (p[0] == 'e' || p[0] == 'E') {
        char *q = p+1;
        if (q[0] == '+' || q[0] == '-') q++;
        if (isdigit((unsigned char)q[0])) {
            while (isdigit((unsigned char)q[0])) q++;
            p = q;
        }
    }

    /* Copy it into a nul terminated buffer for strtod(). */
    char num[64];
    size_t len = p - start;
    if (len >= sizeof(num)) {
        es->syntax_error++; // Number is too long.
        return;
    }
    memcpy(num, start, len);
    num[len] = 0;

    char *endptr;
    es->current.num = strtod(num, &endptr);
    if (endptr == num || *endptr != '\0') {
        es->syntax_error++;
        return;
    }
    es->p = p;
}
|
|
|
|
|
|
|
|
|
|
/* Parse the quoted string at 'es->p' into 'es->current' as an
 * EXPR_TOKEN_STR token. The string can be delimited by single or double
 * quotes: the closing quote must be the same character as the opening one.
 * A backslash makes the parser skip the next character, so an escaped
 * quote does not terminate the string.
 *
 * The token points directly inside the expression, just after the opening
 * quote: escapes are not decoded. On success 'es->p' is moved after the
 * closing quote. If the end of the expression is reached before the
 * closing quote, 'es->syntax_error' is incremented and the token length
 * is not set. */
void exprParseString(exprstate *es) {
    const char quote = *es->p++; // Remember the quote type, and skip it.

    es->current.token_type = EXPR_TOKEN_STR;
    es->current.str.start = es->p;

    for (;;) {
        char c = es->p[0];
        if (c == '\0') break; // Unterminated string.

        if (c == '\\' && es->p[1] != '\0') {
            es->p += 2; // Skip the backslash and the escaped char.
        } else if (c == quote) {
            es->current.str.len = es->p - es->current.str.start;
            es->p++; // Skip the closing quote.
            return;
        } else {
            es->p++;
        }
    }

    es->syntax_error++;
}
|
|
|
|
|
|
|
|
|
|
/* Deallocate the object returned by exprCompile(). */
|
|
|
|
|
void exprFree(exprstate *es) {
|
|
|
|
|
if (es == NULL) return;
|
|
|
|
|
|
|
|
|
|
/* Free the original expression string. */
|
|
|
|
|
if (es->expr) free(es->expr);
|
|
|
|
|
|
|
|
|
|
/* Free all stacks. */
|
|
|
|
|
exprStackFree(&es->values_stack);
|
|
|
|
|
exprStackFree(&es->ops_stack);
|
2025-02-16 16:59:10 +08:00
|
|
|
exprStackFree(&es->tokens);
|
2025-02-13 03:25:43 +08:00
|
|
|
exprStackFree(&es->program);
|
|
|
|
|
|
|
|
|
|
/* Free the state object itself. */
|
|
|
|
|
free(es);
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
/* Split the provided expression into a stack of tokens. Returns
|
|
|
|
|
* 0 on success, 1 on error. */
|
|
|
|
|
int exprTokenize(exprstate *es, char *expr, char **errptr) {
|
2025-02-13 03:25:43 +08:00
|
|
|
/* Main parsing loop. */
|
|
|
|
|
while(1) {
|
|
|
|
|
exprConsumeSpaces(es);
|
|
|
|
|
|
|
|
|
|
/* Set a flag to see if we can consider the - part of the
|
|
|
|
|
* number, or an operator. */
|
|
|
|
|
int minus_is_number = 0; // By default is an operator.
|
|
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
exprtoken *last = exprStackPeek(&es->tokens);
|
2025-02-13 03:25:43 +08:00
|
|
|
if (last == NULL) {
|
|
|
|
|
/* If we are at the start of an expression, the minus is
|
|
|
|
|
* considered a number. */
|
|
|
|
|
minus_is_number = 1;
|
|
|
|
|
} else if (last->token_type == EXPR_TOKEN_OP &&
|
|
|
|
|
last->opcode != EXPR_OP_CPAREN)
|
|
|
|
|
{
|
|
|
|
|
/* Also, if the previous token was an operator, the minus
|
|
|
|
|
* is considered a number, unless the previous operator is
|
|
|
|
|
* a closing parens. In such case it's like (...) -5, or alike
|
|
|
|
|
* and we want to emit an operator. */
|
|
|
|
|
minus_is_number = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse based on the current character. */
|
|
|
|
|
if (*es->p == '\0') {
|
|
|
|
|
es->current.token_type = EXPR_TOKEN_EOF;
|
|
|
|
|
} else if (isdigit(*es->p) ||
|
|
|
|
|
(minus_is_number && *es->p == '-' && isdigit(es->p[1])))
|
|
|
|
|
{
|
|
|
|
|
exprParseNumber(es);
|
|
|
|
|
} else if (*es->p == '"' || *es->p == '\'') {
|
|
|
|
|
exprParseString(es);
|
|
|
|
|
} else if (*es->p == '.' || isalpha(*es->p) ||
|
|
|
|
|
strchr(EXPR_OP_SPECIALCHARS, *es->p)) {
|
|
|
|
|
exprParseOperatorOrSelector(es);
|
|
|
|
|
} else {
|
|
|
|
|
es->syntax_error++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (es->syntax_error) {
|
|
|
|
|
if (errptr) *errptr = expr + (es->p - es->expr);
|
2025-02-16 16:59:10 +08:00
|
|
|
return 1; // Syntax Error.
|
2025-02-13 03:25:43 +08:00
|
|
|
}
|
|
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
/* Allocate and copy current token to tokens stack */
|
2025-02-13 03:25:43 +08:00
|
|
|
exprtoken *token = malloc(sizeof(exprtoken));
|
2025-02-16 16:59:10 +08:00
|
|
|
if (!token) return 1; // OOM.
|
2025-02-13 03:25:43 +08:00
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
*token = es->current; /* Copy the entire structure. */
|
|
|
|
|
token->offset = es->p - es->expr; /* To report errors gracefully. */
|
2025-02-13 03:25:43 +08:00
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
if (!exprStackPush(&es->tokens, token)) {
|
2025-02-13 03:25:43 +08:00
|
|
|
free(token);
|
2025-02-16 16:59:10 +08:00
|
|
|
return 1; // OOM.
|
2025-02-13 03:25:43 +08:00
|
|
|
}
|
|
|
|
|
if (es->current.token_type == EXPR_TOKEN_EOF) break;
|
|
|
|
|
}
|
2025-02-16 16:59:10 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
2025-02-13 03:25:43 +08:00
|
|
|
|
2025-02-16 17:32:38 +08:00
|
|
|
/* Helper function to get operator precedence from the operator table. */
|
|
|
|
|
int exprGetOpPrecedence(int opcode) {
|
|
|
|
|
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
|
|
|
|
|
if (ExprOptable[i].opcode == opcode)
|
|
|
|
|
return ExprOptable[i].precedence;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Helper function to get operator arity from the operator table. */
|
|
|
|
|
int exprGetOpArity(int opcode) {
|
|
|
|
|
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
|
|
|
|
|
if (ExprOptable[i].opcode == opcode)
|
|
|
|
|
return ExprOptable[i].arity;
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Process an operator during compilation. Returns 0 on success, 1 on error. */
|
|
|
|
|
int exprProcessOperator(exprstate *es, exprtoken *op, int *stack_items, char **errptr) {
|
|
|
|
|
if (op->opcode == EXPR_OP_OPAREN) {
|
|
|
|
|
// This is just a marker for us. Do nothing.
|
|
|
|
|
if (!exprStackPush(&es->ops_stack, op)) return 1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (op->opcode == EXPR_OP_CPAREN) {
|
|
|
|
|
/* Process operators until we find the matching opening parenthesis. */
|
|
|
|
|
while (1) {
|
|
|
|
|
exprtoken *top_op = exprStackPop(&es->ops_stack);
|
|
|
|
|
if (top_op == NULL) {
|
|
|
|
|
if (errptr) *errptr = es->expr + op->offset;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (top_op->opcode == EXPR_OP_OPAREN) {
|
|
|
|
|
free(top_op); // Free the opening parenthesis token.
|
|
|
|
|
free(op); // Free the closing parenthesis token.
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int arity = exprGetOpArity(top_op->opcode);
|
|
|
|
|
if (*stack_items < arity) {
|
|
|
|
|
if (errptr) *errptr = es->expr + top_op->offset;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!exprStackPush(&es->program, top_op)) return 1;
|
|
|
|
|
*stack_items = *stack_items - arity + 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int curr_prec = exprGetOpPrecedence(op->opcode);
|
|
|
|
|
|
|
|
|
|
/* Process operators with higher or equal precedence. */
|
|
|
|
|
while (1) {
|
|
|
|
|
exprtoken *top_op = exprStackPeek(&es->ops_stack);
|
|
|
|
|
if (top_op == NULL || top_op->opcode == EXPR_OP_OPAREN) break;
|
|
|
|
|
|
|
|
|
|
int top_prec = exprGetOpPrecedence(top_op->opcode);
|
|
|
|
|
if (top_prec < curr_prec) break;
|
|
|
|
|
|
|
|
|
|
/* Pop and add to program. */
|
|
|
|
|
top_op = exprStackPop(&es->ops_stack);
|
|
|
|
|
int arity = exprGetOpArity(top_op->opcode);
|
|
|
|
|
if (*stack_items < arity) {
|
|
|
|
|
if (errptr) *errptr = es->expr + top_op->offset;
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!exprStackPush(&es->program, top_op)) return 1;
|
|
|
|
|
*stack_items = *stack_items - arity + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Push current operator. */
|
|
|
|
|
if (!exprStackPush(&es->ops_stack, op)) return 1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compile the expression into a set of push-value and exec-operator
|
|
|
|
|
* that exprRun() can execute. The function returns an expstate object
|
|
|
|
|
* that can be used for execution of the program. On error, NULL
|
|
|
|
|
* is returned, and optionally the position of the error into the
|
|
|
|
|
* expression is returned by reference. */
|
2025-02-16 16:59:10 +08:00
|
|
|
exprstate *exprCompile(char *expr, char **errptr) {
|
|
|
|
|
/* Initialize expression state. */
|
|
|
|
|
exprstate *es = malloc(sizeof(exprstate));
|
|
|
|
|
if (!es) return NULL;
|
|
|
|
|
|
|
|
|
|
es->expr = strdup(expr);
|
2025-02-16 17:32:38 +08:00
|
|
|
if (!es->expr) {
|
|
|
|
|
free(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2025-02-16 16:59:10 +08:00
|
|
|
es->p = es->expr;
|
|
|
|
|
es->syntax_error = 0;
|
|
|
|
|
|
|
|
|
|
/* Initialize all stacks. */
|
|
|
|
|
exprStackInit(&es->values_stack);
|
|
|
|
|
exprStackInit(&es->ops_stack);
|
|
|
|
|
exprStackInit(&es->tokens);
|
|
|
|
|
exprStackInit(&es->program);
|
|
|
|
|
|
2025-02-16 17:32:38 +08:00
|
|
|
/* Tokenization. */
|
|
|
|
|
if (exprTokenize(es, expr, errptr)) {
|
2025-02-16 16:59:10 +08:00
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-16 17:32:38 +08:00
|
|
|
/* Compile the expression into a sequence of operations. */
|
|
|
|
|
int stack_items = 0; // Track # of items that would be on the stack
|
|
|
|
|
// during execution. This way we can detect arity
|
|
|
|
|
// issues at compile time.
|
|
|
|
|
|
|
|
|
|
/* Process each token. */
|
|
|
|
|
for (int i = 0; i < es->tokens.numitems; i++) {
|
|
|
|
|
exprtoken *token = es->tokens.items[i];
|
|
|
|
|
|
|
|
|
|
if (token->token_type == EXPR_TOKEN_EOF) break;
|
|
|
|
|
|
|
|
|
|
/* Handle values (numbers, strings, selectors). */
|
|
|
|
|
if (token->token_type == EXPR_TOKEN_NUM ||
|
|
|
|
|
token->token_type == EXPR_TOKEN_STR ||
|
|
|
|
|
token->token_type == EXPR_TOKEN_SELECTOR)
|
|
|
|
|
{
|
|
|
|
|
exprtoken *value_token = malloc(sizeof(exprtoken));
|
|
|
|
|
if (!value_token) {
|
|
|
|
|
if (errptr) *errptr = es->expr + token->offset;
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
*value_token = *token; // Copy the token.
|
|
|
|
|
if (!exprStackPush(&es->program, value_token)) {
|
|
|
|
|
free(value_token);
|
|
|
|
|
if (errptr) *errptr = es->expr + token->offset;
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
stack_items++;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Handle operators. */
|
|
|
|
|
if (token->token_type == EXPR_TOKEN_OP) {
|
|
|
|
|
exprtoken *op_token = malloc(sizeof(exprtoken));
|
|
|
|
|
if (!op_token) {
|
|
|
|
|
if (errptr) *errptr = es->expr + token->offset;
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
*op_token = *token; // Copy the token.
|
|
|
|
|
|
|
|
|
|
if (exprProcessOperator(es, op_token, &stack_items, errptr)) {
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Process remaining operators on the stack. */
|
|
|
|
|
while (es->ops_stack.numitems > 0) {
|
|
|
|
|
exprtoken *op = exprStackPop(&es->ops_stack);
|
|
|
|
|
if (op->opcode == EXPR_OP_OPAREN) {
|
|
|
|
|
if (errptr) *errptr = es->expr + op->offset;
|
|
|
|
|
free(op);
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int arity = exprGetOpArity(op->opcode);
|
|
|
|
|
if (stack_items < arity) {
|
|
|
|
|
if (errptr) *errptr = es->expr + op->offset;
|
|
|
|
|
free(op);
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!exprStackPush(&es->program, op)) {
|
|
|
|
|
free(op);
|
|
|
|
|
if (errptr) *errptr = es->expr + op->offset;
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
stack_items = stack_items - arity + 1;
|
|
|
|
|
}
|
2025-02-16 16:59:10 +08:00
|
|
|
|
2025-02-16 17:32:38 +08:00
|
|
|
/* Verify that exactly one value would remain on the stack after
|
|
|
|
|
* execution. We could also check that such value is a number, but this
|
|
|
|
|
* would make the code more complex without much gains. */
|
|
|
|
|
if (stack_items != 1) {
|
|
|
|
|
if (errptr) {
|
|
|
|
|
/* Point to the last token's offset for error reporting. */
|
|
|
|
|
exprtoken *last = es->tokens.items[es->tokens.numitems - 1];
|
|
|
|
|
*errptr = es->expr + last->offset;
|
|
|
|
|
}
|
|
|
|
|
exprFree(es);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
2025-02-16 16:59:10 +08:00
|
|
|
return es;
|
2025-02-13 03:25:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ============================ Simple test main ============================ */
|
|
|
|
|
|
|
|
|
|
#ifdef TEST_MAIN
|
|
|
|
|
/* Print a human readable representation of the token 't' to standard
 * output, without a trailing newline. Operators are shown using the first
 * name that ExprOptable lists for their opcode. Token types without a
 * specific representation are shown as "UNKNOWN". */
void exprPrintToken(exprtoken *t) {
    int type = t->token_type;

    if (type == EXPR_TOKEN_EOF) {
        printf("EOF");
        return;
    }
    if (type == EXPR_TOKEN_NUM) {
        printf("NUM:%g", t->num);
        return;
    }
    if (type == EXPR_TOKEN_STR) {
        printf("STR:\"%.*s\"", (int)t->str.len, t->str.start);
        return;
    }
    if (type == EXPR_TOKEN_SELECTOR) {
        printf("SEL:%.*s", (int)t->str.len, t->str.start);
        return;
    }
    if (type == EXPR_TOKEN_OP) {
        printf("OP:");
        int idx = 0;
        while (ExprOptable[idx].opname != NULL) {
            if (ExprOptable[idx].opcode == t->opcode) {
                printf("%s", ExprOptable[idx].opname);
                break;
            }
            idx++;
        }
        return;
    }
    printf("UNKNOWN");
}
|
|
|
|
|
|
|
|
|
|
/* Print on a single line of standard output the label 'name', the number
 * of items of 'stack', and then all the tokens from the bottom to the top
 * of the stack, each preceded by a space. */
void exprPrintStack(exprstack *stack, const char *name) {
    printf("%s (%d items):", name, stack->numitems);

    int idx = 0;
    while (idx < stack->numitems) {
        printf(" ");
        exprPrintToken(stack->items[idx]);
        idx++;
    }
    printf("\n");
}
|
|
|
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
|
char *testexpr = "(5*2)-3 and 'foo'";
|
|
|
|
|
if (argc >= 1) testexpr = argv[1];
|
|
|
|
|
|
|
|
|
|
printf("Compiling expression: %s\n", testexpr);
|
|
|
|
|
|
|
|
|
|
char *errptr = NULL;
|
|
|
|
|
exprstate *es = exprCompile(testexpr,&errptr);
|
|
|
|
|
if (es == NULL) {
|
|
|
|
|
printf("Compilation failed near \"...%s\"\n", errptr);
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (es->syntax_error) {
|
|
|
|
|
printf("Syntax error found\n");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-16 16:59:10 +08:00
|
|
|
exprPrintStack(&es->tokens, "Tokens");
|
2025-02-16 17:32:38 +08:00
|
|
|
exprPrintStack(&es->program, "Program");
|
2025-02-13 03:25:43 +08:00
|
|
|
exprFree(es);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
#endif
|