mirror of https://github.com/redis/redis.git
				
				
				
			
		
			
				
	
	
		
			996 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C
		
	
	
	
			
		
		
	
	
			996 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C
		
	
	
	
| /* Filtering of objects based on simple expressions.
 | |
|  * This powers the FILTER option of Vector Sets, but it is otherwise
 | |
|  * general code to be used when we want to tell if a given object (with fields)
 | |
|  * passes or fails a given test for scalars, strings, ...
 | |
|  *
 | |
|  * Copyright(C) 2024-Present, Redis Ltd. All Rights Reserved.
 | |
|  * Originally authored by: Salvatore Sanfilippo.
 | |
|  */
 | |
| 
 | |
| #include <stdio.h>
 | |
| #include <stdlib.h>
 | |
| #include <ctype.h>
 | |
| #include <math.h>
 | |
| #include <string.h>
 | |
| #include "cJSON.h"
 | |
| 
 | |
| #ifdef TEST_MAIN
 | |
| #define RedisModule_Alloc malloc
 | |
| #define RedisModule_Realloc realloc
 | |
| #define RedisModule_Free free
 | |
| #define RedisModule_Strdup strdup
 | |
| #endif
 | |
| 
 | |
/* Token types, stored in exprtoken.token_type. */
#define EXPR_TOKEN_EOF      0   /* End of the expression. */
#define EXPR_TOKEN_NUM      1   /* Numeric literal. */
#define EXPR_TOKEN_STR      2   /* Quoted string literal. */
#define EXPR_TOKEN_TUPLE    3   /* Tuple such as [1, "foo", 42]. */
#define EXPR_TOKEN_SELECTOR 4   /* Selector starting with a dot, e.g. .age */
#define EXPR_TOKEN_OP       5   /* Operator, identified by an EXPR_OP_* code. */
 | |
| 
 | |
/* Operator codes, stored in exprtoken.opcode for EXPR_TOKEN_OP tokens.
 * The textual spelling of each operator is listed on the right. */
#define EXPR_OP_OPAREN  0   /* (   */
#define EXPR_OP_CPAREN  1   /* )   */
#define EXPR_OP_NOT     2   /* !   */
#define EXPR_OP_POW     3   /* **  */
#define EXPR_OP_MULT    4   /* *   */
#define EXPR_OP_DIV     5   /* /   */
#define EXPR_OP_MOD     6   /* %   */
#define EXPR_OP_SUM     7   /* +   */
#define EXPR_OP_DIFF    8   /* -   */
#define EXPR_OP_GT      9   /* >   */
#define EXPR_OP_GTE     10  /* >=  */
#define EXPR_OP_LT      11  /* <   */
#define EXPR_OP_LTE     12  /* <=  */
#define EXPR_OP_EQ      13  /* ==  */
#define EXPR_OP_NEQ     14  /* !=  */
#define EXPR_OP_IN      15  /* in  */
#define EXPR_OP_AND     16  /* and */
#define EXPR_OP_OR      17  /* or  */
 | |
| 
 | |
/* A single token of an expression. A token is one of:
 *
 * - a literal, like 4 or "foo";
 * - an operator, like "+", "-", "and";
 * - a tuple, like [1, 2, 3];
 * - a selector, that is a name prefixed by a dot, like ".age".
 *
 * Which member of the anonymous union is meaningful depends on
 * 'token_type' (one of the EXPR_TOKEN_* constants). */
typedef struct exprtoken {
    int refcount;           /* Number of owners; the token is freed by
                             * exprTokenRelease() when this drops to 0. */
    int token_type;         /* EXPR_TOKEN_* type of this token. */
    int offset;             /* Character offset inside the expression. */
    union {
        double num;         /* Payload of EXPR_TOKEN_NUM. */
        struct {
            char *start;    /* First byte of the string (STR / SELECTOR).
                             * Not necessarily null terminated. */
            size_t len;     /* Length in bytes (STR / SELECTOR). */
            char *heapstr;  /* Non-NULL when the token owns a private heap
                             * copy of the string, that must be freed with
                             * the token. When NULL, 'start' just points
                             * inside the compiled expression string
                             * (exprstate->expr). */
        } str;
        int opcode;         /* EXPR_OP_* code, payload of EXPR_TOKEN_OP. */
        struct {
            struct exprtoken **ele; /* Array of 'len' element tokens. */
            size_t len;             /* Number of elements. */
        } tuple;            /* Payload of EXPR_TOKEN_TUPLE, e.g. [1, 2, 3]
                             * as used by the "in" operator. */
    };
} exprtoken;
 | |
| 
 | |
| /* Simple stack of expr tokens. This is used both to represent the stack
 | |
|  * of values and the stack of operands during VM execution. */
 | |
| typedef struct exprstack {
 | |
|     exprtoken **items;
 | |
|     int numitems;
 | |
|     int allocsize;
 | |
| } exprstack;
 | |
| 
 | |
| typedef struct exprstate {
 | |
|     char *expr;             /* Expression string to compile. Note that
 | |
|                              * expression token strings point directly to this
 | |
|                              * string. */
 | |
|     char *p;                // Currnet position inside 'expr', while parsing.
 | |
| 
 | |
|     // Virtual machine state.
 | |
|     exprstack values_stack;
 | |
|     exprstack ops_stack;    // Operator stack used during compilation.
 | |
|     exprstack tokens;       // Expression processed into a sequence of tokens.
 | |
|     exprstack program;      // Expression compiled into opcodes and values.
 | |
| } exprstate;
 | |
| 
 | |
| /* Valid operators. */
 | |
| struct {
 | |
|     char *opname;
 | |
|     int oplen;
 | |
|     int opcode;
 | |
|     int precedence;
 | |
|     int arity;
 | |
| } ExprOptable[] = {
 | |
|     {"(",   1,  EXPR_OP_OPAREN,  7, 0},
 | |
|     {")",   1,  EXPR_OP_CPAREN,  7, 0},
 | |
|     {"!",   1,  EXPR_OP_NOT,     6, 1},
 | |
|     {"not", 3,  EXPR_OP_NOT,     6, 1},
 | |
|     {"**",  2,  EXPR_OP_POW,     5, 2},
 | |
|     {"*",   1,  EXPR_OP_MULT,    4, 2},
 | |
|     {"/",   1,  EXPR_OP_DIV,     4, 2},
 | |
|     {"%",   1,  EXPR_OP_MOD,     4, 2},
 | |
|     {"+",   1,  EXPR_OP_SUM,     3, 2},
 | |
|     {"-",   1,  EXPR_OP_DIFF,    3, 2},
 | |
|     {">",   1,  EXPR_OP_GT,      2, 2},
 | |
|     {">=",  2,  EXPR_OP_GTE,     2, 2},
 | |
|     {"<",   1,  EXPR_OP_LT,      2, 2},
 | |
|     {"<=",  2,  EXPR_OP_LTE,     2, 2},
 | |
|     {"==",  2,  EXPR_OP_EQ,      2, 2},
 | |
|     {"!=",  2,  EXPR_OP_NEQ,     2, 2},
 | |
|     {"in",  2,  EXPR_OP_IN,      2, 2},
 | |
|     {"and", 3,  EXPR_OP_AND,     1, 2},
 | |
|     {"&&",  2,  EXPR_OP_AND,     1, 2},
 | |
|     {"or",  2,  EXPR_OP_OR,      0, 2},
 | |
|     {"||",  2,  EXPR_OP_OR,      0, 2},
 | |
|     {NULL,  0,  0,               0, 0}   // Terminator.
 | |
| };
 | |
| 
 | |
/* Non alphabetical characters that may be part of an operator. */
#define EXPR_OP_SPECIALCHARS "+-*%/!()<>=|&"
/* Non alphanumerical characters that are allowed inside a selector name. */
#define EXPR_SELECTOR_SPECIALCHARS "_-"
 | |
| 
 | |
| /* ================================ Expr token ============================== */
 | |
| 
 | |
| /* Return an heap allocated token of the specified type, setting the
 | |
|  * reference count to 1. */
 | |
| exprtoken *exprNewToken(int type) {
 | |
|     exprtoken *t = RedisModule_Alloc(sizeof(exprtoken));
 | |
|     memset(t,0,sizeof(*t));
 | |
|     t->token_type = type;
 | |
|     t->refcount = 1;
 | |
|     return t;
 | |
| }
 | |
| 
 | |
| /* Generic free token function, can be used to free stack allocated
 | |
|  * objects (in this case the pointer itself will not be freed) or
 | |
|  * heap allocated objects. See the wrappers below. */
 | |
| void exprTokenRelease(exprtoken *t) {
 | |
|     if (t == NULL) return;
 | |
| 
 | |
|     if (t->refcount <= 0) {
 | |
|         printf("exprTokenRelease() against a token with refcount %d!\n"
 | |
|                "Aborting program execution\n",
 | |
|             t->refcount);
 | |
|         exit(1);
 | |
|     }
 | |
|     t->refcount--;
 | |
|     if (t->refcount > 0) return;
 | |
| 
 | |
|     // We reached refcount 0: free the object.
 | |
|     if (t->token_type == EXPR_TOKEN_STR) {
 | |
|         if (t->str.heapstr != NULL) RedisModule_Free(t->str.heapstr);
 | |
|     } else if (t->token_type == EXPR_TOKEN_TUPLE) {
 | |
|         for (size_t j = 0; j < t->tuple.len; j++)
 | |
|             exprTokenRelease(t->tuple.ele[j]);
 | |
|         if (t->tuple.ele) RedisModule_Free(t->tuple.ele);
 | |
|     }
 | |
|     RedisModule_Free(t);
 | |
| }
 | |
| 
 | |
| void exprTokenRetain(exprtoken *t) {
 | |
|     t->refcount++;
 | |
| }
 | |
| 
 | |
| /* ============================== Stack handling ============================ */
 | |
| 
 | |
| #include <stdlib.h>
 | |
| #include <string.h>
 | |
| 
 | |
| #define EXPR_STACK_INITIAL_SIZE 16
 | |
| 
 | |
| /* Initialize a new expression stack. */
 | |
| void exprStackInit(exprstack *stack) {
 | |
|     stack->items = RedisModule_Alloc(sizeof(exprtoken*) * EXPR_STACK_INITIAL_SIZE);
 | |
|     stack->numitems = 0;
 | |
|     stack->allocsize = EXPR_STACK_INITIAL_SIZE;
 | |
| }
 | |
| 
 | |
| /* Push a token pointer onto the stack. Does not increment the refcount
 | |
|  * of the token: it is up to the caller doing this. */
 | |
| void exprStackPush(exprstack *stack, exprtoken *token) {
 | |
|     /* Check if we need to grow the stack. */
 | |
|     if (stack->numitems == stack->allocsize) {
 | |
|         size_t newsize = stack->allocsize * 2;
 | |
|         exprtoken **newitems =
 | |
|             RedisModule_Realloc(stack->items, sizeof(exprtoken*) * newsize);
 | |
|         stack->items = newitems;
 | |
|         stack->allocsize = newsize;
 | |
|     }
 | |
|     stack->items[stack->numitems] = token;
 | |
|     stack->numitems++;
 | |
| }
 | |
| 
 | |
| /* Pop a token pointer from the stack. Return NULL if the stack is
 | |
|  * empty. Does NOT recrement the refcount of the token, it's up to the
 | |
|  * caller to do so, as the new owner of the reference. */
 | |
| exprtoken *exprStackPop(exprstack *stack) {
 | |
|     if (stack->numitems == 0) return NULL;
 | |
|     stack->numitems--;
 | |
|     return stack->items[stack->numitems];
 | |
| }
 | |
| 
 | |
| /* Just return the last element pushed, without consuming it nor altering
 | |
|  * the reference count. */
 | |
| exprtoken *exprStackPeek(exprstack *stack) {
 | |
|     if (stack->numitems == 0) return NULL;
 | |
|     return stack->items[stack->numitems-1];
 | |
| }
 | |
| 
 | |
| /* Free the stack structure state, including the items it contains, that are
 | |
|  * assumed to be heap allocated. The passed pointer itself is not freed. */
 | |
| void exprStackFree(exprstack *stack) {
 | |
|     for (int j = 0; j < stack->numitems; j++)
 | |
|         exprTokenRelease(stack->items[j]);
 | |
|     RedisModule_Free(stack->items);
 | |
| }
 | |
| 
 | |
| /* Just reset the stack removing all the items, but leaving it in a state
 | |
|  * that makes it still usable for new elements. */
 | |
| void exprStackReset(exprstack *stack) {
 | |
|     for (int j = 0; j < stack->numitems; j++)
 | |
|         exprTokenRelease(stack->items[j]);
 | |
|     stack->numitems = 0;
 | |
| }
 | |
| 
 | |
| /* =========================== Expression compilation ======================= */
 | |
| 
 | |
| void exprConsumeSpaces(exprstate *es) {
 | |
|     while(es->p[0] && isspace(es->p[0])) es->p++;
 | |
| }
 | |
| 
 | |
| /* Parse an operator, trying to match the longer match in the
 | |
|  * operators table. */
 | |
| exprtoken *exprParseOperator(exprstate *es) {
 | |
|     exprtoken *t = exprNewToken(EXPR_TOKEN_OP);
 | |
|     char *start = es->p;
 | |
| 
 | |
|     while(es->p[0] &&
 | |
|           (isalpha(es->p[0]) ||
 | |
|            strchr(EXPR_OP_SPECIALCHARS,es->p[0]) != NULL))
 | |
|     {
 | |
|         es->p++;
 | |
|     }
 | |
| 
 | |
|     int matchlen = es->p - start;
 | |
|     int bestlen = 0;
 | |
|     int j;
 | |
| 
 | |
|     // Find the longest matching operator.
 | |
|     for (j = 0; ExprOptable[j].opname != NULL; j++) {
 | |
|         if (ExprOptable[j].oplen > matchlen) continue;
 | |
|         if (memcmp(ExprOptable[j].opname, start, ExprOptable[j].oplen) != 0)
 | |
|         {
 | |
|             continue;
 | |
|         }
 | |
|         if (ExprOptable[j].oplen > bestlen) {
 | |
|             t->opcode = ExprOptable[j].opcode;
 | |
|             bestlen = ExprOptable[j].oplen;
 | |
|         }
 | |
|     }
 | |
|     if (bestlen == 0) {
 | |
|         exprTokenRelease(t);
 | |
|         return NULL;
 | |
|     } else {
 | |
|         es->p = start + bestlen;
 | |
|     }
 | |
|     return t;
 | |
| }
 | |
| 
 | |
| // Valid selector charset.
 | |
| static int is_selector_char(int c) {
 | |
|     return (isalpha(c) ||
 | |
|             isdigit(c) ||
 | |
|             strchr(EXPR_SELECTOR_SPECIALCHARS,c) != NULL);
 | |
| }
 | |
| 
 | |
| /* Parse selectors, they start with a dot and can have alphanumerical
 | |
|  * or few special chars. */
 | |
| exprtoken *exprParseSelector(exprstate *es) {
 | |
|     exprtoken *t = exprNewToken(EXPR_TOKEN_SELECTOR);
 | |
|     es->p++; // Skip dot.
 | |
|     char *start = es->p;
 | |
| 
 | |
|     while(es->p[0] && is_selector_char(es->p[0])) es->p++;
 | |
|     int matchlen = es->p - start;
 | |
|     t->str.start = start;
 | |
|     t->str.len = matchlen;
 | |
|     return t;
 | |
| }
 | |
| 
 | |
| exprtoken *exprParseNumber(exprstate *es) {
 | |
|     exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
 | |
|     char num[64];
 | |
|     int idx = 0;
 | |
|     while(isdigit(es->p[0]) || es->p[0] == '.' || es->p[0] == 'e' ||
 | |
|           es->p[0] == 'E' || (idx == 0 && es->p[0] == '-'))
 | |
|     {
 | |
|         if (idx >= (int)sizeof(num)-1) {
 | |
|             exprTokenRelease(t);
 | |
|             return NULL;
 | |
|         }
 | |
|         num[idx++] = es->p[0];
 | |
|         es->p++;
 | |
|     }
 | |
|     num[idx] = 0;
 | |
| 
 | |
|     char *endptr;
 | |
|     t->num = strtod(num, &endptr);
 | |
|     if (*endptr != '\0') {
 | |
|         exprTokenRelease(t);
 | |
|         return NULL;
 | |
|     }
 | |
|     return t;
 | |
| }
 | |
| 
 | |
| exprtoken *exprParseString(exprstate *es) {
 | |
|     char quote = es->p[0];  /* Store the quote type (' or "). */
 | |
|     es->p++;                /* Skip opening quote. */
 | |
| 
 | |
|     exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
 | |
|     t->str.start = es->p;
 | |
| 
 | |
|     while(es->p[0] != '\0') {
 | |
|         if (es->p[0] == '\\' && es->p[1] != '\0') {
 | |
|             es->p += 2; // Skip escaped char.
 | |
|             continue;
 | |
|         }
 | |
|         if (es->p[0] == quote) {
 | |
|             t->str.len = es->p - t->str.start;
 | |
|             es->p++; // Skip closing quote.
 | |
|             return t;
 | |
|         }
 | |
|         es->p++;
 | |
|     }
 | |
|     /* If we reach here, string was not terminated. */
 | |
|     exprTokenRelease(t);
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| /* Parse a tuple of the form [1, "foo", 42]. No nested tuples are
 | |
|  * supported. This type is useful mostly to be used with the "IN"
 | |
|  * operator. */
 | |
| exprtoken *exprParseTuple(exprstate *es) {
 | |
|     exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
 | |
|     t->tuple.ele = NULL;
 | |
|     t->tuple.len = 0;
 | |
|     es->p++; /* Skip opening '['. */
 | |
| 
 | |
|     size_t allocated = 0;
 | |
|     while(1) {
 | |
|         exprConsumeSpaces(es);
 | |
| 
 | |
|         /* Check for empty tuple or end. */
 | |
|         if (es->p[0] == ']') {
 | |
|             es->p++;
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         /* Grow tuple array if needed. */
 | |
|         if (t->tuple.len == allocated) {
 | |
|             size_t newsize = allocated == 0 ? 4 : allocated * 2;
 | |
|             exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
 | |
|                 sizeof(exprtoken*) * newsize);
 | |
|             t->tuple.ele = newele;
 | |
|             allocated = newsize;
 | |
|         }
 | |
| 
 | |
|         /* Parse tuple element. */
 | |
|         exprtoken *ele = NULL;
 | |
|         if (isdigit(es->p[0]) || es->p[0] == '-') {
 | |
|             ele = exprParseNumber(es);
 | |
|         } else if (es->p[0] == '"' || es->p[0] == '\'') {
 | |
|             ele = exprParseString(es);
 | |
|         } else {
 | |
|             exprTokenRelease(t);
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         /* Error parsing number/string? */
 | |
|         if (ele == NULL) {
 | |
|             exprTokenRelease(t);
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         /* Store element if no error was detected. */
 | |
|         t->tuple.ele[t->tuple.len] = ele;
 | |
|         t->tuple.len++;
 | |
| 
 | |
|         /* Check for next element. */
 | |
|         exprConsumeSpaces(es);
 | |
|         if (es->p[0] == ']') {
 | |
|             es->p++;
 | |
|             break;
 | |
|         }
 | |
|         if (es->p[0] != ',') {
 | |
|             exprTokenRelease(t);
 | |
|             return NULL;
 | |
|         }
 | |
|         es->p++; /* Skip comma. */
 | |
|     }
 | |
|     return t;
 | |
| }
 | |
| 
 | |
| /* Deallocate the object returned by exprCompile(). */
 | |
| void exprFree(exprstate *es) {
 | |
|     if (es == NULL) return;
 | |
| 
 | |
|     /* Free the original expression string. */
 | |
|     if (es->expr) RedisModule_Free(es->expr);
 | |
| 
 | |
|     /* Free all stacks. */
 | |
|     exprStackFree(&es->values_stack);
 | |
|     exprStackFree(&es->ops_stack);
 | |
|     exprStackFree(&es->tokens);
 | |
|     exprStackFree(&es->program);
 | |
| 
 | |
|     /* Free the state object itself. */
 | |
|     RedisModule_Free(es);
 | |
| }
 | |
| 
 | |
| /* Split the provided expression into a stack of tokens. Returns
 | |
|  * 0 on success, 1 on error. */
 | |
| int exprTokenize(exprstate *es, int *errpos) {
 | |
|     /* Main parsing loop. */
 | |
|     while(1) {
 | |
|         exprConsumeSpaces(es);
 | |
| 
 | |
|         /* Set a flag to see if we can consider the - part of the
 | |
|          * number, or an operator. */
 | |
|         int minus_is_number = 0; // By default is an operator.
 | |
| 
 | |
|         exprtoken *last = exprStackPeek(&es->tokens);
 | |
|         if (last == NULL) {
 | |
|             /* If we are at the start of an expression, the minus is
 | |
|              * considered a number. */
 | |
|             minus_is_number = 1;
 | |
|         } else if (last->token_type == EXPR_TOKEN_OP &&
 | |
|                    last->opcode != EXPR_OP_CPAREN)
 | |
|         {
 | |
|             /* Also, if the previous token was an operator, the minus
 | |
|              * is considered a number, unless the previous operator is
 | |
|              * a closing parens. In such case it's like (...) -5, or alike
 | |
|              * and we want to emit an operator. */
 | |
|             minus_is_number = 1;
 | |
|         }
 | |
| 
 | |
|         /* Parse based on the current character. */
 | |
|         exprtoken *current = NULL;
 | |
|         if (*es->p == '\0') {
 | |
|             current = exprNewToken(EXPR_TOKEN_EOF);
 | |
|         } else if (isdigit(*es->p) ||
 | |
|                   (minus_is_number && *es->p == '-' && isdigit(es->p[1])))
 | |
|         {
 | |
|             current = exprParseNumber(es);
 | |
|         } else if (*es->p == '"' || *es->p == '\'') {
 | |
|             current = exprParseString(es);
 | |
|         } else if (*es->p == '.' && is_selector_char(es->p[1])) {
 | |
|             current = exprParseSelector(es);
 | |
|         } else if (isalpha(*es->p) || strchr(EXPR_OP_SPECIALCHARS, *es->p)) {
 | |
|             current = exprParseOperator(es);
 | |
|         } else if (*es->p == '[') {
 | |
|             current = exprParseTuple(es);
 | |
|         }
 | |
| 
 | |
|         if (current == NULL) {
 | |
|             if (errpos) *errpos = es->p - es->expr;
 | |
|             return 1; // Syntax Error.
 | |
|         }
 | |
| 
 | |
|         /* Push the current token to tokens stack. */
 | |
|         exprStackPush(&es->tokens, current);
 | |
|         if (current->token_type == EXPR_TOKEN_EOF) break;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /* Helper function to get operator precedence from the operator table. */
 | |
| int exprGetOpPrecedence(int opcode) {
 | |
|     for (int i = 0; ExprOptable[i].opname != NULL; i++) {
 | |
|         if (ExprOptable[i].opcode == opcode)
 | |
|             return ExprOptable[i].precedence;
 | |
|     }
 | |
|     return -1;
 | |
| }
 | |
| 
 | |
| /* Helper function to get operator arity from the operator table. */
 | |
| int exprGetOpArity(int opcode) {
 | |
|     for (int i = 0; ExprOptable[i].opname != NULL; i++) {
 | |
|         if (ExprOptable[i].opcode == opcode)
 | |
|             return ExprOptable[i].arity;
 | |
|     }
 | |
|     return -1;
 | |
| }
 | |
| 
 | |
| /* Process an operator during compilation. Returns 0 on success, 1 on error.
 | |
|  * This function will retain a reference of the operator 'op' in case it
 | |
|  * is pushed on the operators stack. */
 | |
| int exprProcessOperator(exprstate *es, exprtoken *op, int *stack_items, int *errpos) {
 | |
|     if (op->opcode == EXPR_OP_OPAREN) {
 | |
| 	// This is just a marker for us. Do nothing.
 | |
|         exprStackPush(&es->ops_stack, op);
 | |
|         exprTokenRetain(op);
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     if (op->opcode == EXPR_OP_CPAREN) {
 | |
|         /* Process operators until we find the matching opening parenthesis. */
 | |
|         while (1) {
 | |
|             exprtoken *top_op = exprStackPop(&es->ops_stack);
 | |
|             if (top_op == NULL) {
 | |
|                 if (errpos) *errpos = op->offset;
 | |
|                 return 1;
 | |
|             }
 | |
| 
 | |
|             if (top_op->opcode == EXPR_OP_OPAREN) {
 | |
|                 /* Open parethesis found. Our work finished. */
 | |
|                 exprTokenRelease(top_op);
 | |
|                 return 0;
 | |
|             }
 | |
| 
 | |
|             int arity = exprGetOpArity(top_op->opcode);
 | |
|             if (*stack_items < arity) {
 | |
|                 exprTokenRelease(top_op);
 | |
|                 if (errpos) *errpos = top_op->offset;
 | |
|                 return 1;
 | |
|             }
 | |
| 
 | |
|             /* Move the operator on the program stack. */
 | |
|             exprStackPush(&es->program, top_op);
 | |
|             *stack_items = *stack_items - arity + 1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     int curr_prec = exprGetOpPrecedence(op->opcode);
 | |
| 
 | |
|     /* Process operators with higher or equal precedence. */
 | |
|     while (1) {
 | |
|         exprtoken *top_op = exprStackPeek(&es->ops_stack);
 | |
|         if (top_op == NULL || top_op->opcode == EXPR_OP_OPAREN) break;
 | |
| 
 | |
|         int top_prec = exprGetOpPrecedence(top_op->opcode);
 | |
|         if (top_prec < curr_prec) break;
 | |
|         /* Special case for **: only pop if precedence is strictly higher
 | |
|          * so that the operator is right associative, that is:
 | |
|          * 2 ** 3 ** 2 is evaluated as 2 ** (3 ** 2) == 512 instead
 | |
|          * of (2 ** 3) ** 2 == 64. */
 | |
|         if (op->opcode == EXPR_OP_POW && top_prec <= curr_prec) break;
 | |
| 
 | |
|         /* Pop and add to program. */
 | |
|         top_op = exprStackPop(&es->ops_stack);
 | |
|         int arity = exprGetOpArity(top_op->opcode);
 | |
|         if (*stack_items < arity) {
 | |
|             exprTokenRelease(top_op);
 | |
|             if (errpos) *errpos = top_op->offset;
 | |
|             return 1;
 | |
|         }
 | |
| 
 | |
|         /* Move to the program stack. */
 | |
|         exprStackPush(&es->program, top_op);
 | |
|         *stack_items = *stack_items - arity + 1;
 | |
|     }
 | |
| 
 | |
|     /* Push current operator. */
 | |
|     exprStackPush(&es->ops_stack, op);
 | |
|     exprTokenRetain(op);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| /* Compile the expression into a set of push-value and exec-operator
 | |
|  * that exprRun() can execute. The function returns an expstate object
 | |
|  * that can be used for execution of the program. On error, NULL
 | |
|  * is returned, and optionally the position of the error into the
 | |
|  * expression is returned by reference. */
 | |
| exprstate *exprCompile(char *expr, int *errpos) {
 | |
|     /* Initialize expression state. */
 | |
|     exprstate *es = RedisModule_Alloc(sizeof(exprstate));
 | |
|     es->expr = RedisModule_Strdup(expr);
 | |
|     es->p = es->expr;
 | |
| 
 | |
|     /* Initialize all stacks. */
 | |
|     exprStackInit(&es->values_stack);
 | |
|     exprStackInit(&es->ops_stack);
 | |
|     exprStackInit(&es->tokens);
 | |
|     exprStackInit(&es->program);
 | |
| 
 | |
|     /* Tokenization. */
 | |
|     if (exprTokenize(es, errpos)) {
 | |
|         exprFree(es);
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     /* Compile the expression into a sequence of operations. */
 | |
|     int stack_items = 0;  // Track # of items that would be on the stack
 | |
|                          // during execution. This way we can detect arity
 | |
|                          // issues at compile time.
 | |
| 
 | |
|     /* Process each token. */
 | |
|     for (int i = 0; i < es->tokens.numitems; i++) {
 | |
|         exprtoken *token = es->tokens.items[i];
 | |
| 
 | |
|         if (token->token_type == EXPR_TOKEN_EOF) break;
 | |
| 
 | |
|         /* Handle values (numbers, strings, selectors). */
 | |
|         if (token->token_type == EXPR_TOKEN_NUM ||
 | |
|             token->token_type == EXPR_TOKEN_STR ||
 | |
|             token->token_type == EXPR_TOKEN_TUPLE ||
 | |
|             token->token_type == EXPR_TOKEN_SELECTOR)
 | |
|         {
 | |
|             exprStackPush(&es->program, token);
 | |
|             exprTokenRetain(token);
 | |
|             stack_items++;
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         /* Handle operators. */
 | |
|         if (token->token_type == EXPR_TOKEN_OP) {
 | |
|             if (exprProcessOperator(es, token, &stack_items, errpos)) {
 | |
|                 exprFree(es);
 | |
|                 return NULL;
 | |
|             }
 | |
|             continue;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* Process remaining operators on the stack. */
 | |
|     while (es->ops_stack.numitems > 0) {
 | |
|         exprtoken *op = exprStackPop(&es->ops_stack);
 | |
|         if (op->opcode == EXPR_OP_OPAREN) {
 | |
|             if (errpos) *errpos = op->offset;
 | |
|             exprTokenRelease(op);
 | |
|             exprFree(es);
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         int arity = exprGetOpArity(op->opcode);
 | |
|         if (stack_items < arity) {
 | |
|             if (errpos) *errpos = op->offset;
 | |
|             exprTokenRelease(op);
 | |
|             exprFree(es);
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         exprStackPush(&es->program, op);
 | |
|         stack_items = stack_items - arity + 1;
 | |
|     }
 | |
| 
 | |
|     /* Verify that exactly one value would remain on the stack after
 | |
|      * execution. We could also check that such value is a number, but this
 | |
|      * would make the code more complex without much gains. */
 | |
|     if (stack_items != 1) {
 | |
|         if (errpos) {
 | |
|             /* Point to the last token's offset for error reporting. */
 | |
|             exprtoken *last = es->tokens.items[es->tokens.numitems - 1];
 | |
|             *errpos = last->offset;
 | |
|         }
 | |
|         exprFree(es);
 | |
|         return NULL;
 | |
|     }
 | |
|     return es;
 | |
| }
 | |
| 
 | |
| /* ============================ Expression execution ======================== */
 | |
| 
 | |
| /* Convert a token to its numeric value. For strings we attempt to parse them
 | |
|  * as numbers, returning 0 if conversion fails. */
 | |
| double exprTokenToNum(exprtoken *t) {
 | |
|     char buf[128];
 | |
|     if (t->token_type == EXPR_TOKEN_NUM) {
 | |
|         return t->num;
 | |
|     } else if (t->token_type == EXPR_TOKEN_STR && t->str.len < sizeof(buf)) {
 | |
|         memcpy(buf, t->str.start, t->str.len);
 | |
|         buf[t->str.len] = '\0';
 | |
|         char *endptr;
 | |
|         double val = strtod(buf, &endptr);
 | |
|         return *endptr == '\0' ? val : 0;
 | |
|     } else {
 | |
|         return 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Conver obejct to true/false (0 or 1) */
 | |
| double exprTokenToBool(exprtoken *t) {
 | |
|     if (t->token_type == EXPR_TOKEN_NUM) {
 | |
|         return t->num != 0;
 | |
|     } else if (t->token_type == EXPR_TOKEN_STR && t->str.len == 0) {
 | |
|         return 0; // Empty string are false, like in Javascript.
 | |
|     } else {
 | |
|         return 1; // Every non numerical type is true.
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Compare two tokens. Returns true if they are equal. */
 | |
| int exprTokensEqual(exprtoken *a, exprtoken *b) {
 | |
|     // If both are strings, do string comparison.
 | |
|     if (a->token_type == EXPR_TOKEN_STR && b->token_type == EXPR_TOKEN_STR) {
 | |
|         return a->str.len == b->str.len &&
 | |
|                memcmp(a->str.start, b->str.start, a->str.len) == 0;
 | |
|     }
 | |
| 
 | |
|     // If both are numbers, do numeric comparison.
 | |
|     if (a->token_type == EXPR_TOKEN_NUM && b->token_type == EXPR_TOKEN_NUM) {
 | |
|         return a->num == b->num;
 | |
|     }
 | |
| 
 | |
|     // Mixed types - convert to numbers and compare.
 | |
|     return exprTokenToNum(a) == exprTokenToNum(b);
 | |
| }
 | |
| 
 | |
| /* Convert a json object to an expression token. There is only
 | |
|  * limited support for JSON arrays: they must be composed of
 | |
|  * just numbers and strings. Returns NULL if the JSON object
 | |
|  * cannot be converted. */
 | |
| exprtoken *exprJsonToToken(cJSON *js) {
 | |
|     if (cJSON_IsNumber(js)) {
 | |
|         exprtoken *obj = exprNewToken(EXPR_TOKEN_NUM);
 | |
|         obj->num = cJSON_GetNumberValue(js);
 | |
|         return obj;
 | |
|     } else if (cJSON_IsString(js)) {
 | |
|         exprtoken *obj = exprNewToken(EXPR_TOKEN_STR);
 | |
|         char *strval = cJSON_GetStringValue(js);
 | |
|         obj->str.heapstr = RedisModule_Strdup(strval);
 | |
|         obj->str.start = obj->str.heapstr;
 | |
|         obj->str.len = strlen(obj->str.heapstr);
 | |
|         return obj;
 | |
|     } else if (cJSON_IsBool(js)) {
 | |
|         exprtoken *obj = exprNewToken(EXPR_TOKEN_NUM);
 | |
|         obj->num = cJSON_IsTrue(js);
 | |
|         return obj;
 | |
|     } else if (cJSON_IsArray(js)) {
 | |
|         // First, scan the array to ensure it only
 | |
|         // contains strings and numbers. Otherwise the
 | |
|         // expression will evaluate to false.
 | |
|         int array_size = cJSON_GetArraySize(js);
 | |
| 
 | |
|         for (int j = 0; j < array_size; j++) {
 | |
|             cJSON *item = cJSON_GetArrayItem(js, j);
 | |
|             if (!cJSON_IsNumber(item) && !cJSON_IsString(item)) return NULL;
 | |
|         }
 | |
| 
 | |
|         // Create a tuple token for the array.
 | |
|         exprtoken *obj = exprNewToken(EXPR_TOKEN_TUPLE);
 | |
|         obj->tuple.len = array_size;
 | |
|         obj->tuple.ele = NULL;
 | |
|         if (obj->tuple.len == 0) return obj; // No elements, already ok.
 | |
| 
 | |
|         obj->tuple.ele =
 | |
|             RedisModule_Alloc(sizeof(exprtoken*) * obj->tuple.len);
 | |
| 
 | |
|         // Convert each array element to a token.
 | |
|         for (size_t j = 0; j < obj->tuple.len; j++) {
 | |
|             cJSON *item = cJSON_GetArrayItem(js, j);
 | |
|             if (cJSON_IsNumber(item)) {
 | |
|                 exprtoken *eleToken = exprNewToken(EXPR_TOKEN_NUM);
 | |
|                 eleToken->num = cJSON_GetNumberValue(item);
 | |
|                 obj->tuple.ele[j] = eleToken;
 | |
|             } else if (cJSON_IsString(item)) {
 | |
|                 exprtoken *eleToken = exprNewToken(EXPR_TOKEN_STR);
 | |
|                 char *strval = cJSON_GetStringValue(item);
 | |
|                 eleToken->str.heapstr = RedisModule_Strdup(strval);
 | |
|                 eleToken->str.start = eleToken->str.heapstr;
 | |
|                 eleToken->str.len = strlen(eleToken->str.heapstr);
 | |
|                 obj->tuple.ele[j] = eleToken;
 | |
|             }
 | |
|         }
 | |
|         return obj;
 | |
|     }
 | |
|     return NULL; // No conversion possible for this type.
 | |
| }
 | |
| 
 | |
| /* Execute the compiled expression program. Returns 1 if the final stack value
 | |
|  * evaluates to true, 0 otherwise. Also returns 0 if any selector callback
 | |
|  * fails. */
 | |
| int exprRun(exprstate *es, char *json, size_t json_len) {
 | |
|     exprStackReset(&es->values_stack);
 | |
|     cJSON *parsed_json = NULL;
 | |
| 
 | |
|     // Execute each instruction in the program.
 | |
|     for (int i = 0; i < es->program.numitems; i++) {
 | |
|         exprtoken *t = es->program.items[i];
 | |
| 
 | |
|         // Handle selectors by calling the callback.
 | |
|         if (t->token_type == EXPR_TOKEN_SELECTOR) {
 | |
|             if (json != NULL) {
 | |
|                 cJSON *attrib = NULL;
 | |
|                 if (parsed_json == NULL) {
 | |
|                     parsed_json = cJSON_ParseWithLength(json,json_len);
 | |
|                     // Will be left to NULL if the above fails.
 | |
|                 }
 | |
|                 if (parsed_json) {
 | |
|                     char item_name[128];
 | |
|                     if (t->str.len > 0 && t->str.len < sizeof(item_name)) {
 | |
|                         memcpy(item_name,t->str.start,t->str.len);
 | |
|                         item_name[t->str.len] = 0;
 | |
|                         attrib = cJSON_GetObjectItem(parsed_json,item_name);
 | |
|                     }
 | |
|                     /* Fill the token according to the JSON type stored
 | |
|                      * at the attribute. */
 | |
|                     if (attrib) {
 | |
|                         exprtoken *obj = exprJsonToToken(attrib);
 | |
|                         if (obj) {
 | |
|                             exprStackPush(&es->values_stack, obj);
 | |
|                             continue;
 | |
|                         }
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             // Selector not found or JSON object not convertible to
 | |
|             // expression tokens. Evaluate the expression to false.
 | |
|             if (parsed_json) cJSON_Delete(parsed_json);
 | |
|             return 0;
 | |
|         }
 | |
| 
 | |
|         // Push non-operator values directly onto the stack.
 | |
|         if (t->token_type != EXPR_TOKEN_OP) {
 | |
|             exprStackPush(&es->values_stack, t);
 | |
|             exprTokenRetain(t);
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         // Handle operators.
 | |
|         exprtoken *result = exprNewToken(EXPR_TOKEN_NUM);
 | |
| 
 | |
|         // Pop operands - we know we have enough from compile-time checks.
 | |
|         exprtoken *b = exprStackPop(&es->values_stack);
 | |
|         exprtoken *a = NULL;
 | |
|         if (exprGetOpArity(t->opcode) == 2) {
 | |
|             a = exprStackPop(&es->values_stack);
 | |
|         }
 | |
| 
 | |
|         switch(t->opcode) {
 | |
|         case EXPR_OP_NOT:
 | |
|             result->num = exprTokenToBool(b) == 0 ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_POW: {
 | |
|             double base = exprTokenToNum(a);
 | |
|             double exp = exprTokenToNum(b);
 | |
|             result->num = pow(base, exp);
 | |
|             break;
 | |
|         }
 | |
|         case EXPR_OP_MULT:
 | |
|             result->num = exprTokenToNum(a) * exprTokenToNum(b);
 | |
|             break;
 | |
|         case EXPR_OP_DIV:
 | |
|             result->num = exprTokenToNum(a) / exprTokenToNum(b);
 | |
|             break;
 | |
|         case EXPR_OP_MOD: {
 | |
|             double va = exprTokenToNum(a);
 | |
|             double vb = exprTokenToNum(b);
 | |
|             result->num = fmod(va, vb);
 | |
|             break;
 | |
|         }
 | |
|         case EXPR_OP_SUM:
 | |
|             result->num = exprTokenToNum(a) + exprTokenToNum(b);
 | |
|             break;
 | |
|         case EXPR_OP_DIFF:
 | |
|             result->num = exprTokenToNum(a) - exprTokenToNum(b);
 | |
|             break;
 | |
|         case EXPR_OP_GT:
 | |
|             result->num = exprTokenToNum(a) > exprTokenToNum(b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_GTE:
 | |
|             result->num = exprTokenToNum(a) >= exprTokenToNum(b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_LT:
 | |
|             result->num = exprTokenToNum(a) < exprTokenToNum(b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_LTE:
 | |
|             result->num = exprTokenToNum(a) <= exprTokenToNum(b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_EQ:
 | |
|             result->num = exprTokensEqual(a, b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_NEQ:
 | |
|             result->num = !exprTokensEqual(a, b) ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_IN: {
 | |
|             // For 'in' operator, b must be a tuple.
 | |
|             result->num = 0;  // Default to false.
 | |
|             if (b->token_type == EXPR_TOKEN_TUPLE) {
 | |
|                 for (size_t j = 0; j < b->tuple.len; j++) {
 | |
|                     if (exprTokensEqual(a, b->tuple.ele[j])) {
 | |
|                         result->num = 1;  // Found a match.
 | |
|                         break;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
|         case EXPR_OP_AND:
 | |
|             result->num =
 | |
|                 exprTokenToBool(a) != 0 && exprTokenToBool(b) != 0 ? 1 : 0;
 | |
|             break;
 | |
|         case EXPR_OP_OR:
 | |
|             result->num =
 | |
|                 exprTokenToBool(a) != 0 || exprTokenToBool(b) != 0 ? 1 : 0;
 | |
|             break;
 | |
|         default:
 | |
|             // Do nothing: we don't want runtime errors.
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         // Free operands and push result.
 | |
|         if (a) exprTokenRelease(a);
 | |
|         exprTokenRelease(b);
 | |
|         exprStackPush(&es->values_stack, result);
 | |
|     }
 | |
| 
 | |
|     if (parsed_json) cJSON_Delete(parsed_json);
 | |
| 
 | |
|     // Get final result from stack.
 | |
|     exprtoken *final = exprStackPop(&es->values_stack);
 | |
|     if (final == NULL) return 0;
 | |
| 
 | |
|     // Convert result to boolean.
 | |
|     int retval = exprTokenToBool(final);
 | |
|     exprTokenRelease(final);
 | |
|     return retval;
 | |
| }
 | |
| 
 | |
| /* ============================ Simple test main ============================ */
 | |
| 
 | |
| #ifdef TEST_MAIN
 | |
| void exprPrintToken(exprtoken *t) {
 | |
|     switch(t->token_type) {
 | |
|         case EXPR_TOKEN_EOF:
 | |
|             printf("EOF");
 | |
|             break;
 | |
|         case EXPR_TOKEN_NUM:
 | |
|             printf("NUM:%g", t->num);
 | |
|             break;
 | |
|         case EXPR_TOKEN_STR:
 | |
|             printf("STR:\"%.*s\"", (int)t->str.len, t->str.start);
 | |
|             break;
 | |
|         case EXPR_TOKEN_SELECTOR:
 | |
|             printf("SEL:%.*s", (int)t->str.len, t->str.start);
 | |
|             break;
 | |
|         case EXPR_TOKEN_OP:
 | |
|             printf("OP:");
 | |
|             for (int i = 0; ExprOptable[i].opname != NULL; i++) {
 | |
|                 if (ExprOptable[i].opcode == t->opcode) {
 | |
|                     printf("%s", ExprOptable[i].opname);
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|             break;
 | |
|         default:
 | |
|             printf("UNKNOWN");
 | |
|             break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| void exprPrintStack(exprstack *stack, const char *name) {
 | |
|     printf("%s (%d items):", name, stack->numitems);
 | |
|     for (int j = 0; j < stack->numitems; j++) {
 | |
|         printf(" ");
 | |
|         exprPrintToken(stack->items[j]);
 | |
|     }
 | |
|     printf("\n");
 | |
| }
 | |
| 
 | |
| int main(int argc, char **argv) {
 | |
|     char *testexpr = "(5+2)*3 and .year > 1980 and 'foo' == 'foo'";
 | |
|     char *testjson = "{\"year\": 1984, \"name\": \"The Matrix\"}";
 | |
|     if (argc >= 2) testexpr = argv[1];
 | |
|     if (argc >= 3) testjson = argv[2];
 | |
| 
 | |
|     printf("Compiling expression: %s\n", testexpr);
 | |
| 
 | |
|     int errpos = 0;
 | |
|     exprstate *es = exprCompile(testexpr,&errpos);
 | |
|     if (es == NULL) {
 | |
|         printf("Compilation failed near \"...%s\"\n", testexpr+errpos);
 | |
|         return 1;
 | |
|     }
 | |
| 
 | |
|     exprPrintStack(&es->tokens, "Tokens");
 | |
|     exprPrintStack(&es->program, "Program");
 | |
|     printf("Running against object: %s\n", testjson);
 | |
|     int result = exprRun(es,testjson,strlen(testjson));
 | |
|     printf("Result1: %s\n", result ? "True" : "False");
 | |
|     result = exprRun(es,testjson,strlen(testjson));
 | |
|     printf("Result2: %s\n", result ? "True" : "False");
 | |
| 
 | |
|     exprFree(es);
 | |
|     return 0;
 | |
| }
 | |
| #endif
 |