[ovs-dev] [PATCH ovn 3/3] lex: New lexical analyzer module for use in OVN.
Ben Pfaff
blp at nicira.com
Thu Feb 26 05:13:47 UTC 2015
I'm determined not to let the terrible style of pseudo-parsing we have in
OVS leak into OVN. Here's the first step.
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
ovn/TODO | 5 -
ovn/automake.mk | 3 +
ovn/lex.c | 688 +++++++++++++++++++++++++++++++++++++++++++++++++++++
ovn/lex.h | 106 +++++++++
ovn/ovn.xml | 41 ++--
tests/automake.mk | 6 +-
tests/ovn.at | 97 ++++++++
tests/test-ovn.c | 112 +++++++++
tests/testsuite.at | 1 +
9 files changed, 1033 insertions(+), 26 deletions(-)
create mode 100644 ovn/lex.c
create mode 100644 ovn/lex.h
create mode 100644 tests/ovn.at
create mode 100644 tests/test-ovn.c
diff --git a/ovn/TODO b/ovn/TODO
index e405c7c..075cb26 100644
--- a/ovn/TODO
+++ b/ovn/TODO
@@ -19,11 +19,6 @@
Probably should be defined so that the data structure is also
useful for references to fields in action parsing.
-** Lexical analysis.
-
- Probably should be defined so that the lexer can be reused for
- parsing actions.
-
** Parsing into syntax tree.
** Semantic checking against variable definitions.
diff --git a/ovn/automake.mk b/ovn/automake.mk
index a4951dc..88847ac 100644
--- a/ovn/automake.mk
+++ b/ovn/automake.mk
@@ -74,4 +74,7 @@ SUFFIXES += .xml
$(AM_V_GEN)$(run_python) $(srcdir)/build-aux/xml2nroff \
--version=$(VERSION) $< > $@.tmp && mv $@.tmp $@
+lib_LTLIBRARIES += lib/libovn.la
+lib_libovn_la_SOURCES = ovn/lex.c ovn/lex.h
+
EXTRA_DIST += ovn/TODO
diff --git a/ovn/lex.c b/ovn/lex.c
new file mode 100644
index 0000000..22e942a
--- /dev/null
+++ b/ovn/lex.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lex.h"
+#include <ctype.h>
+#include <errno.h>
+#include <stdarg.h>
+#include "dynamic-string.h"
+#include "json.h"
+#include "util.h"
+
+/* Initializes 'token'. */
+void
+lex_token_init(struct lex_token *token)
+{
+ token->type = LEX_T_END;
+ token->s = NULL;
+}
+
+/* Frees memory owned by 'token'. */
+void
+lex_token_destroy(struct lex_token *token)
+{
+ free(token->s);
+}
+
+/* Exchanges 'a' and 'b'. */
+void
+lex_token_swap(struct lex_token *a, struct lex_token *b)
+{
+ struct lex_token tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+/* lex_token_format(). */
+
+static size_t
+lex_token_n_zeros(enum lex_format format)
+{
+ switch (format) {
+ case LEX_F_DECIMAL: return offsetof(union mf_subvalue, integer);
+ case LEX_F_HEXADECIMAL: return 0;
+ case LEX_F_IPV4: return offsetof(union mf_subvalue, ipv4);
+ case LEX_F_IPV6: return offsetof(union mf_subvalue, ipv6);
+ case LEX_F_ETHERNET: return offsetof(union mf_subvalue, mac);
+ default: OVS_NOT_REACHED();
+ }
+}
+
+/* Returns the effective format for 'token', that is, the format in which it
+ * should actually be printed. This is ordinarily the same as 'token->format',
+ * but it's always possible that someone sets up a token with a format that
+ * won't work for a value, e.g. 'token->value' is wider than 32 bits but the
+ * format is LEX_F_IPV4. (The lexer itself won't do that; this is an attempt
+ * to avoid confusion in the future.) */
+static enum lex_format
+lex_token_get_format(const struct lex_token *token)
+{
+ size_t n_zeros = lex_token_n_zeros(token->format);
+ return (is_all_zeros(&token->value, n_zeros)
+ && (token->type != LEX_T_MASKED_INTEGER
+ || is_all_zeros(&token->mask, n_zeros))
+ ? token->format
+ : LEX_F_HEXADECIMAL);
+}
+
+static void
+lex_token_format_value(const union mf_subvalue *value,
+ enum lex_format format, struct ds *s)
+{
+ switch (format) {
+ case LEX_F_DECIMAL:
+ ds_put_format(s, "%"PRIu64, ntohll(value->integer));
+ break;
+
+ case LEX_F_HEXADECIMAL:
+ for (const uint8_t *p = value->u8, *end = p + ARRAY_SIZE(value->u8);
+ p < end; p++) {
+ if (*p) {
+ ds_put_format(s, "0x%"PRIx8, *p);
+ for (p++; p < end; p++) {
+ ds_put_format(s, "%02"PRIx8, *p);
+ }
+ return;
+ }
+ }
+ ds_put_cstr(s, "0");
+ break;
+
+ case LEX_F_IPV4:
+ ds_put_format(s, IP_FMT, IP_ARGS(value->ipv4));
+ break;
+
+ case LEX_F_IPV6:
+ print_ipv6_addr(s, &value->ipv6);
+ break;
+
+ case LEX_F_ETHERNET:
+ ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(value->mac));
+ break;
+
+ default:
+ OVS_NOT_REACHED();
+ }
+
+}
+
+static void
+lex_token_format_masked_integer(const struct lex_token *token, struct ds *s)
+{
+ enum lex_format format = lex_token_get_format(token);
+
+ lex_token_format_value(&token->value, format, s);
+ ds_put_char(s, '/');
+
+ const union mf_subvalue *mask = &token->mask;
+ if (format == LEX_F_IPV4 && ip_is_cidr(mask->ipv4)) {
+ ds_put_format(s, "%d", ip_count_cidr_bits(mask->ipv4));
+    } else if (format == LEX_F_IPV6 && ipv6_is_cidr(&mask->ipv6)) {
+ ds_put_format(s, "%d", ipv6_count_cidr_bits(&mask->ipv6));
+ } else {
+ lex_token_format_value(&token->mask, format, s);
+ }
+}
+
+
+static void
+lex_token_format_string(const char *s, struct ds *ds)
+{
+ struct json json;
+ json.type = JSON_STRING;
+ json.u.string = CONST_CAST(char *, s);
+ json_to_ds(&json, 0, ds);
+}
+
+/* Appends a string representation of 'token' to 's', in a format that can be
+ * losslessly parsed back by the lexer. (LEX_T_END and LEX_T_ERROR can't be
+ * parsed back.) */
+void
+lex_token_format(struct lex_token *token, struct ds *s)
+{
+ switch (token->type) {
+ case LEX_T_END:
+ ds_put_cstr(s, "$");
+ break;
+
+ case LEX_T_ID:
+ ds_put_cstr(s, token->s);
+ break;
+
+ case LEX_T_ERROR:
+ ds_put_cstr(s, "error(");
+ lex_token_format_string(token->s, s);
+ ds_put_char(s, ')');
+ break;
+
+ case LEX_T_STRING:
+ lex_token_format_string(token->s, s);
+ break;
+
+
+
+ case LEX_T_INTEGER:
+ lex_token_format_value(&token->value, lex_token_get_format(token), s);
+ break;
+
+ case LEX_T_MASKED_INTEGER:
+ lex_token_format_masked_integer(token, s);
+ break;
+
+ case LEX_T_LPAREN:
+ ds_put_cstr(s, "(");
+ break;
+ case LEX_T_RPAREN:
+ ds_put_cstr(s, ")");
+ break;
+ case LEX_T_LCURLY:
+ ds_put_cstr(s, "{");
+ break;
+ case LEX_T_RCURLY:
+ ds_put_cstr(s, "}");
+ break;
+ case LEX_T_LSQUARE:
+ ds_put_cstr(s, "[");
+ break;
+ case LEX_T_RSQUARE:
+ ds_put_cstr(s, "]");
+ break;
+ case LEX_T_EQ:
+ ds_put_cstr(s, "==");
+ break;
+ case LEX_T_NE:
+ ds_put_cstr(s, "!=");
+ break;
+ case LEX_T_LT:
+ ds_put_cstr(s, "<");
+ break;
+ case LEX_T_LE:
+ ds_put_cstr(s, "<=");
+ break;
+ case LEX_T_GT:
+ ds_put_cstr(s, ">");
+ break;
+ case LEX_T_GE:
+ ds_put_cstr(s, ">=");
+ break;
+ case LEX_T_LOG_NOT:
+ ds_put_cstr(s, "!");
+ break;
+ case LEX_T_LOG_AND:
+ ds_put_cstr(s, "&&");
+ break;
+ case LEX_T_LOG_OR:
+ ds_put_cstr(s, "||");
+ break;
+ case LEX_T_ELLIPSIS:
+ ds_put_cstr(s, "..");
+ break;
+ case LEX_T_COMMA:
+ ds_put_cstr(s, ",");
+ break;
+ case LEX_T_SEMICOLON:
+ ds_put_cstr(s, ";");
+ break;
+ case LEX_T_EQUALS:
+ ds_put_cstr(s, "=");
+ break;
+ default:
+ OVS_NOT_REACHED();
+ }
+
+}
+
+/* lex_token_parse(). */
+
+static void OVS_PRINTF_FORMAT(2, 3)
+lex_error(struct lex_token *token, const char *message, ...)
+{
+ token->type = LEX_T_ERROR;
+
+ va_list args;
+ va_start(args, message);
+ token->s = xvasprintf(message, args);
+ va_end(args);
+}
+
+static void
+lex_parse_hex_integer(const char *start, size_t len, struct lex_token *token)
+{
+ const char *in = start + (len - 1);
+ uint8_t *out = token->value.u8 + (sizeof token->value.u8 - 1);
+
+ for (int i = 0; i < len; i++) {
+ int hexit = hexit_value(in[-i]);
+ if (hexit < 0) {
+ lex_error(token, "Invalid syntax in hexadecimal constant.");
+ return;
+ }
+ if (hexit && i / 2 >= sizeof token->value.u8) {
+ lex_error(token, "Hexadecimal constant requires more than "
+ "%"PRIuSIZE" bits.", 8 * sizeof token->value.u8);
+ return;
+ }
+ out[-(i / 2)] |= i % 2 ? hexit << 4 : hexit;
+ }
+ token->format = LEX_F_HEXADECIMAL;
+}
+
+static const char *
+lex_parse_integer__(const char *p, struct lex_token *token)
+{
+ const char *start = p;
+ const char *end = p + strspn(p, "0123456789abcdefABCDEFxX.:");
+ size_t len = end - start;
+
+ int n;
+ uint8_t mac[ETH_ADDR_LEN];
+
+ token->type = LEX_T_INTEGER;
+ if (!len) {
+ lex_error(token, "Integer constant expected.");
+ } else if (len == 17
+ && ovs_scan(start, ETH_ADDR_SCAN_FMT"%n",
+ ETH_ADDR_SCAN_ARGS(mac), &n)
+ && n == len) {
+ memcpy(token->value.mac, mac, sizeof token->value.mac);
+ token->format = LEX_F_ETHERNET;
+ } else if (start + strspn(start, "0123456789") == end) {
+ if (p[0] == '0' && len > 1) {
+ lex_error(token, "Decimal constants must not have leading zeros.");
+ } else {
+ unsigned long long int integer;
+ char *tail;
+
+ errno = 0;
+ integer = strtoull(p, &tail, 10);
+ if (tail != end || errno == ERANGE) {
+ lex_error(token, "Decimal constants must be less than 2**64.");
+ } else {
+ token->value.integer = htonll(integer);
+ token->format = LEX_F_DECIMAL;
+ }
+ }
+ } else if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ if (len > 2) {
+ lex_parse_hex_integer(start + 2, len - 2, token);
+ } else {
+ lex_error(token, "Hex digits expected following 0%c.", p[1]);
+ }
+ } else if (len < INET6_ADDRSTRLEN) {
+ char copy[INET6_ADDRSTRLEN];
+ memcpy(copy, p, len);
+ copy[len] = '\0';
+
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ if (inet_pton(AF_INET, copy, &ipv4) == 1) {
+ token->value.ipv4 = ipv4.s_addr;
+ token->format = LEX_F_IPV4;
+ } else if (inet_pton(AF_INET6, copy, &ipv6) == 1) {
+ token->value.ipv6 = ipv6;
+ token->format = LEX_F_IPV6;
+ } else {
+ lex_error(token, "Invalid numeric constant.");
+ }
+ } else {
+ lex_error(token, "Invalid numeric constant.");
+ }
+
+ ovs_assert(token->type == LEX_T_INTEGER || token->type == LEX_T_ERROR);
+ return end;
+}
+
+static const char *
+lex_parse_integer(const char *p, struct lex_token *token)
+{
+ memset(&token->value, 0, sizeof token->value);
+ p = lex_parse_integer__(p, token);
+ if (token->type == LEX_T_INTEGER && *p == '/') {
+ struct lex_token mask;
+
+ lex_token_init(&mask);
+ memset(&mask.value, 0, sizeof mask.value);
+ p = lex_parse_integer__(p + 1, &mask);
+ if (mask.type == LEX_T_INTEGER) {
+ token->type = LEX_T_MASKED_INTEGER;
+
+            uint64_t prefix_bits = ntohll(mask.value.integer);
+ if (token->format == mask.format) {
+ /* Same format value and mask is always OK. */
+ token->mask = mask.value;
+ } else if (token->format == LEX_F_IPV4
+ && mask.format == LEX_F_DECIMAL
+ && prefix_bits <= 32) {
+ /* IPv4 address with decimal mask is a CIDR prefix. */
+ token->mask.integer = htonll(ntohl(be32_prefix_mask(
+ prefix_bits)));
+ } else if (token->format == LEX_F_IPV6
+ && mask.format == LEX_F_DECIMAL
+ && prefix_bits <= 128) {
+ /* IPv6 address with decimal mask is a CIDR prefix. */
+ token->mask.ipv6 = ipv6_create_mask(prefix_bits);
+ } else if (token->format == LEX_F_DECIMAL
+ && mask.format == LEX_F_HEXADECIMAL
+ && token->value.integer == 0) {
+ /* Special case for e.g. 0/0x1234. */
+ token->format = LEX_F_HEXADECIMAL;
+ token->mask = mask.value;
+ } else {
+ lex_error(token, "Value and mask have incompatible formats.");
+ return p;
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(token->mask.be32); i++) {
+ ovs_be32 v = token->value.be32[i];
+ ovs_be32 m = token->mask.be32[i];
+
+ if (v & ~m) {
+ lex_error(token, "Value contains unmasked 1-bits.");
+ break;
+ }
+ }
+
+ return p;
+ } else {
+ lex_token_swap(&mask, token);
+ }
+ lex_token_destroy(&mask);
+ }
+ return p;
+}
+
+static const char *
+lex_parse_string(const char *p, struct lex_token *token)
+{
+ const char *start = ++p;
+ for (;;) {
+ switch (*p) {
+ case '\0':
+ lex_error(token, "Input ends inside quoted string.");
+ return p;
+
+ case '"':
+ token->type = (json_string_unescape(start, p - start, &token->s)
+ ? LEX_T_STRING : LEX_T_ERROR);
+ return p + 1;
+
+ case '\\':
+ p++;
+ if (*p) {
+ p++;
+ }
+ break;
+
+ default:
+ p++;
+ break;
+ }
+ }
+
+}
+
+static bool
+lex_is_id1(unsigned char c)
+{
+ return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+ || c == '_' || c == '.');
+}
+
+static bool
+lex_is_idn(unsigned char c)
+{
+ return lex_is_id1(c) || (c >= '0' && c <= '9');
+}
+
+static const char *
+lex_parse_id(const char *p, struct lex_token *token)
+{
+ const char *start = p;
+
+ do {
+ p++;
+ } while (lex_is_idn(*p));
+
+ token->type = LEX_T_ID;
+ token->s = xmemdup0(start, p - start);
+ return p;
+}
+
+/* Initializes 'token' and parses the first token from the beginning of
+ * null-terminated string 'p' into 'token'. Returns the character position
+ * at which to begin parsing the next token. */
+const char *
+lex_token_parse(struct lex_token *token, const char *p)
+{
+ lex_token_init(token);
+
+next:
+ switch (*p) {
+ case '\0':
+ token->type = LEX_T_END;
+ return p;
+
+ case ' ': case '\t': case '\n': case '\r':
+ p++;
+ goto next;
+
+ case '/':
+ p++;
+ if (*p == '/') {
+ do {
+ p++;
+ } while (*p != '\0' && *p != '\n');
+ goto next;
+ } else if (*p == '*') {
+ p++;
+ for (;;) {
+ if (*p == '*' && p[1] == '/') {
+ p += 2;
+ goto next;
+ } else if (*p == '\0' || *p == '\n') {
+ lex_error(token, "`/*' without matching `*/'.");
+ return p;
+ } else {
+ p++;
+ }
+ }
+
+ } else {
+ lex_error(token,
+ "`/' is only valid as part of `//' or `/*'.");
+ }
+ break;
+
+ case '(':
+ token->type = LEX_T_LPAREN;
+ p++;
+ break;
+
+ case ')':
+ token->type = LEX_T_RPAREN;
+ p++;
+ break;
+
+ case '{':
+ token->type = LEX_T_LCURLY;
+ p++;
+ break;
+
+ case '}':
+ token->type = LEX_T_RCURLY;
+ p++;
+ break;
+
+ case '[':
+ token->type = LEX_T_LSQUARE;
+ p++;
+ break;
+
+ case ']':
+ token->type = LEX_T_RSQUARE;
+ p++;
+ break;
+
+ case '=':
+ p++;
+ if (*p == '=') {
+ token->type = LEX_T_EQ;
+ p++;
+ } else {
+ token->type = LEX_T_EQUALS;
+ }
+ break;
+
+ case '!':
+ p++;
+ if (*p == '=') {
+ token->type = LEX_T_NE;
+ p++;
+ } else {
+ token->type = LEX_T_LOG_NOT;
+ }
+ break;
+
+ case '&':
+ p++;
+ if (*p == '&') {
+ token->type = LEX_T_LOG_AND;
+ p++;
+ } else {
+ lex_error(token, "`&' is only valid as part of `&&'.");
+ }
+ break;
+
+ case '|':
+ p++;
+ if (*p == '|') {
+ token->type = LEX_T_LOG_OR;
+ p++;
+ } else {
+ lex_error(token, "`|' is only valid as part of `||'.");
+ }
+ break;
+
+ case '<':
+ p++;
+ if (*p == '=') {
+ token->type = LEX_T_LE;
+ p++;
+ } else {
+ token->type = LEX_T_LT;
+ }
+ break;
+
+ case '>':
+ p++;
+ if (*p == '=') {
+ token->type = LEX_T_GE;
+ p++;
+ } else {
+ token->type = LEX_T_GT;
+ }
+ break;
+
+ case '.':
+ p++;
+ if (*p == '.') {
+ token->type = LEX_T_ELLIPSIS;
+ p++;
+ } else {
+ lex_error(token, "`.' is only valid as part of `..' "
+ "or a numeric constant.");
+ }
+ break;
+
+ case ',':
+ p++;
+ token->type = LEX_T_COMMA;
+ break;
+
+ case ';':
+ p++;
+ token->type = LEX_T_SEMICOLON;
+ break;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case ':':
+ p = lex_parse_integer(p, token);
+ break;
+
+ case '"':
+ p = lex_parse_string(p, token);
+ break;
+
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        /* We need to distinguish an Ethernet address or IPv6 address from an
+         * identifier.  Fortunately, Ethernet addresses, and IPv6 addresses
+         * that are ambiguous based on the first character, always start with
+         * hex digits followed by a colon, but identifiers never do. */
+ p = (p[strspn(p, "0123456789abcdefABCDEF")] == ':'
+ ? lex_parse_integer(p, token)
+ : lex_parse_id(p, token));
+ break;
+
+ default:
+ if (lex_is_id1(*p)) {
+ p = lex_parse_id(p, token);
+ } else {
+ if (isprint((unsigned char) *p)) {
+ lex_error(token, "Invalid character `%c' in input.", *p);
+            } else {
+                lex_error(token, "Invalid byte 0x%"PRIx8" in input.", (uint8_t) *p);
+ }
+ p++;
+ }
+ break;
+ }
+
+ return p;
+}
+
+/* Initializes 'lexer' for parsing 'input'.
+ *
+ * While the lexer is in use, 'input' must remain available, but the caller
+ * otherwise retains ownership of 'input'.
+ *
+ * The caller must call lexer_get() to obtain the first token. */
+void
+lexer_init(struct lexer *lexer, const char *input)
+{
+ lexer->input = input;
+ memset(&lexer->token, 0, sizeof lexer->token);
+}
+
+/* Frees storage associated with 'lexer'. */
+void
+lexer_destroy(struct lexer *lexer)
+{
+ lex_token_destroy(&lexer->token);
+}
+
+/* Obtains the next token from 'lexer' into 'lexer->token', and returns the
+ * token's type. The caller may examine 'lexer->token' directly to obtain full
+ * information about the token. */
+enum lex_type
+lexer_get(struct lexer *lexer)
+{
+ lex_token_destroy(&lexer->token);
+ lexer->input = lex_token_parse(&lexer->token, lexer->input);
+ return lexer->token.type;
+}
diff --git a/ovn/lex.h b/ovn/lex.h
new file mode 100644
index 0000000..b035c65
--- /dev/null
+++ b/ovn/lex.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_LEX_H
+#define OVN_LEX_H 1
+
+/* OVN lexical analyzer
+ * ====================
+ *
+ * This is a simple lexical analyzer (or tokenizer) for OVN match expressions
+ * and ACLs. */
+
+#include "meta-flow.h"
+
+struct ds;
+
+/* Token type. */
+enum lex_type {
+ LEX_T_END, /* end of input */
+
+ /* Tokens with auxiliary data. */
+ LEX_T_ID, /* foo */
+ LEX_T_STRING, /* "foo" */
+ LEX_T_INTEGER, /* 12345 or 1.2.3.4 or ::1 or 01:02:03:04:05 */
+ LEX_T_MASKED_INTEGER, /* 12345/10 or 1.2.0.0/16 or ::2/127 or... */
+ LEX_T_ERROR, /* invalid input */
+
+ /* Bare tokens. */
+ LEX_T_LPAREN, /* ( */
+ LEX_T_RPAREN, /* ) */
+ LEX_T_LCURLY, /* { */
+ LEX_T_RCURLY, /* } */
+ LEX_T_LSQUARE, /* [ */
+ LEX_T_RSQUARE, /* ] */
+ LEX_T_EQ, /* == */
+ LEX_T_NE, /* != */
+ LEX_T_LT, /* < */
+ LEX_T_LE, /* <= */
+ LEX_T_GT, /* > */
+ LEX_T_GE, /* >= */
+ LEX_T_LOG_NOT, /* ! */
+ LEX_T_LOG_AND, /* && */
+ LEX_T_LOG_OR, /* || */
+ LEX_T_ELLIPSIS, /* .. */
+ LEX_T_COMMA, /* , */
+ LEX_T_SEMICOLON, /* ; */
+ LEX_T_EQUALS, /* = */
+};
+
+/* Subtype for LEX_T_INTEGER and LEX_T_MASKED_INTEGER tokens.
+ *
+ * These do not change the semantics of a token; instead, they determine the
+ * format used when a token is serialized back to a text form. That's
+ * important because 3232268289 is meaningless to a human whereas 192.168.128.1
+ * has some actual significance. */
+enum lex_format {
+ LEX_F_DECIMAL,
+ LEX_F_HEXADECIMAL,
+ LEX_F_IPV4,
+ LEX_F_IPV6,
+ LEX_F_ETHERNET,
+};
+
+/* A token.
+ *
+ * 's' is owned by the token. */
+struct lex_token {
+ enum lex_type type; /* One of LEX_*. */
+ char *s; /* LEX_T_ID, LEX_T_STRING, LEX_T_ERROR only. */
+ enum lex_format format; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */
+ union mf_subvalue value; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */
+ union mf_subvalue mask; /* LEX_T_MASKED_INTEGER only. */
+};
+
+void lex_token_init(struct lex_token *);
+void lex_token_destroy(struct lex_token *);
+void lex_token_swap(struct lex_token *, struct lex_token *);
+
+void lex_token_format(struct lex_token *, struct ds *);
+const char *lex_token_parse(struct lex_token *, const char *input);
+
+/* A lexical analyzer. */
+struct lexer {
+ const char *input; /* Remaining input (not owned by lexer). */
+ struct lex_token token; /* Current token (owned by lexer). */
+};
+
+void lexer_init(struct lexer *, const char *input);
+void lexer_destroy(struct lexer *);
+
+enum lex_type lexer_get(struct lexer *);
+
+#endif /* ovn/lex.h */
diff --git a/ovn/ovn.xml b/ovn/ovn.xml
index a233112..e7fbb5f 100644
--- a/ovn/ovn.xml
+++ b/ovn/ovn.xml
@@ -278,9 +278,11 @@
</p>
<p>
- The <code>inport</code> and <code>outport</code> fields have string
- values. The useful values are <ref column="logical_port"/> names from
- the <ref column="Bindings"/> and <ref column="Gateway"/> table.
+ The <code>inport</code> and <code>outport</code> fields have quoted
+ string values. Quoted strings have the same syntax as quoted strings
+ in JSON (thus, they are Unicode strings). The useful values are <ref
+ column="logical_port"/> names from the <ref column="Bindings"/> and
+ <ref column="Gateway"/> table.
</p>
<p>
@@ -289,7 +291,7 @@
<ul>
<li><code>()</code></li>
- <li><code>== != < <= > >= in not in</code></li>
+ <li><code>== != < <= > >=</code></li>
<li><code>!</code></li>
<li><code>&&</code></li>
<li><code>||</code></li>
@@ -315,27 +317,28 @@
</p>
<p>
- The relational operators are <, <=, >, and >=. Their
- operands must be a field and a constant, in either order; the constant
- must not be masked. These operators are most commonly useful for L4
- ports, e.g. <code>tcp.src < 1024</code>. Implementation of the
- relational operators is expensive.
- </p>
-
- <p>
- The set membership operator <code>in</code>, with syntax
- ``<code><var>field</var> in { <var>constant1</var>,
- <var>constant2</var>,</code> ... <code>}</code>'', is syntactic sugar
- for ``<code>(<var>field</var> == <var>constant1</var> ||
+ The <code>==</code> operator also serves as a set membership operator,
+ with syntax ``<code><var>field</var> == { <var>constant1</var>,
+ <var>constant2</var>,</code> ... <code>}</code>'' acting as syntactic
+ sugar for ``<code>(<var>field</var> == <var>constant1</var> ||
<var>field</var> == <var>constant2</var> || </code>...<code>)</code>.
- Conversely, ``<code><var>field</var> not in { <var>constant1</var>,
- <var>constant2</var>, </code>...<code> }</code>'' is syntactic sugar
- for ``<code>(<var>field</var> != <var>constant1</var> &&
+ Similarly, ``<code><var>field</var> != { <var>constant1</var>,
+ <var>constant2</var>, </code>...<code> }</code>'' is equivalent to
+ ``<code>(<var>field</var> != <var>constant1</var> &&
<var>field</var> != <var>constant2</var> &&
</code>...<code>)</code>''.
</p>
<p>
+ The relational operators are <, <=, >, and >=. Their
+ operands must be a field and a constant, in either order. These
+ operators are most commonly useful for L4 ports, e.g. <code>tcp.src
+ < 1024</code>. The constant operand must not be masked, but the
+ field reference may use subfield syntax, e.g. <code>vlan.tci[12..15] >
+ 1</code>. Implementation of the relational operators is expensive.
+ </p>
+
+ <p>
The unary prefix operator <code>!</code> yields its operand's inverse.
</p>
diff --git a/tests/automake.mk b/tests/automake.mk
index 50d8ad2..949454c 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -80,7 +80,8 @@ TESTSUITE_AT = \
tests/rstp.at \
tests/interface-reconfigure.at \
tests/vlog.at \
- tests/vtep-ctl.at
+ tests/vtep-ctl.at \
+ tests/ovn.at
KMOD_TESTSUITE_AT = \
tests/kmod-testsuite.at \
@@ -271,6 +272,7 @@ tests_ovstest_SOURCES = \
tests/test-multipath.c \
tests/test-netflow.c \
tests/test-odp.c \
+ tests/test-ovn.c \
tests/test-packets.c \
tests/test-random.c \
tests/test-reconnect.c \
@@ -288,7 +290,7 @@ tests_ovstest_SOURCES += \
tests/test-unix-socket.c
endif
-tests_ovstest_LDADD = lib/libopenvswitch.la
+tests_ovstest_LDADD = lib/libopenvswitch.la lib/libovn.la
dist_check_SCRIPTS = tests/flowgen.pl
noinst_PROGRAMS += tests/test-strtok_r
diff --git a/tests/ovn.at b/tests/ovn.at
new file mode 100644
index 0000000..69a5d96
--- /dev/null
+++ b/tests/ovn.at
@@ -0,0 +1,97 @@
+AT_BANNER([OVN])
+
+AT_SETUP([ovn -- lexer])
+dnl OVN lexer test cases.
+dnl For lines without =>, input and expected output are identical.
+dnl For lines with =>, input precedes => and expected output follows =>.
+AT_DATA([test-cases.txt], [dnl
+foo bar baz quuxquuxquux _abcd_ a.b.c.d a123_.456
+"abc\u0020def" => "abc def"
+" => error("Input ends inside quoted string.")dnl "
+
+a/*b*/c => a c
+a//b c => a
+a/**/b => a b
+a/*/b => a error("`/*' without matching `*/'.")
+a/*/**/b => a b
+a/b => a error("`/' is only valid as part of `//' or `/*'.") b
+
+0 1 12345 18446744073709551615
+18446744073709551616 => error("Decimal constants must be less than 2**64.")
+9999999999999999999999 => error("Decimal constants must be less than 2**64.")
+01 => error("Decimal constants must not have leading zeros.")
+
+0/0
+0/1
+1/0 => error("Value contains unmasked 1-bits.")
+1/1
+128/384
+1/3
+1/ => error("Integer constant expected.")
+
+1/0x123 => error("Value and mask have incompatible formats.")
+
+0x1234
+0x01234 => 0x1234
+0x0 => 0
+0x000 => 0
+0xfedcba9876543210
+0XFEDCBA9876543210 => 0xfedcba9876543210
+0xfedcba9876543210fedcba9876543210
+0xfedcba9876543210fedcba98765432100 => error("Hexadecimal constant requires more than 128 bits.")
+0x0000fedcba9876543210fedcba9876543210 => 0xfedcba9876543210fedcba9876543210
+0x => error("Hex digits expected following 0x.")
+0X => error("Hex digits expected following 0X.")
+0x0/0x0 => 0/0
+0x0/0x1 => 0/0x1
+0x1/0x0 => error("Value contains unmasked 1-bits.")
+0xffff/0x1ffff
+0x. => error("Invalid syntax in hexadecimal constant.")
+
+192.168.128.1 1.2.3.4 255.255.255.255 0.0.0.0
+256.1.2.3 => error("Invalid numeric constant.")
+192.168.0.0/16
+192.168.0.0/255.255.0.0 => 192.168.0.0/16
+192.168.0.0/255.255.255.0 => 192.168.0.0/24
+192.168.0.0/255.255.0.255
+192.168.0.0/255.0.0.0 => error("Value contains unmasked 1-bits.")
+192.168.0.0/32
+192.168.0.0/255.255.255.255 => 192.168.0.0/32
+
+::
+::1
+ff00::1234 => ff00::1234
+2001:db8:85a3::8a2e:370:7334
+2001:db8:85a3:0:0:8a2e:370:7334 => 2001:db8:85a3::8a2e:370:7334
+2001:0db8:85a3:0000:0000:8a2e:0370:7334 => 2001:db8:85a3::8a2e:370:7334
+::ffff:192.0.2.128
+::ffff:c000:0280 => ::ffff:192.0.2.128
+::1/::1
+::1/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff => ::1/128
+::1/128
+ff00::/8
+ff00::/ff00:: => ff00::/8
+
+01:23:45:67:ab:cd
+01:23:45:67:AB:CD => 01:23:45:67:ab:cd
+fe:dc:ba:98:76:54
+FE:DC:ba:98:76:54 => fe:dc:ba:98:76:54
+01:00:00:00:00:00/01:00:00:00:00:00
+ff:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff
+fe:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff
+ff:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff => error("Value contains unmasked 1-bits.")
+fe:x => error("Invalid numeric constant.")
+00:01:02:03:04:x => error("Invalid numeric constant.")
+
+(){}[[]]==!=<<=>>=!&&||..,;= => ( ) { } [[ ]] == != < <= > >= ! && || .. , ; =
+& => error("`&' is only valid as part of `&&'.")
+| => error("`|' is only valid as part of `||'.")
+. => error("`.' is only valid as part of `..' or a numeric constant.")
+
+^ => error("Invalid character `^' in input.")
+])
+AT_CAPTURE_FILE([input.txt])
+sed 's/ =>.*//' test-cases.txt > input.txt
+sed 's/.* => //' test-cases.txt > expout
+AT_CHECK([ovstest test-ovn lex < input.txt], [0], [expout])
+AT_CLEANUP
diff --git a/tests/test-ovn.c b/tests/test-ovn.c
new file mode 100644
index 0000000..a4fd23f
--- /dev/null
+++ b/tests/test-ovn.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "command-line.h"
+#include "dynamic-string.h"
+#include "ovn/lex.h"
+#include "ovstest.h"
+#include "util.h"
+
+static void
+compare_token(const struct lex_token *a, const struct lex_token *b)
+{
+ if (a->type != b->type) {
+ fprintf(stderr, "type differs: %d -> %d\n", a->type, b->type);
+ return;
+ }
+
+ if (!((a->s && b->s && !strcmp(a->s, b->s))
+ || (!a->s && !b->s))) {
+ fprintf(stderr, "string differs: %s -> %s\n",
+ a->s ? a->s : "(null)",
+ b->s ? b->s : "(null)");
+ return;
+ }
+
+ if (a->type == LEX_T_INTEGER || a->type == LEX_T_MASKED_INTEGER) {
+ if (memcmp(&a->value, &b->value, sizeof a->value)) {
+ fprintf(stderr, "value differs\n");
+ return;
+ }
+
+ if (a->type == LEX_T_MASKED_INTEGER
+ && memcmp(&a->mask, &b->mask, sizeof a->mask)) {
+ fprintf(stderr, "mask differs\n");
+ return;
+ }
+ }
+
+ if (a->format != b->format
+ && !(a->format == LEX_F_HEXADECIMAL
+ && b->format == LEX_F_DECIMAL
+ && a->value.integer == 0)) {
+ fprintf(stderr, "format differs: %d -> %d\n", a->format, b->format);
+ }
+}
+
+static void
+test_lex(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+{
+ struct ds input;
+ struct ds output;
+
+ ds_init(&input);
+ ds_init(&output);
+ while (!ds_get_line(&input, stdin)) {
+ struct lexer lexer;
+
+ lexer_init(&lexer, ds_cstr(&input));
+ ds_clear(&output);
+ while (lexer_get(&lexer) != LEX_T_END) {
+ size_t len = output.length;
+ lex_token_format(&lexer.token, &output);
+
+ /* Check that the formatted version can really be parsed back
+ * losslessly. */
+ if (lexer.token.type != LEX_T_ERROR) {
+ const char *s = ds_cstr(&output) + len;
+ struct lexer l2;
+
+ lexer_init(&l2, s);
+ lexer_get(&l2);
+ compare_token(&lexer.token, &l2.token);
+ lexer_destroy(&l2);
+ }
+ ds_put_char(&output, ' ');
+ }
+ lexer_destroy(&lexer);
+
+ ds_chomp(&output, ' ');
+ puts(ds_cstr(&output));
+ }
+ ds_destroy(&input);
+ ds_destroy(&output);
+}
+
+static const struct command commands[] = {
+ {"lex", NULL, 0, 0, test_lex},
+ {NULL, NULL, 0, 0, NULL},
+};
+
+static void
+test_ovn_main(int argc, char *argv[])
+{
+ set_program_name(argv[0]);
+ run_command(argc - 1, argv + 1, commands);
+}
+
+OVSTEST_REGISTER("test-ovn", test_ovn_main);
diff --git a/tests/testsuite.at b/tests/testsuite.at
index cd7f455..ba1f5bb 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -65,3 +65,4 @@ m4_include([tests/stp.at])
m4_include([tests/rstp.at])
m4_include([tests/vlog.at])
m4_include([tests/vtep-ctl.at])
+m4_include([tests/ovn.at])
--
2.1.3
More information about the dev
mailing list