[ovs-dev] [RFC flow tunnels 5/8] tunnel: Userspace implementation of tunnel manipulation.

Jarno Rajahalme jarno.rajahalme at nsn.com
Thu Jan 10 15:53:35 UTC 2013


First of all: Thanks for these patches. I just managed to get something that
passes the tests "patched up" based on the older patches last night, most
of which I can now throw away :-)

On Jan 10, 2013, at 1:43 , ext Ethan Jackson wrote:
> From: Jesse Gross <jesse at nicira.com>
> 
> ...
> +
> +struct tnl_match {
> +    ovs_be64 in_key;
> +    ovs_be32 ip_src;
> +    ovs_be32 ip_dst;
> +    uint32_t odp_port;
> +    bool in_key_present;
> +    bool in_key_flow;
> +};

tnl_match is only used to match on incoming packets. Therefore it would be
much clearer if "ip_src" actually meant the "ip source address" and "ip_dst"
"ip destination address" w.r.t the received packet. See below.

> +BUILD_ASSERT_DECL(sizeof(struct tnl_match) == 24);
> +
> +struct tnl_port {
> +    struct hmap_node match_node;
> +    struct hmap_node ofport_node;
> +
> +    const struct ofport *ofport;
> +    unsigned int netdev_seq;
> +    struct tnl_match match;
> +};

How about the struct ofport_dpif pointing to this? That way we could get rid of
the ofport_map (and find_ofport()), as the calling sites in ofproto_dpif.c would
have the ofport_dpif->tnl_port available?

The pointer could also be used for a bit faster local test (non-NULL for tunnels,
NULL for non-tunnels) in ofproto_dpif.

> +
> +static struct hmap tnl_match_map = HMAP_INITIALIZER(&tnl_match_map);
> +static struct hmap ofport_map = HMAP_INITIALIZER(&ofport_map);
> +
...
> +
> +/* Adds 'ofport' to the module with datapath port number 'odp_port'. 'ofport's
> + * must be added before they can be used by the module. If 'ofport' is not a
> + * tunnel, does nothing. Returns 0 on success, otherwise a positive errno
> + * value. */
> +int
> +tnl_port_add(const struct ofport *ofport, uint32_t odp_port)
> +{
> +    const struct netdev_tunnel_config *cfg;
> +    struct tnl_port *existing_port;
> +    struct tnl_port *tnl_port;
> +
> +    cfg = netdev_get_tunnel_config(ofport->netdev);
> +    if (!cfg) {
> +        return 0;
> +    }
> +
> +    tnl_port = xzalloc(sizeof *tnl_port);
> +    tnl_port->ofport = ofport;
> +    tnl_port->netdev_seq = netdev_change_seq(tnl_port->ofport->netdev);
> +
> +    tnl_port->match.in_key = cfg->in_key;
> +    tnl_port->match.ip_src = cfg->ip_src;
> +    tnl_port->match.ip_dst = cfg->ip_dst;

cfg->ip_src is actually the local IP address, so itwould go into the ip_dst field
for matching purposes, if match.ip_dst would mean "IP destination address of
the packet being matched". Ditto for cfg->ip_dst (which is the remote IP
address).

Also, it would be better to keep the "ip_local" and "ip_remote" terminology in the
tunnel cfg definition, as that unambiguously retains the intended semantics
whether sending or receiving.

> +    tnl_port->match.in_key_present = cfg->in_key_present;
> +    tnl_port->match.in_key_flow = cfg->in_key_flow;
> +    tnl_port->match.odp_port = odp_port;
> +
> +    existing_port = tnl_find_exact(&tnl_port->match);
> +    if (existing_port) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +        tnl_match_fmt(&tnl_port->match, &ds);
> +        VLOG_WARN("%s: attempting to add tunnel port with same config as "
> +                  "port '%s' (%s)", tnl_port_get_name(tnl_port),
> +                  tnl_port_get_name(existing_port), ds_cstr(&ds));
> +        ds_destroy(&ds);
> +        free(tnl_port);
> +        return EEXIST;
> +    }
> +
> +
> +    hmap_insert(&tnl_match_map, &tnl_port->match_node,
> +                tnl_hash(&tnl_port->match));
> +    hmap_insert(&ofport_map, &tnl_port->ofport_node,
> +                hash_pointer(tnl_port->ofport, 0));
> +    tnl_port_mod_log(tnl_port, "adding");
> +    return 0;
> +}

Could return the tnl_port here. NULL could be used instead of EEXIST, and the
test for existence of tunnel configuration is already done in port_construct(),
where this is called from (and which is the prefect place to initialize the tunnel
port pointer in to the ofport_dpif.

> +
> +static void
> +tnl_port_del__(struct tnl_port *tnl_port)
> +{
> +    tnl_port_mod_log(tnl_port, "removing");
> +    hmap_remove(&tnl_match_map, &tnl_port->match_node);
> +    hmap_remove(&ofport_map, &tnl_port->ofport_node);
> +    free(tnl_port);
> +}

This would become the tnl_port_del(), if ofport_dpif hold the pointer.

...
> 
> +/* Returns true if 'flow' should be submitted to tnl_port_receive(). */
> +bool
> +tnl_port_should_receive(const struct flow *flow)
> +{
> +    return flow->tunnel.ip_dst != 0;
> +}

Move this to tunnel.h as a static inline?

> +
> +/* Transforms 'flow' so that it appears to have been received by a tunnel
> + * OpenFlow port controlled by this module instead of the datapath port it
> + * actually came in on.  Sets 'flow''s in_port to the appropriate OpenFlow port
> + * number.  Returns the 'ofport' corresponding to the new in_port.
> + *
> + * Callers should verify that 'flow' needs to be received by calling
> + * tnl_port_should_receive() before this function.
> + *
> + * Leaves 'flow' untouched and returns null if unsuccessful. */
> +const struct ofport *
> +tnl_port_receive(struct flow *flow)
> +{
> +    char *pre_flow_str = NULL;
> +    struct tnl_port *tnl_port;
> +    struct tnl_match match;
> +
> +    memset(&match, 0, sizeof match);
> +    match.odp_port = flow->in_port;
> +    match.ip_src = flow->tunnel.ip_dst;
> +    match.ip_dst = flow->tunnel.ip_src;

See my point above.

> +    match.in_key = flow->tunnel.tun_id;
> +    match.in_key_present = flow->tunnel.flags & FLOW_TNL_F_KEY;
> +
> +    tnl_port = tnl_find(&match);
> +    if (!tnl_port) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +        tnl_match_fmt(&match, &ds);
> +        VLOG_WARN_RL(&rl, "receive tunnel port not found (%s)", ds_cstr(&ds));
> +        ds_destroy(&ds);
> +
> +        return NULL;
> +    }
> +
> +    if (is_ip(flow)
> +        && ((flow->tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE)
> +        && (flow->nw_tos & IP_ECN_MASK) == IP_ECN_NOT_ECT) {
> +        VLOG_WARN_RL(&rl, "dropping tunnel packet marked ECN CE but is not ECN"
> +                     " capable");
> +        return NULL;
> +    }
> +
> +    if (!VLOG_DROP_DBG(&dbg_rl)) {
> +        pre_flow_str = flow_to_string(flow);
> +    }
> +
> +    flow->in_port = tnl_port->ofport->ofp_port;
> +
> +    memset(&flow->tunnel, 0, sizeof flow->tunnel);
> +    flow->tunnel.tun_id = match.in_key;
> +

Is this really necessary? I imagine we could want to match on also the other
tunnel fields on the flow-basis in the future. Also, this seems like a change
to the current tunneling behavior, so it should not be a problem to let the
tunnel data be?

Also, since odp_flow_key_from_flow() ignores the flow.in_port and takes
the odp port as an argument, we could let the tunnel fitness be "PERFECT"
and save some memory, if we keep the flow->tunnel untouched.

> 
> +    if (pre_flow_str) {
> +        char *post_flow_str = flow_to_string(flow);
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +
> +        tnl_port_fmt(tnl_port, &ds);
> +        VLOG_DBG("flow received\n"
> +                 "%s"
> +                 " pre: %s\n"
> +                 "post: %s", ds_cstr(&ds),
> +                 pre_flow_str, post_flow_str);
> +        ds_destroy(&ds);
> +        free(pre_flow_str);
> +        free(post_flow_str);
> +    }
> +    return tnl_port->ofport;
> +}
> +
> +/* Given that 'flow' should be output to 'ofport', updates 'flow''s tunnel
> + * headers and sets 'odp_port' to the actual datapath port that the output
> + * should happen on.  Returns true if successful, false if 'ofport' is not a
> + * tunnel, or there was a problem. */
> +bool
> +tnl_port_send(const struct ofport *ofport, struct flow *flow,
> +              uint32_t *odp_port)
> +{
> +    const struct netdev_tunnel_config *cfg;
> +    struct tnl_port *tnl_port;
> +    char *pre_flow_str = NULL;
> +
> +    tnl_port = find_ofport(ofport);
> +    if (!tnl_port) {
> +        return false;
> +    }
> +

Thes would be unnecessary, if the caller passes the tnl_port as the 1st
argument instead of the ofport.

> +    cfg = netdev_get_tunnel_config(tnl_port->ofport->netdev);
> +    if (!cfg) {
> +        VLOG_ERR("tnl_port missing its configuration");
> +        return false;
> +    }
> +
> +    if (!VLOG_DROP_DBG(&dbg_rl)) {
> +        pre_flow_str = flow_to_string(flow);
> +    }
> +
> +    flow->tunnel.ip_src = tnl_port->match.ip_src;
> +    flow->tunnel.ip_dst = tnl_port->match.ip_dst;

These could become match.ip_dst and match.ip_src. if that is unclear,
then it would be better to retain the "local_ip"/"remote_ip" terminology
also in the tnl_match, However the word "match" could be considered
to refer to input...

> +
> +    if (cfg->out_key_flow) {
> +        flow->tunnel.tun_id = flow->tunnel.tun_id;
> +    } else {
> +        flow->tunnel.tun_id = cfg->out_key;
> +    }
> +
> +    if (cfg->ttl_inherit && is_ip(flow)) {
> +        flow->tunnel.ip_ttl = flow->nw_ttl;
> +    } else {
> +        flow->tunnel.ip_ttl = cfg->ttl;
> +    }
> +
> +    if (cfg->tos_inherit && is_ip(flow)) {
> +        flow->tunnel.ip_tos = flow->nw_tos & IP_DSCP_MASK;
> +    } else {
> +        flow->tunnel.ip_tos = cfg->tos;
> +    }
> +    if ((flow->nw_tos & IP_ECN_MASK) == IP_ECN_CE) {
> +        flow->tunnel.ip_tos |= IP_ECN_ECT_0;
> +    } else {
> +        flow->tunnel.ip_tos |= flow->nw_tos & IP_ECN_MASK;
> +    }
> +
> +    flow->tunnel.flags = (cfg->dont_fragment ? FLOW_TNL_F_DONT_FRAGMENT : 0)
> +        | (cfg->csum ? FLOW_TNL_F_CSUM : 0)
> +        | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0);
> +
> +    if (pre_flow_str) {
> +        struct ds ds = DS_EMPTY_INITIALIZER;
> +        char *post_flow_str;
> +
> +        post_flow_str = flow_to_string(flow);
> +        tnl_port_fmt(tnl_port, &ds);
> +
> +        VLOG_DBG("flow sent\n"
> +                 "%s"
> +                 " pre: %s\n"
> +                 "post: %s", ds_cstr(&ds),
> +                 pre_flow_str, post_flow_str);
> +
> +        ds_destroy(&ds);
> +        free(pre_flow_str);
> +        free(post_flow_str);
> +    }
> +
> +    *odp_port = tnl_port->match.odp_port;
> +    return true;
> +}




More information about the dev mailing list