[ovs-dev] [PATCH ovn v2 2/3] actions: Add new OVN logical fields 'ip.src' and ip.dst'.

Numan Siddique numans at ovn.org
Wed Oct 14 12:13:14 UTC 2020


On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara at redhat.com> wrote:
>
> On 10/14/20 1:54 PM, Numan Siddique wrote:
> > On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara at redhat.com> wrote:
> >>
> >> On 10/14/20 12:50 PM, Numan Siddique wrote:
> >>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara at redhat.com> wrote:
> >>>>
> >>>> On 10/14/20 11:15 AM, numans at ovn.org wrote:
> >>>>> From: Numan Siddique <numans at ovn.org>
> >>>>>
> >>>>> Thee new fields are version independent and these can be used in any
> >>>>
> >>>> Hi Numan,
> >>>>
> >>>> Nit: s/Thee/These
> >>>>
> >>>>> OVN action. Right now the usage of these fields are restricted to
> >>>>> exchanging the IP source and destination fields.
> >>>>>
> >>>>> Eg. reject {... ip.src <-> ip.dst ... };
> >>>>>
> >>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> >>>>> When pinctrl thread receives the packet in, it checks the IP version of the
> >>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> >>>>> resumes the packet to continue with the pipeline.
> >>>>>
> >>>>> Upcoming patch will make use of these fields.
> >>>>>
> >>>>> Signed-off-by: Numan Siddique <numans at ovn.org>
> >>>>> ---
> >>>>>  controller/pinctrl.c         |  49 +++++++++++++++
> >>>>>  include/ovn/actions.h        |   4 ++
> >>>>>  include/ovn/logical-fields.h |  18 ++++++
> >>>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
> >>>>>  lib/logical-fields.c         |  10 +++
> >>>>>  ovn-sb.xml                   |  37 +++++++++++
> >>>>>  tests/ovn.at                 |  27 ++++++++
> >>>>>  utilities/ovn-trace.c        |  28 +++++++++
> >>>>>  8 files changed, 291 insertions(+)
> >>>>>
> >>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>>>> index 631d058458..bc16a82404 100644
> >>>>> --- a/controller/pinctrl.c
> >>>>> +++ b/controller/pinctrl.c
> >>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >>>>>                                               struct ofputil_packet_in *pin,
> >>>>>                                               struct ofpbuf *userdata,
> >>>>>                                               struct ofpbuf *continuation);
> >>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>> +                                           const struct flow *in_flow,
> >>>>> +                                           struct dp_packet *pkt_in,
> >>>>> +                                           struct ofputil_packet_in *pin,
> >>>>> +                                           struct ofpbuf *continuation);
> >>>>>  static void
> >>>>>  pinctrl_handle_event(struct ofpbuf *userdata)
> >>>>>      OVS_REQUIRES(pinctrl_mutex);
> >>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >>>>>          ovs_mutex_unlock(&pinctrl_mutex);
> >>>>>          break;
> >>>>>
> >>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> >>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> >>>>> +                                       &continuation);
> >>>>> +        break;
> >>>>> +
> >>>>>      default:
> >>>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >>>>>                       ntohl(ah->opcode));
> >>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >>>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
> >>>>>      }
> >>>>>  }
> >>>>> +
> >>>>> +static void
> >>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>> +                               const struct flow *in_flow,
> >>>>> +                               struct dp_packet *pkt_in,
> >>>>> +                               struct ofputil_packet_in *pin,
> >>>>> +                               struct ofpbuf *continuation)
> >>>>> +{
> >>>>> +    enum ofp_version version = rconn_get_version(swconn);
> >>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> >>>>> +    struct dp_packet *pkt_out;
> >>>>> +
> >>>>> +    pkt_out = dp_packet_clone(pkt_in);
> >>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> >>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> >>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> >>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> >>>>> +
> >>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> >>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> >>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> >>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> >>>>> +    } else {
> >>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> >>>>> +         * We could also use packet_set_ipv6() here, but that would require
> >>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> >>>>> +         * unnecessary. */
> >>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> >>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> >>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
> >>>>> +        out_nh->ip6_dst = tmp;
> >>>>> +    }
> >>>>> +
> >>>>> +    pin->packet = dp_packet_data(pkt_out);
> >>>>> +    pin->packet_len = dp_packet_size(pkt_out);
> >>>>> +
> >>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> >>>>> +    dp_packet_delete(pkt_out);
> >>>>> +}
> >>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> >>>>> index b4e5acabb9..bf1fe848b7 100644
> >>>>> --- a/include/ovn/actions.h
> >>>>> +++ b/include/ovn/actions.h
> >>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >>>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >>>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >>>>>      OVNACT(REJECT,            ovnact_nest)            \
> >>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >>>>>
> >>>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >>>>>  enum OVS_PACKED_ENUM ovnact_type {
> >>>>> @@ -612,6 +613,9 @@ enum action_opcode {
> >>>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
> >>>>>       */
> >>>>>      ACTION_OPCODE_REJECT,
> >>>>> +
> >>>>> +    /* ip.src <-> ip.dst */
> >>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>  };
> >>>>>
> >>>>>  /* Header. */
> >>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> >>>>> index ac6f2f909b..bb6daa50ca 100644
> >>>>> --- a/include/ovn/logical-fields.h
> >>>>> +++ b/include/ovn/logical-fields.h
> >>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> >>>>>       */
> >>>>>      OVN_ICMP6_FRAG_MTU,
> >>>>>
> >>>>> +    /*
> >>>>> +     * Name: "ip.src"
> >>>>> +     * Type: be128
> >>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> >>>>> +     *              or sets the field MFF_IPV6_SRC if
> >>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>> +     */
> >>>>> +    OVN_IP_SRC,
> >>>>> +
> >>>>> +    /*
> >>>>> +     * Name: "ip.dst"
> >>>>> +     * Type: be128
> >>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> >>>>> +     *              or sets the field MFF_IPV6_DST if
> >>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>> +     */
> >>>>> +    OVN_IP_DST,
> >>>>> +
> >>>>>      OVN_FIELD_N_IDS
> >>>>>  };
> >>>>>
> >>>>> diff --git a/lib/actions.c b/lib/actions.c
> >>>>> index 1e1bdeff24..e9c77d2a0a 100644
> >>>>> --- a/lib/actions.c
> >>>>> +++ b/lib/actions.c
> >>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
> >>>>>  {
> >>>>>  }
> >>>>>
> >>>>> +static bool
> >>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> >>>>> +{
> >>>>> +    switch (field->symbol->ovn_field->id) {
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +        return true;
> >>>>> +
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> >>>>> +                    field->symbol->name);
> >>>>> +        return false;
> >>>>> +
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>> +}
> >>>>> +
> >>>>>  static void
> >>>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
> >>>>>  {
> >>>>>      size_t ofs = ctx->ovnacts->size;
> >>>>>      struct ovnact_load *load;
> >>>>>      if (lhs->symbol->ovn_field) {
> >>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
> >>>>> +            return;
> >>>>> +        }
> >>>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >>>>>      } else {
> >>>>>          load = ovnact_put_LOAD(ctx->ovnacts);
> >>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
> >>>>>      format_assignment(move, "<->", s);
> >>>>>  }
> >>>>>
> >>>>> +static void
> >>>>> +parse_ovnfield_exchange(struct action_context *ctx,
> >>>>> +                        const struct expr_field *lhs,
> >>>>> +                        const struct expr_field *rhs)
> >>>>> +{
> >>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> >>>>> +    move->lhs = *lhs;
> >>>>> +    move->rhs = *rhs;
> >>>>> +}
> >>>>> +
> >>>>>  static void
> >>>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>                          const struct expr_field *lhs)
> >>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>          return;
> >>>>>      }
> >>>>>
> >>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> >>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>>      const struct expr_symbol *ls = lhs->symbol;
> >>>>>      const struct expr_symbol *rs = rhs.symbol;
> >>>>>      if ((ls->width != 0) != (rs->width != 0)) {
> >>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
> >>>>>                        ntohs(load->imm.value.be16_int));
> >>>>>          break;
> >>>>>
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>>      case OVN_FIELD_N_IDS:
> >>>>>      default:
> >>>>>          OVS_NOT_REACHED();
> >>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> >>>>>          encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>          break;
> >>>>>      }
> >>>>> +
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>>      case OVN_FIELD_N_IDS:
> >>>>>      default:
> >>>>>          OVS_NOT_REACHED();
> >>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> >>>>>      free(fwd_group->child_ports);
> >>>>>  }
> >>>>>
> >>>>> +static void
> >>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> >>>>> +{
> >>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> >>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> >>>>> +    switch (lhs->id) {
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        break;
> >>>>> +
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>
> >>>> Nit: I would use the shorter:
> >>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> >>>
> >>> You mean to drop the switch right ? If so, Ack.
> >>>
> >>
> >> Right, I meant ovs_assert instead of switch.
> >>
> >>>
> >>>>
> >>>>> +
> >>>>> +    switch (rhs->id) {
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        break;
> >>>>> +
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>
> >>>> Nit: I would use the shorter:
> >>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> >>>>
> >>>>> +
> >>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> >>>>> +}
> >>>>> +
> >>>>> +static void
> >>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> >>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> >>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> >>>>> +{
> >>>>> +    /* Right now we only support exchanging the IPs in
> >>>>> +     * OVN fields (ip.src <-> ip.dst). */
> >>>>> +    size_t oc_offset =
> >>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
> >>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
> >>>>
> >>>> I think this means that all packets matching a flow with action "reject { ...
> >>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
> >>>> reject action, one for the ovnfield-exchange action.
> >>>>
> >>>> Is that a concern wrt. performance?
> >>>
> >>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
> >>> icmp4.frag_mtu = 1500; ...};
> >>>
> >>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
> >>> in reject and let reject action
> >>> swap the ip src with ip destination. I thought about that and my take
> >>> is that since reject { } results in
> >>> TCP RST or ICMP unreachable packet, it should be fine. I see these
> >>> packets as more of control packets.
> >>>
> >>> I think it should be ok. Let me know what you think.
> >>>
> >>
> >> I think I like the approach of "reject" action implicitly swapping IPs best.
> >> In the end it's not like we could implement reject withough swapping IP dst
> >> with IP src so why not do it as part of the reject action.
> >>
> >
> > Agree. reject action is expected to be used to send the generated tcp
> > rst/icmp unreachable
> > packet back to the sender.
> >
> > I'm ok to change the reject action to just swap the IPs internally if
> > everyone is fine.
> >
> > @Mark Michelson  @Han Zhou  Do you have any comments here ?
> >
> > Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
> > will swap the ip source and destination
> >
> > Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> > approach in this patch series. This results in
> > 2 packet-ins.
> >
> > I personally prefer (2) since in OVN we normally tell what inner
> > actions to apply for many OVN actions.
> > But I have no strong preference and I'm fine changing to (1).
> >
> > If we chose option (1), we could add a comment inside the action like
> > - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> > eth.dst; output ; }
> >
>
> Then, why not swap ETH addresses implicitly too?

As I mentioned above, I'd prefer if ovn-northd says what actions to
apply for the reject
packet. Just like for icmp4 or arp actions, ovn-northd says what
actions to apply for the
transformed packet from the source packet.

Thanks
Numan

>
> >
> >> But on the other hand, I think there might be other simpler ways to generate a
> >> lot of packet-ins towards ovn-controller that I don't have a strong preference
> >> about either solution.  I just wanted to make sure we discuss the performance
> >> implications.
> >
> > In my opinion there should not be any performance implications.
> >
> >
> >>
> >>>
> >>>>
> >>>>> +}
> >>>>> +
> >>>>>  /* Parses an assignment or exchange or put_dhcp_opts action. */
> >>>>>  static void
> >>>>>  parse_set_action(struct action_context *ctx)
> >>>>> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> >>>>> index bf61df7719..d6f1981f52 100644
> >>>>> --- a/lib/logical-fields.c
> >>>>> +++ b/lib/logical-fields.c
> >>>>> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
> >>>>>          OVN_ICMP6_FRAG_MTU,
> >>>>>          "icmp6.frag_mtu",
> >>>>>          4, 32,
> >>>>> +    }, {
> >>>>> +        OVN_IP_SRC,
> >>>>> +        "ip.src",
> >>>>> +        16, 128,
> >>>>> +    }, {
> >>>>> +        OVN_IP_DST,
> >>>>> +        "ip.dst",
> >>>>> +        16, 128,
> >>>>
> >>>> Just partially related to your change, I'm a bit confused by the meaning of
> >>>> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
> >>>> those values.  Is that the case or am I missing something?
> >>>
> >>> These OVN fields are defined more like the way - mf_fields are defined.
> >>> Like icmp4.frag_mtu is 2 bytes and 16 bits field.
> >>>
> >>> So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
> >>> size of ipv6.
> >>>
> >>> Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
> >>> example, these fields
> >>> are not used. Later on if there is a need to support assigning a value
> >>> to these fields, these fields
> >>> could be used. Although I don't see a need to support it since we
> >>> already have ip4.src/ip
> >
> >>>
> >>
> >> Ok, my question was more about the fact that I don't see the ovn_field.n_bytes
> >> or ovn_fields.n_bits being used anywhere in general (not referring to
> >> ip.src/ip.dst only).
> >>
> >
> > Actually they are being used. See
> > https://github.com/ovn-org/ovn/blob/master/lib/actions.c#L394
> > and https://github.com/ovn-org/ovn/blob/master/lib/expr.c#L569
> >
> > If you add the below in the ovn -- action parsing test case in ovn.at
> > ---
> > icmp4.frag_mtu = 65536;
> > ---
> >
> > You'll see the below error:
> > 17-bit constant is not compatible with 16-bit field icmp4.frag_mtu.
> >
>
> Ah, you're right, sorry about the noise, I see it now.
>
> Thanks,
> Dumitru
>
> _______________________________________________
> dev mailing list
> dev at openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>


More information about the dev mailing list