[ovs-dev] [PATCH v4 2/5] ovn: Add generic HA chassis group

Numan Siddique nusiddiq at redhat.com
Wed Mar 27 10:00:14 UTC 2019


On Wed, Mar 27, 2019 at 12:32 AM Han Zhou <zhouhan at gmail.com> wrote:

> On Tue, Mar 26, 2019 at 11:08 AM Numan Siddique <nusiddiq at redhat.com>
> wrote:
> >
> >
> >
> > On Tue, Mar 26, 2019 at 11:21 PM Han Zhou <zhouhan at gmail.com> wrote:
> >>
> >> On Tue, Mar 26, 2019 at 9:59 AM Numan Siddique <nusiddiq at redhat.com>
> wrote:
> >> >
> >> >
> >> > Thanks Han for the review. I will address them
> >> > Please see comments inline
> >> >
> >> > Thanks
> >> > Numan
> >> >
> >> >
> >> > On Tue, Mar 26, 2019 at 8:07 AM Han Zhou <zhouhan at gmail.com> wrote:
> >> >>
> >> >> Mostly looks good to me, just minor comments.
> >> >>
> >> >> On Thu, Mar 14, 2019 at 12:32 PM <nusiddiq at redhat.com> wrote:
> >> >> >
> >> >> > +static void
> >> >> > +build_lrouter_groups__(struct hmap *ports, struct ovn_datapath
> *od)
> >> >> > +{
> >> >> > +    ovs_assert((od && od->nbr && od->lr_group));
> >> >> > +
> >> >> > +    if (od->l3dgw_port && od->l3redirect_port) {
> >> >> > +        /* It's a logical router with gateway port. If it
> >> >> > +         * has HA_Chassis_Group associated to it in SB DB, then
> store the
> >> >> > +         * ha chassis group name. */
> >> >> > +        if (od->l3redirect_port->sb->ha_chassis_group) {
> >> >> > +            sset_add(&od->lr_group->ha_chassis_groups,
> >> >> > +
>  od->l3redirect_port->sb->ha_chassis_group->name);
> >> >> > +        }
> >> >> > +    }
> >> >> > +
> >> >> > +    for (size_t i = 0; i < od->nbr->n_ports; i++) {
> >> >> > +        struct ovn_port *router_port =
> >> >> > +            ovn_port_find(ports, od->nbr->ports[i]->name);
> >> >> > +
> >> >> > +        if (!router_port || !router_port->peer) {
> >> >> > +            continue;
> >> >> > +        }
> >> >> > +
> >> >> > +        /* Get the peer logical switch/logical router datapath. */
> >> >> > +        struct ovn_datapath *peer_dp = router_port->peer->od;
> >> >> > +        if (peer_dp->nbr) {
> >> >> > +            if (!peer_dp->lr_group) {
> >> >> > +                peer_dp->lr_group = od->lr_group;
> >> >> > +
> od->lr_group->router_dps[od->lr_group->n_router_dps++]
> >> >> > +                    = peer_dp;
> >> >> > +                build_lrouter_groups__(ports, peer_dp);
> >> >> > +            }
> >> >> > +        } else {
> >> >> > +            for (size_t j = 0; j < peer_dp->n_router_ports; j++) {
> >> >> > +                if (!peer_dp->router_ports[j]->peer) {
> >> >> > +                    /* If there is no peer port connecting to the
> >> >> > +                    * router datapath, ignore it. */
> >> >>
> >> >> s/router datapath/router port
> >> >>
> >> >> > +                    continue;
> >> >> > +                }
> >> >> > +
> >> >>
> >> >> > +static void
> >> >> > +build_lrouter_groups(struct hmap *ports, struct ovs_list *lr_list)
> >> >> > +{
> >> >> > +    struct ovn_datapath *od;
> >> >> > +    size_t n_router_dps = ovs_list_size(lr_list);
> >> >> > +
> >> >> > +    LIST_FOR_EACH (od, lr_list, lr_list) {
> >> >> > +        if (!od->lr_group) {
> >> >> > +            od->lr_group = xzalloc(sizeof *od->lr_group);
> >> >> > +            /* Each logical router group can have max
> >> >> > +             * 'n_router_dps'. So allocate enough memory. */
> >> >> > +            od->lr_group->router_dps = xcalloc(sizeof *od,
> n_router_dps);
> >> >> > +            od->lr_group->router_dps[od->lr_group->n_router_dps]
> = od;
> >> >>
> >> >> Here it is more clear to be: od->lr_group->router_dps[0] = od;
> >> >>
> >> >> > +            od->lr_group->n_router_dps = 1;
> >> >> > +            sset_init(&od->lr_group->ha_chassis_groups);
> >> >> > +            build_lrouter_groups__(ports, od);
> >> >> > +        }
> >> >> > +    }
> >> >> > +}
> >> >> > +
> >> >>
> >> >> >  static void
> >> >> > -destroy_datapaths_and_ports(struct hmap *datapaths, struct hmap
> *ports)
> >> >> > +destroy_datapaths_and_ports(struct hmap *datapaths, struct hmap
> *ports,
> >> >> > +                            struct ovs_list *lr_list)
> >> >> >  {
> >> >> > +    struct ovn_datapath *router_dp;
> >> >> > +    LIST_FOR_EACH_POP (router_dp, lr_list, lr_list) {
> >> >> > +        if (router_dp->lr_group) {
> >> >> > +            struct lrouter_group *lr_group = router_dp->lr_group;
> >> >> > +
> >> >> > +            for (size_t i = 0; i <
> router_dp->lr_group->n_router_dps; i++) {
> >> >> > +                if (router_dp->lr_group->router_dps[i] !=
> router_dp) {
> >> >>
> >> >> This if-condition seems not needed.
>

Actually it is needed. I first thought the same. But there were crashes
because of NULL pointer deference.
If we don't have this check, then the check "i <
router_dp->lr_group->n_router_dps" in the for loop would
result in the NULL pointer dereference.



> >> >>
> >> >> > +                    router_dp->lr_group->router_dps[i]->lr_group
> = NULL;
> >> >> > +                }
> >> >> > +            }
> >> >>
> >> >> s/router_dp->lr_group/lr_group in above for-loop.
> >> >>
> >> >> > +
> >> >> > +            free(lr_group->router_dps);
> >> >> > +            sset_destroy(&lr_group->ha_chassis_groups);
> >> >> > +            free(lr_group);
> >> >> > +            router_dp->lr_group = NULL;
> >> >> > +        }
> >> >> > +    }
> >> >> > +
> >> >>
> >> >> > diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
> >> >> > index 10a59649a..48d27b960 100644
> >> >> > --- a/ovn/ovn-nb.ovsschema
> >> >> > +++ b/ovn/ovn-nb.ovsschema
> >> >> > @@ -1,7 +1,7 @@
> >> >> >  {
> >> >> >      "name": "OVN_Northbound",
> >> >> > -    "version": "5.14.1",
> >> >> > -    "cksum": "3758097843 20509",
> >> >> > +    "version": "5.15.0",
> >> >> > +    "cksum": "1078795414 21917",
> >> >> >      "tables": {
> >> >> >          "NB_Global": {
> >> >> >              "columns": {
> >> >> > @@ -271,6 +271,12 @@
> >> >> >                                       "refType": "strong"},
> >> >> >                               "min": 0,
> >> >> >                               "max": "unlimited"}},
> >> >> > +                "ha_chassis_group": {
> >> >> > +                    "type": {"key": {"type": "uuid",
> >> >> > +                                     "refTable":
> "HA_Chassis_Group",
> >> >> > +                                     "refType": "weak"},
> >> >>
> >> >> Shall this be strong ref instead? In normal case logical ports hosted
> >> >> by a HA-chassis-group should be deleted before the HA-chassis-group
> is
> >> >> removed. (same for SB schema)
> >> >
> >> >
> >> > I would prefer weak ref because it would be easier for CMS to manage
> the references.
> >> > It doesn't need to keep track of the ref count before deleting the
> HA_Chassis_Group row.
> >> >
> >>
> >> Sorry that I don't understand why it requires ref count. With strong
> >> reference, it just prevents operators to delete HA_Chassis_Group by
> >> mistake (i.e. when they are still used by routers). CMS can just
> >> simply return an error in this case, without any ref counter
> >> maintenance. Otherwise, if it is weak reference, an operator can
> >> delete HA_Chassis_Group even if it is still in use, which causes the
> >> related logical router ports being converted from centralized to
> >> distributed, implicitly, which I think is not good.
> >>
> >
> > Agree that operator can delete HA_Chassis_Group by mistake even if
> > logical ports/logical router ports are still referencing the
> HA_Chassis_Group.
> >
> > What I mean is, CMS cannot delete the HA_Chassis_Group rows unless
> > it clears all the references to HA_Chassis_Group.
> >
> > In a recent commit, we changed the logical switches and logical routers
> > referencing the Load_Balancer to weak mainly because of this.
> >  -
> https://github.com/openvswitch/ovs/commit/612f80fa8ebf88dad2e204364c6c02b451dca36c
> >
> > That's why I thought to have a weak reference.
> > You still think its good to change to strong ?
> >
>
> Yes I am well aware of the LB change - in fact we are working on a
> workaround in go-ovn project for older version of OVN that still has
> strong reference for LB.
> However, I think that is a different scenario. For a logical switch, a
> LB is something additionally added, and from an operators point of
> view it is pretty common to add/remove LB(s) to/from a logical switch.
> However, for a logical router port, it is usually pre-defined that it
> is a GW port or distributed port, when the deployment model is
> decided. Although it is possible to convert a GW port to a distributed
> port, it is uncommon. If it is really what the operator wants to do,
> it should be handled explicitly by updating the logical router port,
> rather than implicitly by removing the HA_Chassis_Group. Does this
> make sense?
>
>
CMS could use an HA_Chassis_Group both for associating it with multiple
logical router ports and also with multiple "external" ports.

 I am still little concerned from CMS point of view.

Eg.
ovn-nbctl ha-chassis-group-add-chassis hagrp1 ch1 30
ovn-nbctl ha-chassis-group-add-chassis hagrp1 ch2 20
ovn-nbctl ha-chassis-group-add-chassis hagrp1 ch3 10

ovn-nbctl set logical_router_port lr0-public ha_chassis_group=<HAGRP1_UUID>
ovn-nbctl set logical_router_port lr1-public ha_chassis_group=<HAGRP1_UUID>
ovn-nbctl set logical_router_port lr2-public ha_chassis_group=<HAGRP1_UUID>

ovn-nbctl set logical_switch_port ext-p1 ha_chasssi_group=<HAGRP1_UUID>
ovn-nbctl set logical_switch_port ext-p2 ha_chasssi_group=<HAGRP1_UUID>
ovn-nbctl set logical_switch_port ext-p3 ha_chasssi_group=<HAGRP1_UUID>
..
ovn-nbctl set logical_switch_port ext-pn ha_chasssi_group=<HAGRP1_UUID>

In the above case, CMS should know the list of logical router ports/logical
switch ports
and can delete the "hagrp1" only when all the references are cleared.

But I agree with your point that unlike load balancer which can be
independently created
or deleted that's not the case with HA_Chassis_Group.

I will go ahead and change it to "strong". We can probably revert it to
"false" later if
it is hard for CMS to manage the references.

Thanks


>
> >
> >>
> >> >
> >> >>
> >> >>
> >> >> > +                             "min": 0,
> >> >> > +                             "max": 1}},
> >> >> >                  "options": {
> >> >> >                      "type": {"key": "string",
> >> >> >                               "value": "string",
> >> >> > @@ -392,5 +398,29 @@
> >> >> >                      "type": {"key": "string", "value": "string",
> >> >> >                               "min": 0, "max": "unlimited"}}},
> >> >> >              "indexes": [["name"]],
> >> >> > -            "isRoot": false}}
> >> >> > +            "isRoot": false},
> >> >> > +        "HA_Chassis": {
> >> >> > +            "columns": {
> >> >> > +                "chassis_name": {"type": "string"},
> >> >> > +                "priority": {"type": {"key": {"type": "integer",
> >> >> > +                                              "minInteger": 0,
> >> >> > +                                              "maxInteger":
> 32767}}},
> >> >> > +                "external_ids": {
> >> >> > +                    "type": {"key": "string", "value": "string",
> >> >> > +                             "min": 0, "max": "unlimited"}}},
> >> >> > +            "isRoot": false},
> >> >> > +        "HA_Chassis_Group": {
> >> >> > +            "columns": {
> >> >> > +                "name": {"type": "string"},
> >> >> > +                "ha_chassis": {
> >> >> > +                    "type": {"key": {"type": "uuid",
> >> >> > +                                     "refTable": "HA_Chassis",
> >> >> > +                                     "refType": "strong"},
> >> >>
> >> >> Is it better to be weak ref here, considering that multiple a
> >> >> HA-chassis can belong to multiple groups? (same for SB schema)
> >> >
> >> >
> >> > If you see HA_Chassis is non root. and hence I think it should be
> strong ref.
> >> > And I think it would be easier for CMS to manage. Deleting
> HA_Chassis_Group will delete its
> >> > ha chassis list.
> >>
> >> "Deleting HA_Chassis_Group will delete its ha chassis list." => This
> >> is the behavior of *weak* reference, right? It seems we have reverse
> >> understanding about strong v.s. weak references :)
> >
> >
> > Deleting HA_Chassis_Group will delete the HA_Chassis rows because -
> HA_Chassis is "non root".
> > (Just like how Logical switch references the Logical_Switch_Ports.
> Logical_Switch_Port is "non root"
> > and is strongly referenced by Logical_Switch and deleting Logical_Switch
> row, also deletes the Logical_Switch_Ports
> > of that Logical_Switch).  Same is used here.
> >
>
> You are right. I just went over the rfc7047 again and realized it was
> my misunderstanding. Thanks for correcting me! So I agree with you
> that to keep HA_Chassis non-root, strong reference is the only choice.
> (though I don't quite understand the design in rfc7047 why weak
> reference can't prevent a non-root row being deleted - only strong
> reference prevents that.)
>
> > As I stated earlier, I think its better that HA_Chassis is not
> referenced by multiple HA_Chassis_Group rows.  Otherwise
> > the code in ovn-northd would get complicated to sync in SB DB.
>
> Also, forget about my statement "a HA-chassis can belong to multiple
> groups". My brain wasn't clear :(
>
>
No worries :).

Thanks for all the comments and feedback.

 Numan


> >
> > Thanks
> > Numan
> >
> >
> >>
> >>
> >> >
> >> > I think it would complicate the code in ovn-northd when syncing with
> the SB DB if HA_Chassis is
> >> > root and it is referenced by multiple HA_Chassis_Group rows.
> >> >
> >> > So I would prefer HA_Chassis to be non root and HA_Chassis_Group has
> a strong ref to HA_Chassis table.
> >> >
> >> > Thanks
> >> > Numan
> >> >
> >> >>
> >> >>
> >> >> > +                             "min": 0,
> >> >> > +                             "max": "unlimited"}},
> >> >> > +                "external_ids": {
> >> >> > +                    "type": {"key": "string", "value": "string",
> >> >> > +                             "min": 0, "max": "unlimited"}}},
> >> >> > +            "indexes": [["name"]],
> >> >> > +            "isRoot": true}}
> >> >> >      }
> >> >>
> >> >> I will get back to 3/5 - 5/5 reviewing asap.
>


More information about the dev mailing list