[ovs-discuss] Adding a new vlan will caused the host lost connection

Alex Wang alexw at nicira.com
Fri Jun 7 16:52:54 UTC 2013


It is recommended that you upgrade to version 1.10 or above, since the fix
for this bug is backported only to branch-1.10.

If you do not want to upgrade, you may try my fix below. It is for
branch-1.7.



---
 ofproto/ofproto-dpif.c |   44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 53fe172..30bb28b 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -528,8 +528,8 @@ struct vlan_splinter {
     int vid;
 };

-static uint32_t vsp_realdev_to_vlandev(const struct ofproto_dpif *,
-                                       uint32_t realdev, ovs_be16
vlan_tci);
+static uint16_t vsp_realdev_to_vlandev(const struct ofproto_dpif *,
+                                       uint16_t realdev, ovs_be16
vlan_tci);
 static uint16_t vsp_vlandev_to_realdev(const struct ofproto_dpif *,
                                        uint16_t vlandev, int *vid);
 static bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *);
@@ -4665,13 +4665,17 @@ send_packet(const struct ofport_dpif *ofport,
struct ofpbuf *packet)
     uint16_t odp_port;
     struct flow flow;
     int error;
+    uint16_t vlandev_port;

     flow_extract((struct ofpbuf *) packet, 0, 0, 0, &flow);
-    odp_port = vsp_realdev_to_vlandev(ofproto, ofport->odp_port,
-                                      flow.vlan_tci);
-    if (odp_port != ofport->odp_port) {
+    vlandev_port = vsp_realdev_to_vlandev(ofproto, ofport->up.ofp_port,
+                                          flow.vlan_tci);
+    if (vlandev_port != ofport->up.ofp_port) {
+        odp_port = ofp_port_to_odp_port(vlandev_port);
         eth_pop_vlan(packet);
         flow.vlan_tci = htons(0);
+    } else {
+        odp_port = ofport->odp_port;
     }

     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
@@ -4856,6 +4860,7 @@ compose_output_action__(struct action_xlate_ctx *ctx,
uint16_t ofp_port,
     ovs_be16 flow_vlan_tci = ctx->flow.vlan_tci;
     uint8_t flow_nw_tos = ctx->flow.nw_tos;
     uint16_t out_port;
+    uint16_t vlandev_port;

     if (ofport) {
         struct priority_to_dscp *pdscp;
@@ -4876,11 +4881,15 @@ compose_output_action__(struct action_xlate_ctx
*ctx, uint16_t ofp_port,
          * later and we're pre-populating the flow table.  */
     }

-    out_port = vsp_realdev_to_vlandev(ctx->ofproto, odp_port,
-                                      ctx->flow.vlan_tci);
-    if (out_port != odp_port) {
+    vlandev_port = vsp_realdev_to_vlandev(ctx->ofproto, ofp_port,
+                                          ctx->flow.vlan_tci);
+    if (vlandev_port != ofp_port) {
+        out_port = ofp_port_to_odp_port(vlandev_port);
         ctx->flow.vlan_tci = htons(0);
+    } else {
+        out_port = odp_port;
     }
+
     commit_odp_actions(&ctx->flow, &ctx->base_flow, ctx->odp_actions);
     nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port);

@@ -6955,18 +6964,17 @@ hash_realdev_vid(uint16_t realdev_ofp_port, int vid)
 }

 /* Returns the ODP port number of the Linux VLAN device that corresponds to
- * 'vlan_tci' on the network device with port number 'realdev_odp_port' in
- * 'ofproto'.  For example, given 'realdev_odp_port' of eth0 and
'vlan_tci' 9,
- * it would return the port number of eth0.9.
+ * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
+ * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
+ * 'vlan_tci' 9, it would return the port number of eth0.9.
  *
- * Unless VLAN splinters are enabled for port 'realdev_odp_port', this
- * function just returns its 'realdev_odp_port' argument. */
-static uint32_t
+ * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
+ * function just returns its 'realdev_ofp_port' argument. */
+static uint16_t
 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
-                       uint32_t realdev_odp_port, ovs_be16 vlan_tci)
+                       uint16_t realdev_ofp_port, ovs_be16 vlan_tci)
 {
     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
-        uint16_t realdev_ofp_port = odp_port_to_ofp_port(realdev_odp_port);
         int vid = vlan_tci_to_vid(vlan_tci);
         const struct vlan_splinter *vsp;

@@ -6975,11 +6983,11 @@ vsp_realdev_to_vlandev(const struct ofproto_dpif
*ofproto,
                                  &ofproto->realdev_vid_map) {
             if (vsp->realdev_ofp_port == realdev_ofp_port
                 && vsp->vid == vid) {
-                return ofp_port_to_odp_port(vsp->vlandev_ofp_port);
+                return vsp->vlandev_ofp_port;
             }
         }
     }
-    return realdev_odp_port;
+    return realdev_ofp_port;
 }

 static struct vlan_splinter *
-- 
1.7.9.5


On Thu, Jun 6, 2013 at 11:27 PM, Kris zhang <zhang.kris at gmail.com> wrote:

> Thanks Alex, but how to take this bug fix? Do I have to upgrade to the
> latest version of openvswitch? Or just replace the file ofproto-dpif.c in
> version 1.7.1 and recompile?
>
> BTW, today I ran into the lost-packets issue again. My previous conclusion
> may not be correct. Let me explain the whole process:
>
> centos6.0, ovs1.7.1, bonding3.5.0, kernel2.6.32-71.29.1.el6.x86_64,
> igb2.4.13
>
> 1) Adding a bond0, only has one slave eth0
> 1a) ifconfig bond0 up
> 2) ovs-vsctl add-br br0
> 2a) ovs-vsctl set bridge br0 stp_enable=true,
> other_config:stp-forward-delay=1
> 2b) ifconfig br0 <host_ip> up
> 3) ovs-vsctl add-port bond0
> 3a) ifconfig bond0 up
> 4) ovs-vsctl add-br br3000 br0 3000
> 5) tunctl -t taptest
> 5a) ifconfig taptest up
> 6) ovs-vsctl add-port br3000 taptest
> (The above is run by the system when the host boots.)
>
> Ping this host's IP, and it will lose 4 packets when the port taptest is
> added to br0.
> The strange thing is: if I create another tap, "tap2", and run the command
> manually (ovs-vsctl add-port br3000 tap2), it won't lose any packets.
> I compared the two taps, and they are almost the same. The clue is:
> In interface table:
> ----------------------------------------> taptest
> statistics  :  {stp_error_count=0, stp_rx_count=59, stp_tx_count=886}
> status      :  {stp_port_id="8001", stp_role=designated,
> stp_sec_in_state="1394", stp_state=forwarding}
>
> ----------------------------------------> tap2
> statistics  :  {stp_error_count=0, stp_rx_count=0, stp_tx_count=55}
> status      :  {stp_port_id="8003", stp_role=designated,
> stp_sec_in_state="66", stp_state=forwarding}
>
> Actually, I don't know what the above data means, but my feeling tells me
> this is the reason.
> So I stopped STP on br0, and it works. I hope the above method can help other
> people solve similar problems.
>
>
> Thanks,
> Kris
>
>
>
>
>
>
>
>
> On Wed, Jun 5, 2013 at 12:15 AM, Alex Wang <alexw at nicira.com> wrote:
>
>> Hey Kris,
>>
>> There was a bug in the "add_vsp()" function in "ofproto/ofproto-dpif.c".
>> And the fix is in this patch
>> http://git.openvswitch.org/cgi-bin/gitweb.cgi?p=openvswitch;a=commit;h=deea120099d23fac3f687ec302351e38a21ee353
>> .
>>
>> I think this may be the reason for your problem.
>>
>> Kind Regards,
>> Alex Wang
>>
>>
>> On Tue, Jun 4, 2013 at 8:07 AM, Kris zhang <zhang.kris at gmail.com> wrote:
>>
>>>  Thanks Jesse, I solved this issue by upgrading the NIC driver igb
>>> from 2.1 to 2.4; now it is OK even if I use a Linux bond as the external
>>> port of the OVS bridge. I will try OVS bonding once I have a chance. Also,
>>> I hope the FAQ can list which NIC drivers have VLAN problems and which
>>> versions they should be upgraded to.
>>>
>>> Thanks,
>>> Kris
>>>
>>>
>>> On Thu, May 30, 2013 at 7:22 AM, Jesse Gross <jesse at nicira.com> wrote:
>>>
>>>> Linux bonds don't pass vlan information through to the driver. If you
>>>> use OVS bonding then you shouldn't have this problem.
>>>>
>>>> On Wed, May 29, 2013 at 11:08 AM, Kris zhang <zhang.kris at gmail.com>
>>>> wrote:
>>>> > I found the reason, the problem is not caused by vlan splinters, it
>>>> caused
>>>> > by bond0. I don't know why, but if remove the bond0, and ovs-br0
>>>> directly
>>>> > connect to eth0, the problem disappears.
>>>> >
>>>> >
>>>> >
>>>> >
>>>> > On Mon, May 27, 2013 at 10:10 AM, Kris zhang <zhang.kris at gmail.com>
>>>> wrote:
>>>> >>
>>>> >> Hi Jesse,
>>>> >>
>>>> >> Because if i don't use the vlan splinters, the VMs' network traffic
>>>> will
>>>> >> be nearly 0 M/s (but they can ping each other). I read FAQ, it says
>>>> the
>>>> >> reason maybe the NIC driver or Linux kernel version problems, and it
>>>> also
>>>> >> says the vlan splinters may solve this issue, but they don't said
>>>> the ping
>>>> >> will miss some packets when add a new vlan. is it a bug for vlan
>>>> splinters?
>>>> >> Please see the images in the attachment.
>>>> >>
>>>> >> Thanks,
>>>> >> Kris
>>>> >>
>>>> >>
>>>> >> On Thu, May 23, 2013 at 11:25 PM, Jesse Gross <jesse at nicira.com>
>>>> wrote:
>>>> >>>
>>>> >>> It seems that the original issue was that you were using VLANs when
>>>> >>> you shouldn't have. In that case, why are you trying to use VLAN
>>>> >>> splinters?
>>>> >>>
>>>> >>> On Thu, May 23, 2013 at 2:44 AM, Kris zhang <zhang.kris at gmail.com>
>>>> wrote:
>>>> >>> > Thanks Jesse, but i still cannot fix my issue. Because if i use
>>>> vlan
>>>> >>> > splinters, the above issue will be happened, if i don't use vlan
>>>> >>> > splinters,
>>>> >>> > the packets between the VMs are very slow (ping is ok), i use
>>>> ovs-dpctl
>>>> >>> > dump-flows br0, get the following result:
>>>> >>> >
>>>> >>> >
>>>> >>> >
>>>> in_port(1),eth(src=c6:b0:ea:37:29:47,dst=c6:b0:3a:c0:0d:55),eth_type(0x8100),vlan(vid=3000,pcp=0),encap(eth_type(0x0800),ipv4(src=20.1.120.12,dst=20.1.120.13,proto=1,tos=1,ttl=64,frag=no),icmp(type=8,code=0)),
>>>> >>> > packets:101, bytes:10302, used:4.341s, actions=pop_vlan,3
>>>> >>> >
>>>> >>> >
>>>> in_port(3),eth(src=c6:b0:3a:c0:0d:55,dst=c6:b0:ea:37:29:47),eth_type(0x0800),ipv4(src=20.1.120.13,dst=20.1.120.12,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)),
>>>> >>> > packets:101, bytes:9898, used:4.341s,
>>>> >>> > actions=push_vlan(vid=3000,pcp=0),1
>>>> >>> >
>>>> >>> > The port 1 is eth1, and the port 3 is the tap of VM.
>>>> >>> > Does the iptables affect the ovs? If not, i have to upgrade the
>>>> linux
>>>> >>> > kernel, or upgrade NIC driver.
>>>> >>> >
>>>> >>> > Thanks,
>>>> >>> > Kris
>>>> >>> >
>>>> >>> >
>>>> >>> >
>>>> >>> >
>>>> >>> > On Tue, May 21, 2013 at 3:32 AM, Jesse Gross <jesse at nicira.com>
>>>> wrote:
>>>> >>> >>
>>>> >>> >> There's an extensive section in the FAQ about vlans that I would
>>>> >>> >> recommend reading.
>>>> >>> >>
>>>> >>> >> On Mon, May 20, 2013 at 8:51 AM, Kris zhang <
>>>> zhang.kris at gmail.com>
>>>> >>> >> wrote:
>>>> >>> >> > No, so you mean it maybe not caused by vlan splinters?
>>>> >>> >> >
>>>> >>> >> >
>>>> >>> >> > On Mon, May 20, 2013 at 11:18 PM, Jesse Gross <
>>>> jesse at nicira.com>
>>>> >>> >> > wrote:
>>>> >>> >> >>
>>>> >>> >> >> On Mon, May 20, 2013 at 4:37 AM, Kris zhang <
>>>> zhang.kris at gmail.com>
>>>> >>> >> >> wrote:
>>>> >>> >> >> > Hi guys,
>>>> >>> >> >> >
>>>> >>> >> >> > I use ovs-1.7.1, and i run ovs on a single NIC host
>>>> (CentOS):
>>>> >>> >> >> >
>>>> >>> >> >> > # ovs-vsctl add-br br0
>>>> >>> >> >> > # ovs-vsctl add-port br0 eth0
>>>> >>> >> >> >
>>>> >>> >> >> > Then i setup the eth0 interface's other_config:
>>>> >>> >> >> > enable-vlan-splinters="true".
>>>> >>> >> >> >
>>>> >>> >> >> > Last I ping this host by another PC, and at same time i do
>>>> this:
>>>> >>> >> >> >
>>>> >>> >> >> > # ovs-vsctl add-port taptest br0 tag=100
>>>> >>> >> >> >
>>>> >>> >> >> > I found there are 4 "Request timed out." happened. That
>>>> means the
>>>> >>> >> >> > host
>>>> >>> >> >> > lost
>>>> >>> >> >> > connect about 4 seconds.
>>>> >>> >> >> >
>>>> >>> >> >> > If i remove the vlan splinters on interface eth0. it won't
>>>> >>> >> >> > happen.
>>>> >>> >> >> > So does anybody know the reason?
>>>> >>> >> >>
>>>> >>> >> >> Is the other machine actually on that VLAN?
>>>> >>> >> >
>>>> >>> >> >
>>>> >>> >
>>>> >>> >
>>>> >>
>>>> >>
>>>> >
>>>>
>>>
>>>
>>> _______________________________________________
>>> discuss mailing list
>>> discuss at openvswitch.org
>>> http://openvswitch.org/mailman/listinfo/discuss
>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://openvswitch.org/pipermail/ovs-discuss/attachments/20130607/ccbe4e20/attachment.html>


More information about the discuss mailing list