Folks,

I am getting some strange errors on my kolla-based OVN deployment. I have only 5 nodes, so it's not a large deployment. Are there any OVN-related tuning options that I missed?

I have the following timers configured at present:

ovn-openflow-probe-interval="60"
ovn-remote-probe-interval="60000"
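
For reference, this is how I believe those values land on each node (kolla manages the setting for me, so this is the manual equivalent rather than something I ran by hand); ovn-openflow-probe-interval is in seconds and ovn-remote-probe-interval in milliseconds, per ovn-controller(8):

ovs-vsctl set open_vswitch . external_ids:ovn-openflow-probe-interval=60
ovs-vsctl set open_vswitch . external_ids:ovn-remote-probe-interval=60000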

root@ctrl1:~# tail -f /var/log/kolla/openvswitch/ov*.log
==> /var/log/kolla/openvswitch/ovn-controller.log <==
2023-06-09T06:45:47.615Z|00091|lflow_cache|INFO|Detected cache inactivity (last active 30001 ms ago): trimming cache
2023-06-09T07:11:12.908Z|00092|lflow_cache|INFO|Detected cache inactivity (last active 30002 ms ago): trimming cache
2023-06-09T07:12:21.225Z|00093|lflow_cache|INFO|Detected cache inactivity (last active 30001 ms ago): trimming cache
2023-06-10T19:13:10.382Z|00094|lflow_cache|INFO|Detected cache inactivity (last active 30001 ms ago): trimming cache
2023-06-10T19:17:10.734Z|00095|lflow_cache|INFO|Detected cache inactivity (last active 30002 ms ago): trimming cache
2023-06-10T19:18:33.270Z|00096|lflow_cache|INFO|Detected cache inactivity (last active 30004 ms ago): trimming cache
2023-06-10T19:25:23.987Z|00097|lflow_cache|INFO|Detected cache inactivity (last active 30002 ms ago): trimming cache
2023-06-10T19:32:03.981Z|00098|lflow_cache|INFO|Detected cache inactivity (last active 30003 ms ago): trimming cache
2023-06-10T19:36:59.153Z|00099|lflow_cache|INFO|Detected cache inactivity (last active 30006 ms ago): trimming cache
2023-06-10T20:18:34.798Z|00100|lflow_cache|INFO|Detected cache inactivity (last active 30002 ms ago): trimming cache

==> /var/log/kolla/openvswitch/ovn-nb-db.log <==
2023-06-10T20:52:34.461Z|00518|reconnect|WARN|tcp:192.168.1.11:56798: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.463Z|00519|reconnect|WARN|tcp:192.168.1.13:57048: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.464Z|00520|reconnect|WARN|tcp:192.168.1.11:56792: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.465Z|00521|reconnect|WARN|tcp:192.168.1.13:57016: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.466Z|00522|reconnect|WARN|tcp:192.168.1.11:56746: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.466Z|00523|reconnect|WARN|tcp:192.168.1.11:56742: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.468Z|00524|reconnect|WARN|tcp:192.168.1.11:56786: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.472Z|00525|reconnect|WARN|tcp:192.168.1.11:56784: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.474Z|00526|reconnect|WARN|tcp:192.168.1.13:57044: connection dropped (Connection reset by peer)
2023-06-10T20:52:34.484Z|00527|reconnect|WARN|tcp:192.168.1.11:56760: connection dropped (Connection reset by peer)

==> /var/log/kolla/openvswitch/ovn-northd.log <==
2023-06-10T20:52:34.377Z|00695|reconnect|INFO|tcp:192.168.1.12:6641: connected
2023-06-10T20:52:34.379Z|00696|ovsdb_cs|INFO|tcp:192.168.1.12:6641: clustered database server is not cluster leader; trying another server
2023-06-10T20:52:34.379Z|00697|ovsdb_cs|INFO|tcp:192.168.1.12:6641: clustered database server is not cluster leader; trying another server
2023-06-10T20:52:34.379Z|00698|reconnect|INFO|tcp:192.168.1.12:6641: connection attempt timed out
2023-06-10T20:52:34.380Z|00699|reconnect|INFO|tcp:192.168.1.11:6641: connecting...
2023-06-10T20:52:34.380Z|00700|reconnect|INFO|tcp:192.168.1.11:6641: connected
2023-06-10T20:52:34.408Z|00701|ovsdb_cs|INFO|tcp:192.168.1.11:6641: clustered database server is not cluster leader; trying another server
2023-06-10T20:52:34.408Z|00702|reconnect|INFO|tcp:192.168.1.11:6641: connection attempt timed out
2023-06-10T20:52:35.409Z|00703|reconnect|INFO|tcp:192.168.1.13:6641: connecting...
2023-06-10T20:52:35.409Z|00704|reconnect|INFO|tcp:192.168.1.13:6641: connected
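
The ovn-northd lines suggest it keeps bouncing between servers looking for the raft leader. In case it helps, I can pull the cluster state with something like this (container and socket names are what kolla uses on my nodes, so they may differ elsewhere):

docker exec ovn_nb_db ovs-appctl -t /var/run/ovn/ovnnb_db.ctl cluster/status OVN_Northbound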

==> /var/log/kolla/openvswitch/ovn-sb-db.log <==
2023-06-10T20:51:27.588Z|00496|raft|INFO|Transferring leadership to write a snapshot.
2023-06-10T20:51:27.597Z|00497|raft|INFO|rejected append_reply (not leader)
2023-06-10T20:51:27.597Z|00498|raft|INFO|rejected append_reply (not leader)
2023-06-10T20:51:27.597Z|00499|raft|INFO|server d051 is leader for term 3574
2023-06-10T20:51:27.663Z|00500|jsonrpc|WARN|tcp:192.168.1.13:45144: receive error: Connection reset by peer
2023-06-10T20:51:27.664Z|00501|reconnect|WARN|tcp:192.168.1.13:45144: connection dropped (Connection reset by peer)
2023-06-10T20:51:27.665Z|00502|jsonrpc|WARN|tcp:192.168.1.12:47858: receive error: Connection reset by peer
2023-06-10T20:51:27.665Z|00503|reconnect|WARN|tcp:192.168.1.12:47858: connection dropped (Connection reset by peer)
2023-06-10T20:51:27.667Z|00504|jsonrpc|WARN|tcp:192.168.1.11:41500: receive error: Connection reset by peer
2023-06-10T20:51:27.667Z|00505|reconnect|WARN|tcp:192.168.1.11:41500: connection dropped (Connection reset by peer)
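
The "Transferring leadership to write a snapshot" entries, plus the fact that the SB cluster is already on term 3574, make me wonder if the raft election timer (1000 ms by default) is too low here. Is raising it the recommended tuning? I believe it would be something like the command below, run against the current leader (container and socket names again assumed from my setup), and from what I read the value can only be doubled per invocation, so reaching a higher value takes several calls:

docker exec ovn_sb_db ovs-appctl -t /var/run/ovn/ovnsb_db.ctl cluster/change-election-timer OVN_Southbound 2000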

==> /var/log/kolla/openvswitch/ovsdb-server.log <==
2023-05-29T02:05:02.891Z|00027|reconnect|WARN|unix#67466: connection dropped (Connection reset by peer)
2023-05-31T16:20:33.494Z|00028|reconnect|ERR|tcp:127.0.0.1:59928: no response to inactivity probe after 5 seconds, disconnecting
2023-06-01T20:43:23.516Z|00001|vlog|INFO|opened log file /var/log/kolla/openvswitch/ovsdb-server.log
2023-06-01T20:43:23.520Z|00002|ovsdb_server|INFO|ovsdb-server (Open vSwitch) 2.17.3
2023-06-01T20:43:33.522Z|00003|memory|INFO|7216 kB peak resident set size after 10.0 seconds
2023-06-01T20:43:33.522Z|00004|memory|INFO|atoms:826 cells:770 monitors:5 sessions:3
2023-06-03T07:44:05.774Z|00005|reconnect|ERR|tcp:127.0.0.1:40098: no response to inactivity probe after 5 seconds, disconnecting
2023-06-03T09:41:49.039Z|00006|reconnect|ERR|tcp:127.0.0.1:60042: no response to inactivity probe after 5 seconds, disconnecting
2023-06-11T02:05:08.802Z|00007|jsonrpc|WARN|unix#26478: receive error: Connection reset by peer
2023-06-11T02:05:08.803Z|00008|reconnect|WARN|unix#26478: connection dropped (Connection reset by peer)

==> /var/log/kolla/openvswitch/ovs-vswitchd.log <==
2023-06-10T19:16:50.733Z|00138|connmgr|INFO|br-int<->unix#3: 4 flow_mods in the 2 s starting 10 s ago (4 adds)
2023-06-10T19:18:13.267Z|00139|connmgr|INFO|br-int<->unix#3: 14 flow_mods 10 s ago (14 adds)
2023-06-10T19:19:13.267Z|00140|connmgr|INFO|br-int<->unix#3: 4 flow_mods 39 s ago (4 adds)
2023-06-10T19:22:23.652Z|00141|connmgr|INFO|br-int<->unix#3: 2 flow_mods 10 s ago (2 adds)
2023-06-10T19:24:11.917Z|00142|connmgr|INFO|br-int<->unix#3: 16 flow_mods 10 s ago (16 deletes)
2023-06-10T19:25:11.917Z|00143|connmgr|INFO|br-int<->unix#3: 42 flow_mods in the 26 s starting 44 s ago (28 adds, 14 deletes)
2023-06-10T19:31:41.912Z|00144|connmgr|INFO|br-int<->unix#3: 113 flow_mods in the 2 s starting 10 s ago (6 adds, 107 deletes)
2023-06-10T19:36:29.369Z|00145|connmgr|INFO|br-int<->unix#3: 109 flow_mods in the 9 s starting 10 s ago (103 adds, 6 deletes)
2023-06-10T19:39:49.885Z|00146|connmgr|INFO|br-int<->unix#3: 2 flow_mods 10 s ago (2 adds)
2023-06-10T20:18:12.593Z|00147|connmgr|INFO|br-int<->unix#3: 111 flow_mods in the 2 s starting 10 s ago (6 adds, 105 deletes)