Failing client node due to not receiving metrics updates from client node within

classic Classic list List threaded Threaded
5 messages Options
radha radha
Reply | Threaded
Open this post in threaded view
|

Failing client node due to not receiving metrics updates from client node within

hi,
   Brought up 4 Ignite servers on k8s, and ingested 160GB of data.
  I have a Java application which connects to the servers as a client to get metrics from the servers.
  The client comes up, but after some time it gets disconnected.
  
   In the server logs I get the error below:
   
   {"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Added new node to topology: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2148, minorTopVer=0], crd=true, evt=NODE_JOINED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,214","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2148, minorTopVer=0], evt=NODE_JOINED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,981","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Pinging node: 6daf3474-2265-48dd-a8f0-471d1a437084"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,985","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Finished node ping [nodeId=6daf3474-2265-48dd-a8f0-471d1a437084, res=false, time=5ms]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,331","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Failing client node due to not receiving metrics updates from client node within 'IgniteConfiguration.clientFailureDetectionTimeout' (consider increasing configuration property) [timeout=30000, node=TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,332","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Node FAILED: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,333","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2151, minorTopVer=0], crd=true, evt=NODE_FAILED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,349","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2151, minorTopVer=0], evt=NODE_FAILED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
 



Client config file:

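   <?xml version="1.0" encoding="UTF-8"?>
   <beans xmlns="http://www.springframework.org/schema/beans"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xmlns:util="http://www.springframework.org/schema/util"
          xsi:schemaLocation="
           http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans.xsd
           http://www.springframework.org/schema/util
           http://www.springframework.org/schema/util/spring-util.xsd">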

   <bean class="org.apache.ignite.configuration.IgniteConfiguration">
 
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
   <property name="peerClassLoadingEnabled" value="true"/>
    <!-- Enable cache events. -->
    <property name="includeEventTypes">
        <util:constant static-field="org.apache.ignite.events.EventType.EVTS_DISCOVERY"/>
    </property>


   <property name="failureDetectionTimeout" value="80000"/>
   <property name="clientFailureDetectionTimeout" value="80000"/>
   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>

      </bean>
       </property>
         <property name="networkTimeout" value="60000"/>
    </bean>
   </property>
   </bean>
   </beans>


Thanks
Radha

radha radha
Reply | Threaded
Open this post in threaded view
|

Re: Failing client node due to not receiving metrics updates from client node within

Hi,
Server configuration is below:
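  <?xml version="1.0" encoding="UTF-8"?>
  <beans xmlns="http://www.springframework.org/schema/beans"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="
          http://www.springframework.org/schema/beans
          http://www.springframework.org/schema/beans/spring-beans.xsd">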

  
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
   <property name="communicationSpi">
     <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
       <property name="messageQueueLimit" value="1024"/>
     </bean>
   </property>
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
 
   <property name="peerClassLoadingEnabled" value="{{ .Values.IgniteServer.peerClassLoading }}"/>
   <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
            <!-- Enable metrics for Ignite persistence  -->
            <property name="metricsEnabled" value="false"/>
            <property name="writeThrottlingEnabled" value="true"/>
            <property name="defaultDataRegionConfiguration">
                <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                    <property name="name" value="Default_Region"/>
                    <!-- Enabling Apache Ignite Persistent Store. -->
                    <property name="persistenceEnabled" value="true"/>
                    <!-- Enable metrics for this data region  -->
                    <property name="metricsEnabled" value="false"/>
                    <property name="checkpointPageBufferSize" value="#{4096L * 1024 * 1024}"/>
                </bean>
            </property>
           
            <property name="storagePath" value="/opt/ignite/persist/"/>
            <property name="walPath" value="/opt/ignite/wal/"/>
            
        </bean>
    </property>
   

   <property name="gridLogger">
           <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
               <constructor-arg type="java.lang.String" value="/etc/ignite/log4j2/ignite-log4j2.xml"/>
           </bean>
   </property>
   <property name="failureDetectionTimeout" value="80000"/>


   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">

     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>
      </bean>
     </property>
    </bean>
   </property>
  </bean>

  </beans>

Regards
Radha 

On Wed, 6 Feb 2019 at 15:16, radha jai <[hidden email]> wrote:
hi,
   Bought 4 ignite servers  on k8s, and ingested 160GB of data.
  I have a java application which is used to connect to servers as a client to get the metrics from the servers.
  Client will be up , but after some time it will get disconnect.
  
   In the server logs i get below error:
   
   {"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Added new node to topology: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2148, minorTopVer=0], crd=true, evt=NODE_JOINED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,214","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2148, minorTopVer=0], evt=NODE_JOINED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,981","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Pinging node: 6daf3474-2265-48dd-a8f0-471d1a437084"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,985","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Finished node ping [nodeId=6daf3474-2265-48dd-a8f0-471d1a437084, res=false, time=5ms]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,331","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Failing client node due to not receiving metrics updates from client node within 'IgniteConfiguration.clientFailureDetectionTimeout' (consider increasing configuration property) [timeout=30000, node=TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,332","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Node FAILED: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,333","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2151, minorTopVer=0], crd=true, evt=NODE_FAILED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,349","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2151, minorTopVer=0], evt=NODE_FAILED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
 



Client config file:

   <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

   <bean class="org.apache.ignite.configuration.IgniteConfiguration">
 
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
   <property name="peerClassLoadingEnabled" value="true"/>
    <!-- Enable cache events. -->
    <property name="includeEventTypes">
        <util:constant static-field="org.apache.ignite.events.EventType.EVTS_DISCOVERY"/>
    </property>


   <property name="failureDetectionTimeout" value="80000"/>
   <property name="clientFailureDetectionTimeout" value="80000"/>
   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>

      </bean>
       </property>
         <property name="networkTimeout" value="60000"/>
    </bean>
   </property>
   </bean>
   </beans>


Thanks
Radha

ilya.kasnacheev ilya.kasnacheev
Reply | Threaded
Open this post in threaded view
|

Re: Failing client node due to not receiving metrics updates from client node within

Hello!

It is possible that there are problems with network connectivity.
Can you post more logs from both client and server?

Regards,
--
Ilya Kasnacheev


ср, 6 февр. 2019 г. в 13:17, radha jai <[hidden email]>:
Hi,
Server configuration is below:
  <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

  
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
   <property name="communicationSpi">
     <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
       <property name="messageQueueLimit" value="1024"/>
     </bean>
   </property>
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
 
   <property name="peerClassLoadingEnabled" value="{{ .Values.IgniteServer.peerClassLoading }}"/>
   <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
            <!-- Enable metrics for Ignite persistence  -->
            <property name="metricsEnabled" value="false"/>
            <property name="writeThrottlingEnabled" value="true"/>
            <property name="defaultDataRegionConfiguration">
                <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                    <property name="name" value="Default_Region"/>
                    <!-- Enabling Apache Ignite Persistent Store. -->
                    <property name="persistenceEnabled" value="true"/>
                    <!-- Enable metrics for this data region  -->
                    <property name="metricsEnabled" value="false"/>
                    <property name="checkpointPageBufferSize" value="#{4096L * 1024 * 1024}"/>
                </bean>
            </property>
           
            <property name="storagePath" value="/opt/ignite/persist/"/>
            <property name="walPath" value="/opt/ignite/wal/"/>
            
        </bean>
    </property>
   

   <property name="gridLogger">
           <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
               <constructor-arg type="java.lang.String" value="/etc/ignite/log4j2/ignite-log4j2.xml"/>
           </bean>
   </property>
   <property name="failureDetectionTimeout" value="80000"/>


   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">

     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>
      </bean>
     </property>
    </bean>
   </property>
  </bean>

  </beans>

Regards
Radha 

On Wed, 6 Feb 2019 at 15:16, radha jai <[hidden email]> wrote:
hi,
   Bought 4 ignite servers  on k8s, and ingested 160GB of data.
  I have a java application which is used to connect to servers as a client to get the metrics from the servers.
  Client will be up , but after some time it will get disconnect.
  
   In the server logs i get below error:
   
   {"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Added new node to topology: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2148, minorTopVer=0], crd=true, evt=NODE_JOINED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,214","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2148, minorTopVer=0], evt=NODE_JOINED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,981","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Pinging node: 6daf3474-2265-48dd-a8f0-471d1a437084"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,985","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Finished node ping [nodeId=6daf3474-2265-48dd-a8f0-471d1a437084, res=false, time=5ms]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,331","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Failing client node due to not receiving metrics updates from client node within 'IgniteConfiguration.clientFailureDetectionTimeout' (consider increasing configuration property) [timeout=30000, node=TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,332","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Node FAILED: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,333","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2151, minorTopVer=0], crd=true, evt=NODE_FAILED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,349","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2151, minorTopVer=0], evt=NODE_FAILED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
 



Client config file:

   <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

   <bean class="org.apache.ignite.configuration.IgniteConfiguration">
 
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
   <property name="peerClassLoadingEnabled" value="true"/>
    <!-- Enable cache events. -->
    <property name="includeEventTypes">
        <util:constant static-field="org.apache.ignite.events.EventType.EVTS_DISCOVERY"/>
    </property>


   <property name="failureDetectionTimeout" value="80000"/>
   <property name="clientFailureDetectionTimeout" value="80000"/>
   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>

      </bean>
       </property>
         <property name="networkTimeout" value="60000"/>
    </bean>
   </property>
   </bean>
   </beans>


Thanks
Radha

radha radha
Reply | Threaded
Open this post in threaded view
|

Re: Failing client node due to not receiving metrics updates from client node within

Client log:

[08:24:21]    __________  ________________
[08:24:21]   /  _/ ___/ |/ /  _/_  __/ __/
[08:24:21]  _/ // (7 7    // /  / / / _/
[08:24:21] /___/\___/_/|_/___/ /_/ /___/
[08:24:21]
[08:24:21] ver. 2.6.0#19700101-sha1:DEV
[08:24:21] 2018 Copyright(C) Apache Software Foundation
[08:24:21]
[08:24:21] Ignite documentation: http://ignite.apache.org
[08:24:21]
[08:24:21] Quiet mode.
[08:24:21]   ^-- Logging by 'JavaLogger [quiet=true, config=null]'
[08:24:21]   ^-- To see **FULL** console log here add -DIGNITE_QUIET=false or "-v" to ignite.{sh|bat}
[08:24:21]
[08:24:21] OS: Linux 4.19.8-1.el7.elrepo.x86_64 amd64
[08:24:21] VM information: OpenJDK Runtime Environment 1.8.0_181-b13 Oracle Corporation OpenJDK 64-Bit Server VM 25.181-b13
[08:24:21] Configured plugins:
[08:24:21]   ^-- None
[08:24:21]
[08:24:21] Configured failure handler: [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0]]
[08:24:21] Message queue limit is set to 0 which may lead to potential OOMEs when running cache operations in FULL_ASYNC or PRIMARY_SYNC modes due to message queues growth on sender and receiver sides.
[08:24:21] Security status [authentication=off, tls/ssl=off]
[08:24:23] REST protocols do not start on client node. To start the protocols on client node set '-DIGNITE_REST_START_ON_CLIENT=true' system property.
[08:24:25] IP finder returned empty addresses list. Please check IP finder configuration. Will retry every 2000 ms. Change 'reconnectDelay' to configure the frequency of retries.
[08:24:46] Performance suggestions for grid  (fix if possible)
[08:24:46] To disable, set -DIGNITE_PERFORMANCE_SUGGESTIONS_DISABLED=true
[08:24:46]   ^-- Disable grid events (remove 'includeEventTypes' from configuration)
[08:24:46]   ^-- Enable G1 Garbage Collector (add '-XX:+UseG1GC' to JVM options)
[08:24:46]   ^-- Specify JVM heap max size (add '-Xmx<size>[g|G|m|M|k|K]' to JVM options)
[08:24:46]   ^-- Set max direct memory size if getting 'OOME: Direct buffer memory' (add '-XX:MaxDirectMemorySize=<size>[g|G|m|M|k|K]' to JVM options)
[08:24:46]   ^-- Disable processing of calls to System.gc() (add '-XX:+DisableExplicitGC' to JVM options)
[08:24:46]   ^-- Speed up flushing of dirty pages by OS (alter vm.dirty_expire_centisecs parameter by setting to 500)
[08:24:46]   ^-- Reduce pages swapping ratio (set vm.swappiness=10)
[08:24:46] Refer to this page for more performance suggestions: https://apacheignite.readme.io/docs/jvm-and-system-tuning
[08:24:46]
[08:24:46] To start Console Management & Monitoring run ignitevisorcmd.{sh|bat}
[08:24:46]
[08:24:46] Ignite node started OK (id=59c2f355)
[08:24:46] >>> Ignite cluster is not active (limited functionality available). Use control.(sh|bat) script or IgniteCluster interface to activate.
[08:24:46] Topology snapshot [ver=2, servers=1, clients=1, CPUs=64, offheap=25.0GB, heap=28.0GB]
[08:24:46]   ^-- Node [id=59C2F355-68F8-4185-A9EB-6B10C1F09CBD, clusterState=INACTIVE]
[08:24:56] New version is available at ignite.apache.org: 2.7.0
[08:25:15] Topology snapshot [ver=3, servers=2, clients=1, CPUs=96, offheap=50.0GB, heap=29.0GB]
[08:25:15]   ^-- Node [id=59C2F355-68F8-4185-A9EB-6B10C1F09CBD, clusterState=INACTIVE]
/bin/sh: line 1:    11 Killed                  java -cp /opt/ignite/*:/opt/ignite/dependency-jars/* com.ignite.test.IgniteMetrics $CONFIG_URI


I have enabled debug logs, so I am attaching the server log as a file because it is very big.
I observed the WARNING below in that server log file:
{"type":"log","host":"ignite-cluster-test-ignite-0","level":"WARN","systemid":"c186d1d8","system":"ignite-service","time":"2019-02-08 08:27:52,402","logger":"TcpCommunicationSpi","timezone":"UTC","marker":"","log":"Failed to connect to a remote node (make sure that destination node is alive and operating system firewall is disabled on local and remote hosts)

Also, I noticed that I have two k8s clusters: on one cluster I am not facing this issue, but on the other one I am.

Regards
Radha

On Thu, 7 Feb 2019 at 20:30, Ilya Kasnacheev <[hidden email]> wrote:
Hello!

It is possible that there are problems with network connectivity.
Can you post more logs from both client and server?

Regards,
--
Ilya Kasnacheev


ср, 6 февр. 2019 г. в 13:17, radha jai <[hidden email]>:
Hi,
Server configuration is below:
  <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

  
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
   <property name="communicationSpi">
     <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
       <property name="messageQueueLimit" value="1024"/>
     </bean>
   </property>
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
 
   <property name="peerClassLoadingEnabled" value="{{ .Values.IgniteServer.peerClassLoading }}"/>
   <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
            <!-- Enable metrics for Ignite persistence  -->
            <property name="metricsEnabled" value="false"/>
            <property name="writeThrottlingEnabled" value="true"/>
            <property name="defaultDataRegionConfiguration">
                <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                    <property name="name" value="Default_Region"/>
                    <!-- Enabling Apache Ignite Persistent Store. -->
                    <property name="persistenceEnabled" value="true"/>
                    <!-- Enable metrics for this data region  -->
                    <property name="metricsEnabled" value="false"/>
                    <property name="checkpointPageBufferSize" value="#{4096L * 1024 * 1024}"/>
                </bean>
            </property>
           
            <property name="storagePath" value="/opt/ignite/persist/"/>
            <property name="walPath" value="/opt/ignite/wal/"/>
            
        </bean>
    </property>
   

   <property name="gridLogger">
           <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
               <constructor-arg type="java.lang.String" value="/etc/ignite/log4j2/ignite-log4j2.xml"/>
           </bean>
   </property>
   <property name="failureDetectionTimeout" value="80000"/>


   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">

     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>
      </bean>
     </property>
    </bean>
   </property>
  </bean>

  </beans>

Regards
Radha 

On Wed, 6 Feb 2019 at 15:16, radha jai <[hidden email]> wrote:
hi,
   Bought 4 ignite servers  on k8s, and ingested 160GB of data.
  I have a java application which is used to connect to servers as a client to get the metrics from the servers.
  Client will be up , but after some time it will get disconnect.
  
   In the server logs i get below error:
   
   {"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Added new node to topology: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2148, minorTopVer=0], crd=true, evt=NODE_JOINED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,214","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2148, minorTopVer=0], evt=NODE_JOINED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,981","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Pinging node: 6daf3474-2265-48dd-a8f0-471d1a437084"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,985","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Finished node ping [nodeId=6daf3474-2265-48dd-a8f0-471d1a437084, res=false, time=5ms]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,331","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Failing client node due to not receiving metrics updates from client node within 'IgniteConfiguration.clientFailureDetectionTimeout' (consider increasing configuration property) [timeout=30000, node=TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,332","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Node FAILED: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,333","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2151, minorTopVer=0], crd=true, evt=NODE_FAILED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,349","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2151, minorTopVer=0], evt=NODE_FAILED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
 



Client config file:

   <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

   <bean class="org.apache.ignite.configuration.IgniteConfiguration">
 
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
   <property name="peerClassLoadingEnabled" value="true"/>
    <!-- Enable cache events. -->
    <property name="includeEventTypes">
        <util:constant static-field="org.apache.ignite.events.EventType.EVTS_DISCOVERY"/>
    </property>


   <property name="failureDetectionTimeout" value="80000"/>
   <property name="clientFailureDetectionTimeout" value="80000"/>
   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>

      </bean>
       </property>
         <property name="networkTimeout" value="60000"/>
    </bean>
   </property>
   </bean>
   </beans>


Thanks
Radha


log.txt (1M) Download Attachment
ilya.kasnacheev ilya.kasnacheev
Reply | Threaded
Open this post in threaded view
|

Re: Failing client node due to not receiving metrics updates from client node within

Hello!

/bin/sh: line 1:    11 Killed                  java -cp /opt/ignite/*:/opt/ignite/dependency-jars/* com.ignite.test.IgniteMetrics $CONFIG_URI

Why was the client killed? Server log errors might be related to that fact.

Regards,
--
Ilya Kasnacheev


пт, 8 февр. 2019 г. в 12:38, radha jai <[hidden email]>:
Client log:

[08:24:21]    __________  ________________
[08:24:21]   /  _/ ___/ |/ /  _/_  __/ __/
[08:24:21]  _/ // (7 7    // /  / / / _/
[08:24:21] /___/\___/_/|_/___/ /_/ /___/
[08:24:21]
[08:24:21] ver. 2.6.0#19700101-sha1:DEV
[08:24:21] 2018 Copyright(C) Apache Software Foundation
[08:24:21]
[08:24:21] Ignite documentation: http://ignite.apache.org
[08:24:21]
[08:24:21] Quiet mode.
[08:24:21]   ^-- Logging by 'JavaLogger [quiet=true, config=null]'
[08:24:21]   ^-- To see **FULL** console log here add -DIGNITE_QUIET=false or "-v" to ignite.{sh|bat}
[08:24:21]
[08:24:21] OS: Linux 4.19.8-1.el7.elrepo.x86_64 amd64
[08:24:21] VM information: OpenJDK Runtime Environment 1.8.0_181-b13 Oracle Corporation OpenJDK 64-Bit Server VM 25.181-b13
[08:24:21] Configured plugins:
[08:24:21]   ^-- None
[08:24:21]
[08:24:21] Configured failure handler: [hnd=StopNodeOrHaltFailureHandler [tryStop=false, timeout=0]]
[08:24:21] Message queue limit is set to 0 which may lead to potential OOMEs when running cache operations in FULL_ASYNC or PRIMARY_SYNC modes due to message queues growth on sender and receiver sides.
[08:24:21] Security status [authentication=off, tls/ssl=off]
[08:24:23] REST protocols do not start on client node. To start the protocols on client node set '-DIGNITE_REST_START_ON_CLIENT=true' system property.
[08:24:25] IP finder returned empty addresses list. Please check IP finder configuration. Will retry every 2000 ms. Change 'reconnectDelay' to configure the frequency of retries.
[08:24:46] Performance suggestions for grid  (fix if possible)
[08:24:46] To disable, set -DIGNITE_PERFORMANCE_SUGGESTIONS_DISABLED=true
[08:24:46]   ^-- Disable grid events (remove 'includeEventTypes' from configuration)
[08:24:46]   ^-- Enable G1 Garbage Collector (add '-XX:+UseG1GC' to JVM options)
[08:24:46]   ^-- Specify JVM heap max size (add '-Xmx<size>[g|G|m|M|k|K]' to JVM options)
[08:24:46]   ^-- Set max direct memory size if getting 'OOME: Direct buffer memory' (add '-XX:MaxDirectMemorySize=<size>[g|G|m|M|k|K]' to JVM options)
[08:24:46]   ^-- Disable processing of calls to System.gc() (add '-XX:+DisableExplicitGC' to JVM options)
[08:24:46]   ^-- Speed up flushing of dirty pages by OS (alter vm.dirty_expire_centisecs parameter by setting to 500)
[08:24:46]   ^-- Reduce pages swapping ratio (set vm.swappiness=10)
[08:24:46] Refer to this page for more performance suggestions: https://apacheignite.readme.io/docs/jvm-and-system-tuning
[08:24:46]
[08:24:46] To start Console Management & Monitoring run ignitevisorcmd.{sh|bat}
[08:24:46]
[08:24:46] Ignite node started OK (id=59c2f355)
[08:24:46] >>> Ignite cluster is not active (limited functionality available). Use control.(sh|bat) script or IgniteCluster interface to activate.
[08:24:46] Topology snapshot [ver=2, servers=1, clients=1, CPUs=64, offheap=25.0GB, heap=28.0GB]
[08:24:46]   ^-- Node [id=59C2F355-68F8-4185-A9EB-6B10C1F09CBD, clusterState=INACTIVE]
[08:24:56] New version is available at ignite.apache.org: 2.7.0
[08:25:15] Topology snapshot [ver=3, servers=2, clients=1, CPUs=96, offheap=50.0GB, heap=29.0GB]
[08:25:15]   ^-- Node [id=59C2F355-68F8-4185-A9EB-6B10C1F09CBD, clusterState=INACTIVE]
/bin/sh: line 1:    11 Killed                  java -cp /opt/ignite/*:/opt/ignite/dependency-jars/* com.ignite.test.IgniteMetrics $CONFIG_URI


I have enabled debug logs, so I am attaching the server log as a file because it is very big.
I observed the following WARNING in that server log file:
{"type":"log","host":"ignite-cluster-test-ignite-0","level":"WARN","systemid":"c186d1d8","system":"ignite-service","time":"2019-02-08 08:27:52,402","logger":"TcpCommunicationSpi","timezone":"UTC","marker":"","log":"Failed to connect to a remote node (make sure that destination node is alive and operating system firewall is disabled on local and remote hosts)

Also, I noticed that I have two k8s clusters: on one cluster I am not facing this issue, but on the other I am.

Regards
Radha

On Thu, 7 Feb 2019 at 20:30, Ilya Kasnacheev <[hidden email]> wrote:
Hello!

It is possible that there are problems with network connectivity.
Can you post more logs from both client and server?

Regards,
--
Ilya Kasnacheev


ср, 6 февр. 2019 г. в 13:17, radha jai <[hidden email]>:
Hi,
Server configuration is below:
  <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

  
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
   <property name="communicationSpi">
     <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
       <property name="messageQueueLimit" value="1024"/>
     </bean>
   </property>
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
 
   <property name="peerClassLoadingEnabled" value="{{ .Values.IgniteServer.peerClassLoading }}"/>
   <property name="dataStorageConfiguration">
        <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
            <!-- Enable metrics for Ignite persistence  -->
            <property name="metricsEnabled" value="false"/>
            <property name="writeThrottlingEnabled" value="true"/>
            <property name="defaultDataRegionConfiguration">
                <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                    <property name="name" value="Default_Region"/>
                    <!-- Enabling Apache Ignite Persistent Store. -->
                    <property name="persistenceEnabled" value="true"/>
                    <!-- Enable metrics for this data region  -->
                    <property name="metricsEnabled" value="false"/>
                    <property name="checkpointPageBufferSize" value="#{4096L * 1024 * 1024}"/>
                </bean>
            </property>
           
            <property name="storagePath" value="/opt/ignite/persist/"/>
            <property name="walPath" value="/opt/ignite/wal/"/>
            
        </bean>
    </property>
   

   <property name="gridLogger">
           <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
               <constructor-arg type="java.lang.String" value="/etc/ignite/log4j2/ignite-log4j2.xml"/>
           </bean>
   </property>
   <property name="failureDetectionTimeout" value="80000"/>


   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">

     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>
      </bean>
     </property>
    </bean>
   </property>
  </bean>

  </beans>

Regards
Radha 

On Wed, 6 Feb 2019 at 15:16, radha jai <[hidden email]> wrote:
hi,
   Brought up 4 Ignite servers on k8s and ingested 160GB of data.
  I have a Java application which connects to the servers as a client to get the metrics from the servers.
  The client starts up, but after some time it gets disconnected.
  
   In the server logs I get the error below:
   
   {"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Added new node to topology: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,201","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2148, minorTopVer=0], crd=true, evt=NODE_JOINED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:34,214","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2148, minorTopVer=0], evt=NODE_JOINED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,981","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Pinging node: 6daf3474-2265-48dd-a8f0-471d1a437084"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:24:41,985","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Finished node ping [nodeId=6daf3474-2265-48dd-a8f0-471d1a437084, res=false, time=5ms]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,331","logger":"TcpDiscoverySpi","timezone":"UTC","marker":"","log":"Failing client node due to not receiving metrics updates from client node within 'IgniteConfiguration.clientFailureDetectionTimeout' (consider increasing configuration property) [timeout=30000, node=TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]]"}
{"type":"log","host":"ignite-cluster-0","level":"WARN","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,332","logger":"GridDiscoveryManager","timezone":"UTC","marker":"","log":"Node FAILED: TcpDiscoveryNode [id=6daf3474-2265-48dd-a8f0-471d1a437084, addrs=[0:0:0:0:0:0:0:1%lo, 127.0.0.1, 192.168.1.165], sockAddrs=[/0:0:0:0:0:0:0:1%lo:0, /127.0.0.1:0, /192.168.1.165:0], discPort=0, order=2148, intOrder=1077, lastExchangeTime=1549445074167, loc=false, ver=2.6.0#20180710-sha1:669feacc, isClient=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,333","logger":"time","timezone":"UTC","marker":"","log":"Started exchange init [topVer=AffinityTopologyVersion [topVer=2151, minorTopVer=0], crd=true, evt=NODE_FAILED, evtNode=6daf3474-2265-48dd-a8f0-471d1a437084, customEvt=null, allowMerge=true]"}
{"type":"log","host":"ignite-cluster-0","level":"INFO","systemid":"5605ca0e","system":"ignite-service","time":"2019-02-06 09:25:11,349","logger":"GridCachePartitionExchangeManager","timezone":"UTC","marker":"","log":"Skipping rebalancing (nothing scheduled) [top=AffinityTopologyVersion [topVer=2151, minorTopVer=0], evt=NODE_FAILED, node=6daf3474-2265-48dd-a8f0-471d1a437084]"}
 



Client config file:

   <?xml version="1.0" encoding="UTF-8"?>
       xsi:schemaLocation="

   <bean class="org.apache.ignite.configuration.IgniteConfiguration">
 
   <property name="connectorConfiguration">
         <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
             <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml" />
         </bean>
   </property>
   <property name="peerClassLoadingEnabled" value="true"/>
    <!-- Enable cache events. -->
    <property name="includeEventTypes">
        <util:constant static-field="org.apache.ignite.events.EventType.EVTS_DISCOVERY"/>
    </property>


   <property name="failureDetectionTimeout" value="80000"/>
   <property name="clientFailureDetectionTimeout" value="80000"/>
   <property name="discoverySpi">
    <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
     <property name="ipFinder">
      <bean
          class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
          <property name="serviceName" value="ignite-service"/>
          <property name="namespace" value="ign"/>

      </bean>
       </property>
         <property name="networkTimeout" value="60000"/>
    </bean>
   </property>
   </bean>
   </beans>


Thanks
Radha