One of Ignite pod keeps crashing and not joining the cluster

classic Classic list List threaded Threaded
3 messages Options
radha radha
Reply | Threaded
Open this post in threaded view
|

One of Ignite pod keeps crashing and not joining the cluster

Hi ,
 Ignite is deployed on Kubernetes with 3 replicas of the Ignite server. The servers were up and running for some days and data was being inserted successfully. Then, suddenly, I started getting the error below on one of the server pods, which keeps restarting:
   Failed to process custom exchange task: ClientCacheChangeDummyDiscoveryMessage   [reqId=6b5f6c50-a8c9-4b04-a461-49bfd0112eb0, cachesToClose=null, startCaches=[BgwService]] java.lang.NullPointerException| at org.apache.ignite.internal.processors.cache.CacheAffinitySharedManager.processClientCachesChanges(CacheAffinitySharedManager.java:635)| at org.apache.ignite.internal.processors.cache.GridCacheProcessor.processCustomExchangeTask(GridCacheProcessor.java:391)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.processCustomTask(GridCachePartitionExchangeManager.java:2475)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body0(GridCachePartitionExchangeManager.java:2620)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2539)| at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120)| at java.lang.Thread.run(Thread.java:748)"

Below is my ignite-xml file:
ignite-config.xml:
----
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring configuration for an Apache Ignite node: native persistence on the
  default data region, Kubernetes-based TCP discovery, three SQL schemas and
  four cache templates.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="
         http://www.springframework.org/schema/beans
         http://www.springframework.org/schema/beans/spring-beans.xsd
         http://www.springframework.org/schema/util
         http://www.springframework.org/schema/util/spring-util.xsd">
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
    <!-- Node work directory (conventional lower-camel JavaBean property name, single path separator). -->
    <property name="workDirectory" value="/opt/ignite/persistence/work"/>

    <!-- REST/HTTP connector: Jetty is configured from an external file. -->
    <property name="connectorConfiguration">
      <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
        <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml"/>
      </bean>
    </property>

    <property name="peerClassLoadingEnabled" value="true"/>

    <property name="dataStorageConfiguration">
      <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
        <!-- Enable metrics for Ignite persistence. -->
        <property name="metricsEnabled" value="true"/>
        <property name="defaultDataRegionConfiguration">
          <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
            <property name="name" value="Default_Region"/>
            <!--
              NOTE(review): Ignite documents page eviction for regions without
              native persistence. With persistenceEnabled=true below, this
              setting is presumably ignored. Confirm, and remove it if so.
            -->
            <property name="pageEvictionMode" value="RANDOM_2_LRU"/>
            <!-- 20 MiB initial region size. -->
            <property name="initialSize" value="#{20 * 1024 * 1024}"/>
            <!--
              60 GiB maximum region size.
              NOTE(review): the deployment described in this thread reports
              64 GB RAM and a 32 GB heap per container. Heap plus this region
              would exceed the container memory. Confirm the sizing.
            -->
            <property name="maxSize" value="#{60L * 1024 * 1024 * 1024}"/>
            <!-- Enable the Apache Ignite persistent store. -->
            <property name="persistenceEnabled" value="true"/>
            <!-- Enable metrics for this data region. -->
            <property name="metricsEnabled" value="true"/>
          </bean>
        </property>
        <property name="storagePath" value="/opt/ignite/persistence/"/>
        <property name="walPath" value="/opt/ignite/wal/"/>
        <property name="walArchivePath" value="/opt/ignite/wal/archive"/>
        <property name="walMode" value="FSYNC"/>
      </bean>
    </property>

    <property name="authenticationEnabled" value="false"/>

    <!-- Node discovery through the Kubernetes service "ignite-svc" in namespace "default". -->
    <property name="discoverySpi">
      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
        <property name="ipFinder">
          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
            <property name="serviceName" value="ignite-svc"/>
            <property name="namespace" value="default"/>
          </bean>
        </property>
      </bean>
    </property>

    <property name="sqlSchemas">
      <list>
        <value>Trans</value>
        <value>Info</value>
        <value>Msg</value>
      </list>
    </property>

    <!--
      Cache templates: each entry has a name ending in '*'. All are
      PARTITIONED with one backup and belong to their own cache group.
      Each bean carries a distinct id so the entries can be told apart;
      the original reused one id for all four.
    -->
    <property name="cacheConfiguration">
      <list>
        <!-- Entries expire 75 minutes after creation. -->
        <bean id="cache-template-abc1" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc1*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc1"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="MINUTES"/>
                  <constructor-arg value="75"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc6" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc6*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc6"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc16" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc16*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc16"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- No expiry policy configured for this template. -->
        <bean id="cache-template-trans" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateTrans*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupTrans"/>
        </bean>
      </list>
    </property>
  </bean>
</beans>

Regards
radha
Stanislav Lukyanov Stanislav Lukyanov
Reply | Threaded
Open this post in threaded view
|

Re: One of Ignite pod keeps crashing and not joining the cluster

Hi,

Please share
- Ignite version you're running
- Exact steps and events (a node was restarted, a client joined, etc)
- Logs of all three servers

Thanks,
Stan

On Mon, Aug 19, 2019 at 3:27 PM radha jai <[hidden email]> wrote:
Hi ,
 Ignite is deployed on Kubernetes with 3 replicas of the Ignite server. The servers were up and running for some days and data was being inserted successfully. Then, suddenly, I started getting the error below on one of the server pods, which keeps restarting:
   Failed to process custom exchange task: ClientCacheChangeDummyDiscoveryMessage   [reqId=6b5f6c50-a8c9-4b04-a461-49bfd0112eb0, cachesToClose=null, startCaches=[BgwService]] java.lang.NullPointerException| at org.apache.ignite.internal.processors.cache.CacheAffinitySharedManager.processClientCachesChanges(CacheAffinitySharedManager.java:635)| at org.apache.ignite.internal.processors.cache.GridCacheProcessor.processCustomExchangeTask(GridCacheProcessor.java:391)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.processCustomTask(GridCachePartitionExchangeManager.java:2475)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body0(GridCachePartitionExchangeManager.java:2620)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2539)| at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120)| at java.lang.Thread.run(Thread.java:748)"

Below is my ignite-xml file:
ignite-config.xml:
----
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring configuration for an Apache Ignite node: native persistence on the
  default data region, Kubernetes-based TCP discovery, three SQL schemas and
  four cache templates.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="
         http://www.springframework.org/schema/beans
         http://www.springframework.org/schema/beans/spring-beans.xsd
         http://www.springframework.org/schema/util
         http://www.springframework.org/schema/util/spring-util.xsd">
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
    <!-- Node work directory (conventional lower-camel JavaBean property name, single path separator). -->
    <property name="workDirectory" value="/opt/ignite/persistence/work"/>

    <!-- REST/HTTP connector: Jetty is configured from an external file. -->
    <property name="connectorConfiguration">
      <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
        <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml"/>
      </bean>
    </property>

    <property name="peerClassLoadingEnabled" value="true"/>

    <property name="dataStorageConfiguration">
      <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
        <!-- Enable metrics for Ignite persistence. -->
        <property name="metricsEnabled" value="true"/>
        <property name="defaultDataRegionConfiguration">
          <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
            <property name="name" value="Default_Region"/>
            <!--
              NOTE(review): Ignite documents page eviction for regions without
              native persistence. With persistenceEnabled=true below, this
              setting is presumably ignored. Confirm, and remove it if so.
            -->
            <property name="pageEvictionMode" value="RANDOM_2_LRU"/>
            <!-- 20 MiB initial region size. -->
            <property name="initialSize" value="#{20 * 1024 * 1024}"/>
            <!--
              60 GiB maximum region size.
              NOTE(review): the deployment described in this thread reports
              64 GB RAM and a 32 GB heap per container. Heap plus this region
              would exceed the container memory. Confirm the sizing.
            -->
            <property name="maxSize" value="#{60L * 1024 * 1024 * 1024}"/>
            <!-- Enable the Apache Ignite persistent store. -->
            <property name="persistenceEnabled" value="true"/>
            <!-- Enable metrics for this data region. -->
            <property name="metricsEnabled" value="true"/>
          </bean>
        </property>
        <property name="storagePath" value="/opt/ignite/persistence/"/>
        <property name="walPath" value="/opt/ignite/wal/"/>
        <property name="walArchivePath" value="/opt/ignite/wal/archive"/>
        <property name="walMode" value="FSYNC"/>
      </bean>
    </property>

    <property name="authenticationEnabled" value="false"/>

    <!-- Node discovery through the Kubernetes service "ignite-svc" in namespace "default". -->
    <property name="discoverySpi">
      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
        <property name="ipFinder">
          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
            <property name="serviceName" value="ignite-svc"/>
            <property name="namespace" value="default"/>
          </bean>
        </property>
      </bean>
    </property>

    <property name="sqlSchemas">
      <list>
        <value>Trans</value>
        <value>Info</value>
        <value>Msg</value>
      </list>
    </property>

    <!--
      Cache templates: each entry has a name ending in '*'. All are
      PARTITIONED with one backup and belong to their own cache group.
      Each bean carries a distinct id so the entries can be told apart;
      the original reused one id for all four.
    -->
    <property name="cacheConfiguration">
      <list>
        <!-- Entries expire 75 minutes after creation. -->
        <bean id="cache-template-abc1" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc1*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc1"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="MINUTES"/>
                  <constructor-arg value="75"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc6" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc6*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc6"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc16" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc16*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc16"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- No expiry policy configured for this template. -->
        <bean id="cache-template-trans" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateTrans*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupTrans"/>
        </bean>
      </list>
    </property>
  </bean>
</beans>

Regards
radha
radha radha
Reply | Threaded
Open this post in threaded view
|

Re: One of Ignite pod keeps crashing and not joining the cluster

Hi, 
  Ignite version used is 2.7.0 
  I don't have the server logs for now, but I will share the JVM error log file.
  Ignite has been deployed on Kubernetes with 3 replicas of the server pod. The pods were up and running fine for 9 days. We have created 180 inventory tables and 204 transactional tables. The data has been inserted with the PyIgnite client using the cache.put() method. This is a very slow operation because PyIgnite is very slow: each insert is committed one at a time, so it is not able to do bulk-style inserts. PyIgnite was inserting into about 20 of the inventory tables simultaneously (20 different threads/processes).

Heap Memory and RAM Configurations are as below on each of the ignite server container: 

Heap Memory: 32gb 
RAM: 64GB 
Default memory region: 60GB 
cpu: 4 

Persistence volume 
wal_storage_size: 10GB 
persistence_storage_size: 10GB 

Is the above configuration appropriate?
Thanks
Krupa

On Thu, 22 Aug 2019 at 18:15, Stanislav Lukyanov <[hidden email]> wrote:
Hi,

Please share
- Ignite version you're running
- Exact steps and events (a node was restarted, a client joined, etc)
- Logs of all three servers

Thanks,
Stan

On Mon, Aug 19, 2019 at 3:27 PM radha jai <[hidden email]> wrote:
Hi ,
 Ignite is deployed on Kubernetes with 3 replicas of the Ignite server. The servers were up and running for some days and data was being inserted successfully. Then, suddenly, I started getting the error below on one of the server pods, which keeps restarting:
   Failed to process custom exchange task: ClientCacheChangeDummyDiscoveryMessage   [reqId=6b5f6c50-a8c9-4b04-a461-49bfd0112eb0, cachesToClose=null, startCaches=[BgwService]] java.lang.NullPointerException| at org.apache.ignite.internal.processors.cache.CacheAffinitySharedManager.processClientCachesChanges(CacheAffinitySharedManager.java:635)| at org.apache.ignite.internal.processors.cache.GridCacheProcessor.processCustomExchangeTask(GridCacheProcessor.java:391)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.processCustomTask(GridCachePartitionExchangeManager.java:2475)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body0(GridCachePartitionExchangeManager.java:2620)| at org.apache.ignite.internal.processors.cache.GridCachePartitionExchangeManager$ExchangeWorker.body(GridCachePartitionExchangeManager.java:2539)| at org.apache.ignite.internal.util.worker.GridWorker.run(GridWorker.java:120)| at java.lang.Thread.run(Thread.java:748)"

Below is my ignite-xml file:
ignite-config.xml:
----
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring configuration for an Apache Ignite node: native persistence on the
  default data region, Kubernetes-based TCP discovery, three SQL schemas and
  four cache templates.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:util="http://www.springframework.org/schema/util"
       xsi:schemaLocation="
         http://www.springframework.org/schema/beans
         http://www.springframework.org/schema/beans/spring-beans.xsd
         http://www.springframework.org/schema/util
         http://www.springframework.org/schema/util/spring-util.xsd">
  <bean class="org.apache.ignite.configuration.IgniteConfiguration">
    <!-- Node work directory (conventional lower-camel JavaBean property name, single path separator). -->
    <property name="workDirectory" value="/opt/ignite/persistence/work"/>

    <!-- REST/HTTP connector: Jetty is configured from an external file. -->
    <property name="connectorConfiguration">
      <bean class="org.apache.ignite.configuration.ConnectorConfiguration">
        <property name="jettyPath" value="/opt/ignite/conf/jetty-server.xml"/>
      </bean>
    </property>

    <property name="peerClassLoadingEnabled" value="true"/>

    <property name="dataStorageConfiguration">
      <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
        <!-- Enable metrics for Ignite persistence. -->
        <property name="metricsEnabled" value="true"/>
        <property name="defaultDataRegionConfiguration">
          <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
            <property name="name" value="Default_Region"/>
            <!--
              NOTE(review): Ignite documents page eviction for regions without
              native persistence. With persistenceEnabled=true below, this
              setting is presumably ignored. Confirm, and remove it if so.
            -->
            <property name="pageEvictionMode" value="RANDOM_2_LRU"/>
            <!-- 20 MiB initial region size. -->
            <property name="initialSize" value="#{20 * 1024 * 1024}"/>
            <!--
              60 GiB maximum region size.
              NOTE(review): the deployment described in this thread reports
              64 GB RAM and a 32 GB heap per container. Heap plus this region
              would exceed the container memory. Confirm the sizing.
            -->
            <property name="maxSize" value="#{60L * 1024 * 1024 * 1024}"/>
            <!-- Enable the Apache Ignite persistent store. -->
            <property name="persistenceEnabled" value="true"/>
            <!-- Enable metrics for this data region. -->
            <property name="metricsEnabled" value="true"/>
          </bean>
        </property>
        <property name="storagePath" value="/opt/ignite/persistence/"/>
        <property name="walPath" value="/opt/ignite/wal/"/>
        <property name="walArchivePath" value="/opt/ignite/wal/archive"/>
        <property name="walMode" value="FSYNC"/>
      </bean>
    </property>

    <property name="authenticationEnabled" value="false"/>

    <!-- Node discovery through the Kubernetes service "ignite-svc" in namespace "default". -->
    <property name="discoverySpi">
      <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
        <property name="ipFinder">
          <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.kubernetes.TcpDiscoveryKubernetesIpFinder">
            <property name="serviceName" value="ignite-svc"/>
            <property name="namespace" value="default"/>
          </bean>
        </property>
      </bean>
    </property>

    <property name="sqlSchemas">
      <list>
        <value>Trans</value>
        <value>Info</value>
        <value>Msg</value>
      </list>
    </property>

    <!--
      Cache templates: each entry has a name ending in '*'. All are
      PARTITIONED with one backup and belong to their own cache group.
      Each bean carries a distinct id so the entries can be told apart;
      the original reused one id for all four.
    -->
    <property name="cacheConfiguration">
      <list>
        <!-- Entries expire 75 minutes after creation. -->
        <bean id="cache-template-abc1" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc1*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc1"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="MINUTES"/>
                  <constructor-arg value="75"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc6" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc6*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc6"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- Entries expire 25 hours after creation. -->
        <bean id="cache-template-abc16" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateabc16*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupabc16"/>
          <property name="expiryPolicyFactory">
            <bean class="javax.cache.expiry.CreatedExpiryPolicy" factory-method="factoryOf">
              <constructor-arg>
                <bean class="javax.cache.expiry.Duration">
                  <constructor-arg value="HOURS"/>
                  <constructor-arg value="25"/>
                </bean>
              </constructor-arg>
            </bean>
          </property>
        </bean>

        <!-- No expiry policy configured for this template. -->
        <bean id="cache-template-trans" abstract="true" class="org.apache.ignite.configuration.CacheConfiguration">
          <property name="name" value="templateTrans*"/>
          <property name="cacheMode" value="PARTITIONED"/>
          <property name="backups" value="1"/>
          <property name="groupName" value="groupTrans"/>
        </bean>
      </list>
    </property>
  </bean>
</beans>

Regards
radha

hs_err_pid116.log (778K) Download Attachment