When the client frequently has FullGC, it blocks all requests from the server. "Possible starvation in striped pool"

classic Classic list List threaded Threaded
2 messages Options
no_problem no_problem
Reply | Threaded
Open this post in threaded view
|

When the client frequently has FullGC, it blocks all requests from the server. "Possible starvation in striped pool"

Hello
When the client frequently has FullGC, it blocks all requests from the server. I have tried modifying several server parameters to solve this problem.
The modified parameters are as follows:
slowClientQueueLimit
socketWriteTimeout
clientFailureDetectionTimeout
failureDetectionTimeout

While the blocking occurs, the server log shows a large number of "[2019-05-21T16:36:04,880][WARN ][grid-timeout-worker-#10343][G] >>> Possible starvation in striped pool." warnings.

Please refer to the attachment for the full log, 10.110.118.53 in the log is the FullGC test node.

What parameters can be modified to avoid similar problems? What adjustments do I need to make?

Thank you very much.

Ignite Version 2.4.0

server config file:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Apache Ignite server node configuration (Spring XML).

    NOTE(review): the opening <beans ...> tag was truncated in the original post; only the
    closing quote of xsi:schemaLocation survived. The standard Spring beans header is restored
    below so that the document is well-formed. Confirm against the real file.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans.xsd
        ">
    <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
        <!-- Failure detection timeouts are left commented out, exactly as in the original post. -->
        <!-- <property name="failureDetectionTimeout" value="60000"/> -->
        <!-- <property name="clientFailureDetectionTimeout" value="60000"/> -->
        <property name="segmentationPolicy" value="RESTART_JVM"/>

        <!-- Thread pool sizing. -->
        <property name="publicThreadPoolSize" value="192"/>
        <property name="systemThreadPoolSize" value="192"/>
        <property name="dataStreamerThreadPoolSize" value="96"/>
        <!-- NOTE(review): 10240 stripes is unusually large; confirm it against the host's CPU count. -->
        <property name="stripedPoolSize" value="10240"/>
        <property name="rebalanceThreadPoolSize" value="4"/>

        <!-- Durable storage: one persistent default data region plus WAL settings. -->
        <property name="dataStorageConfiguration">
            <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
                <property name="defaultDataRegionConfiguration">
                    <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <!-- This name is referenced by dataRegionName in the cache configuration below. -->
                        <property name="name" value="qipu_entity_cache_data_region"/>
                        <property name="initialSize" value="#{10L * 1024 * 1024 * 1024}"/>
                        <property name="maxSize" value="#{100L * 1024 * 1024 * 1024}"/>
                        <property name="persistenceEnabled" value="true"/>
                        <property name="metricsEnabled" value="true"/>
                        <property name="checkpointPageBufferSize" value="#{1 * 1024 * 1024 * 1024}"/>
                    </bean>
                </property>
                <property name="storagePath" value="/home/qipu/production/apache-ignite-2.4.0/persistence"/>
                <property name="walPath" value="/home/qipu/production/apache-ignite-2.4.0/wal"/>
                <property name="walArchivePath" value="/home/qipu/production/apache-ignite-2.4.0/wal/archive"/>
                <property name="walSegmentSize" value="#{64 * 1024 * 1024}"/>
                <property name="pageSize" value="#{4 * 1024}"/>
                <property name="walSegments" value="#{20}"/>
                <property name="walMode" value="LOG_ONLY"/>
                <property name="metricsEnabled" value="true"/>
                <property name="writeThrottlingEnabled" value="false"/>
                <property name="checkpointThreads" value="8"/>
                <property name="walThreadLocalBufferSize" value="#{1 * 1024 * 1024}"/>
            </bean>
        </property>

        <!-- The single cache defined by this node: partitioned, atomic, two backups, fully synchronous writes. -->
        <property name="cacheConfiguration">
            <bean class="org.apache.ignite.configuration.CacheConfiguration">
                <property name="dataRegionName" value="qipu_entity_cache_data_region"/>
                <property name="name" value="qipu_entity_cache"/>
                <property name="cacheMode" value="PARTITIONED"/>
                <property name="partitionLossPolicy" value="IGNORE"/>
                <property name="atomicityMode" value="ATOMIC"/>
                <property name="backups" value="2"/>
                <property name="writeSynchronizationMode" value="FULL_SYNC"/>
                <property name="statisticsEnabled" value="true"/>
                <property name="rebalanceBatchSize" value="#{2 * 1024 * 1024}"/>
                <property name="rebalanceThrottle" value="100"/>
                <property name="rebalanceMode" value="ASYNC"/>
                <property name="rebalanceTimeout" value="40000"/>
            </bean>
        </property>

        <!-- Node-to-node communication; these are among the parameters the post says were tuned. -->
        <property name="communicationSpi">
            <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <property name="socketWriteTimeout" value="1500"/>
                <property name="messageQueueLimit" value="102400"/>
                <property name="slowClientQueueLimit" value="4000"/>
                <property name="usePairedConnections" value="true"/>
            </bean>
        </property>

        <!-- Discovery over a static list of ten hosts, each with port range 47500..47509. -->
        <property name="discoverySpi">
            <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="forceServerMode" value="true"/>
                <property name="ipFinder">
                    <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                <!-- In distributed environment, replace with actual host IP address. -->
                                <value>10.16.133.179:47500..47509</value>
                                <value>10.16.133.180:47500..47509</value>
                                <value>10.16.133.181:47500..47509</value>
                                <value>10.16.133.182:47500..47509</value>
                                <value>10.16.133.183:47500..47509</value>
                                <value>10.16.133.184:47500..47509</value>
                                <value>10.16.133.185:47500..47509</value>
                                <value>10.16.133.186:47500..47509</value>
                                <value>10.16.133.187:47500..47509</value>
                                <value>10.16.133.188:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        <!-- Log4j2 logger, configured from the path passed as the constructor argument. -->
        <property name="gridLogger">
            <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
                <constructor-arg type="java.lang.String" value="/config/ignite-log4j2.xml"/>
            </bean>
        </property>
    </bean>
</beans>



client code:

// Client-side excerpt: configures and starts an Ignite node in client mode, obtains the cache,
// then shows one timed read and one timed write.
// NOTE(review): `ignite`, `igniteCache`, `cache`, `key`, `entry` and `putTimeOut` are not declared
// in this excerpt; presumably they are fields or locals defined elsewhere. Confirm.
IgniteCluster igniteCluster = IgniteCluster.valueOf("CLUSTER_A");
        boolean usePairedConnections = true;
        int messageQueueLimit = 20480;
        // Print the effective settings before the node is started.
        System.out.println("ignite.cluster: "+igniteCluster+" , ignite.usePairedConnections: "+usePairedConnections+" , ignite.messageQueueLimit: "+messageQueueLimit);

        // Must be set before Ignition.start(...) below so that the node starts as a client.
        Ignition.setClientMode(true);

        IgniteConfiguration cfg = new IgniteConfiguration();
        TcpDiscoverySpi spi = new TcpDiscoverySpi();

        TcpDiscoveryVmIpFinder finder = new TcpDiscoveryVmIpFinder();

        // The server address list comes from the cluster config as one comma-separated string.
        finder.setAddresses(Arrays.asList(igniteCluster.getConfig().getServer().split(",")));

        spi.setIpFinder(finder);

        // Communication SPI configured with the two values declared above.
        TcpCommunicationSpi tcpCommunicationSpi = new TcpCommunicationSpi();
        tcpCommunicationSpi.setUsePairedConnections(usePairedConnections);
        tcpCommunicationSpi.setMessageQueueLimit(messageQueueLimit);

        cfg.setDiscoverySpi(spi);
        cfg.setCommunicationSpi(tcpCommunicationSpi);
        ignite = Ignition.start(cfg);

        igniteCache = ignite.getOrCreateCache(IgniteCacheName.valueOf("QIPU_ENTITY_CACHE").toString());

        // Read operation: asynchronous get, then wait on the future with a timeout of 500
        // (presumably milliseconds; confirm against IgniteFuture.get(long)).
        // NOTE(review): this uses `cache`, while the line above assigns `igniteCache`;
        // presumably the same object. Verify.
        byte[] value = cache.getAsync(key).get(500);
        // Write operation: asynchronous put, then wait on the future for up to putTimeOut.
        cache.putAsync(entry.getKey(), entry.getValue()).get(putTimeOut);

fullGCBlockedServer.log (3M) Download Attachment
read_write_qps.jpeg (167K) Download Attachment
ilya.kasnacheev ilya.kasnacheev
Reply | Threaded
Open this post in threaded view
|

Re: When the client frequently has FullGC, it blocks all requests from the server. "Possible starvation in striped pool"

Hello!

I think that this will only be mitigated by moving to some kind of thin client. Alternatively, you can try moving the thick client out of the JVM that is having long GCs (into a separate JVM, for example).

Regards,
--
Ilya Kasnacheev


чт, 23 мая 2019 г. в 04:59, 赵剑 <[hidden email]>:
Hello
When the client frequently has FullGC, it blocks all requests from the server. I have tried modifying several server parameters to solve this problem.
The modified parameters are as follows:
slowClientQueueLimit
socketWriteTimeout
clientFailureDetectionTimeout
failureDetectionTimeout

While the blocking occurs, the server log shows a large number of "[2019-05-21T16:36:04,880][WARN ][grid-timeout-worker-#10343][G] >>> Possible starvation in striped pool." warnings.

Please refer to the attachment for the full log, 10.110.118.53 in the log is the FullGC test node.

What parameters can be modified to avoid similar problems? What adjustments do I need to make?

Thank you very much.

Ignite Version 2.4.0

server config file:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Apache Ignite server node configuration (Spring XML), as quoted in the reply.

    NOTE(review): the opening <beans ...> tag was truncated in the quoted post; only the
    closing quote of xsi:schemaLocation survived. The standard Spring beans header is restored
    below so that the document is well-formed. Confirm against the real file.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="
        http://www.springframework.org/schema/beans
        http://www.springframework.org/schema/beans/spring-beans.xsd
        ">
    <bean id="ignite.cfg" class="org.apache.ignite.configuration.IgniteConfiguration">
        <!-- Failure detection timeouts are left commented out, exactly as in the quoted post. -->
        <!-- <property name="failureDetectionTimeout" value="60000"/> -->
        <!-- <property name="clientFailureDetectionTimeout" value="60000"/> -->
        <property name="segmentationPolicy" value="RESTART_JVM"/>

        <!-- Thread pool sizing. -->
        <property name="publicThreadPoolSize" value="192"/>
        <property name="systemThreadPoolSize" value="192"/>
        <property name="dataStreamerThreadPoolSize" value="96"/>
        <!-- NOTE(review): 10240 stripes is unusually large; confirm it against the host's CPU count. -->
        <property name="stripedPoolSize" value="10240"/>
        <property name="rebalanceThreadPoolSize" value="4"/>

        <!-- Durable storage: one persistent default data region plus WAL settings. -->
        <property name="dataStorageConfiguration">
            <bean class="org.apache.ignite.configuration.DataStorageConfiguration">
                <property name="defaultDataRegionConfiguration">
                    <bean class="org.apache.ignite.configuration.DataRegionConfiguration">
                        <!-- This name is referenced by dataRegionName in the cache configuration below. -->
                        <property name="name" value="qipu_entity_cache_data_region"/>
                        <property name="initialSize" value="#{10L * 1024 * 1024 * 1024}"/>
                        <property name="maxSize" value="#{100L * 1024 * 1024 * 1024}"/>
                        <property name="persistenceEnabled" value="true"/>
                        <property name="metricsEnabled" value="true"/>
                        <property name="checkpointPageBufferSize" value="#{1 * 1024 * 1024 * 1024}"/>
                    </bean>
                </property>
                <property name="storagePath" value="/home/qipu/production/apache-ignite-2.4.0/persistence"/>
                <property name="walPath" value="/home/qipu/production/apache-ignite-2.4.0/wal"/>
                <property name="walArchivePath" value="/home/qipu/production/apache-ignite-2.4.0/wal/archive"/>
                <property name="walSegmentSize" value="#{64 * 1024 * 1024}"/>
                <property name="pageSize" value="#{4 * 1024}"/>
                <property name="walSegments" value="#{20}"/>
                <property name="walMode" value="LOG_ONLY"/>
                <property name="metricsEnabled" value="true"/>
                <property name="writeThrottlingEnabled" value="false"/>
                <property name="checkpointThreads" value="8"/>
                <property name="walThreadLocalBufferSize" value="#{1 * 1024 * 1024}"/>
            </bean>
        </property>

        <!-- The single cache defined by this node: partitioned, atomic, two backups, fully synchronous writes. -->
        <property name="cacheConfiguration">
            <bean class="org.apache.ignite.configuration.CacheConfiguration">
                <property name="dataRegionName" value="qipu_entity_cache_data_region"/>
                <property name="name" value="qipu_entity_cache"/>
                <property name="cacheMode" value="PARTITIONED"/>
                <property name="partitionLossPolicy" value="IGNORE"/>
                <property name="atomicityMode" value="ATOMIC"/>
                <property name="backups" value="2"/>
                <property name="writeSynchronizationMode" value="FULL_SYNC"/>
                <property name="statisticsEnabled" value="true"/>
                <property name="rebalanceBatchSize" value="#{2 * 1024 * 1024}"/>
                <property name="rebalanceThrottle" value="100"/>
                <property name="rebalanceMode" value="ASYNC"/>
                <property name="rebalanceTimeout" value="40000"/>
            </bean>
        </property>

        <!-- Node-to-node communication; these are among the parameters the post says were tuned. -->
        <property name="communicationSpi">
            <bean class="org.apache.ignite.spi.communication.tcp.TcpCommunicationSpi">
                <property name="socketWriteTimeout" value="1500"/>
                <property name="messageQueueLimit" value="102400"/>
                <property name="slowClientQueueLimit" value="4000"/>
                <property name="usePairedConnections" value="true"/>
            </bean>
        </property>

        <!-- Discovery over a static list of ten hosts, each with port range 47500..47509. -->
        <property name="discoverySpi">
            <bean class="org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi">
                <property name="forceServerMode" value="true"/>
                <property name="ipFinder">
                    <bean class="org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder">
                        <property name="addresses">
                            <list>
                                <!-- In distributed environment, replace with actual host IP address. -->
                                <value>10.16.133.179:47500..47509</value>
                                <value>10.16.133.180:47500..47509</value>
                                <value>10.16.133.181:47500..47509</value>
                                <value>10.16.133.182:47500..47509</value>
                                <value>10.16.133.183:47500..47509</value>
                                <value>10.16.133.184:47500..47509</value>
                                <value>10.16.133.185:47500..47509</value>
                                <value>10.16.133.186:47500..47509</value>
                                <value>10.16.133.187:47500..47509</value>
                                <value>10.16.133.188:47500..47509</value>
                            </list>
                        </property>
                    </bean>
                </property>
            </bean>
        </property>

        <!-- Log4j2 logger, configured from the path passed as the constructor argument. -->
        <property name="gridLogger">
            <bean class="org.apache.ignite.logger.log4j2.Log4J2Logger">
                <constructor-arg type="java.lang.String" value="/config/ignite-log4j2.xml"/>
            </bean>
        </property>
    </bean>
</beans>



client code:

// Client-side excerpt (quoted copy): configures and starts an Ignite node in client mode,
// obtains the cache, then shows one timed read and one timed write.
// NOTE(review): `ignite`, `igniteCache`, `cache`, `key`, `entry` and `putTimeOut` are not declared
// in this excerpt; presumably they are fields or locals defined elsewhere. Confirm.
IgniteCluster igniteCluster = IgniteCluster.valueOf("CLUSTER_A");
        boolean usePairedConnections = true;
        int messageQueueLimit = 20480;
        // Print the effective settings before the node is started.
        System.out.println("ignite.cluster: "+igniteCluster+" , ignite.usePairedConnections: "+usePairedConnections+" , ignite.messageQueueLimit: "+messageQueueLimit);

        // Must be set before Ignition.start(...) below so that the node starts as a client.
        Ignition.setClientMode(true);

        IgniteConfiguration cfg = new IgniteConfiguration();
        TcpDiscoverySpi spi = new TcpDiscoverySpi();

        TcpDiscoveryVmIpFinder finder = new TcpDiscoveryVmIpFinder();

        // The server address list comes from the cluster config as one comma-separated string.
        finder.setAddresses(Arrays.asList(igniteCluster.getConfig().getServer().split(",")));

        spi.setIpFinder(finder);

        // Communication SPI configured with the two values declared above.
        TcpCommunicationSpi tcpCommunicationSpi = new TcpCommunicationSpi();
        tcpCommunicationSpi.setUsePairedConnections(usePairedConnections);
        tcpCommunicationSpi.setMessageQueueLimit(messageQueueLimit);

        cfg.setDiscoverySpi(spi);
        cfg.setCommunicationSpi(tcpCommunicationSpi);
        ignite = Ignition.start(cfg);

        igniteCache = ignite.getOrCreateCache(IgniteCacheName.valueOf("QIPU_ENTITY_CACHE").toString());

        // Read operation: asynchronous get, then wait on the future with a timeout of 500
        // (presumably milliseconds; confirm against IgniteFuture.get(long)).
        // NOTE(review): this uses `cache`, while the line above assigns `igniteCache`;
        // presumably the same object. Verify.
        byte[] value = cache.getAsync(key).get(500);
        // Write operation: asynchronous put, then wait on the future for up to putTimeOut.
        cache.putAsync(entry.getKey(), entry.getValue()).get(putTimeOut);