Talk about the MonitorService of Elasticsearch

order

This paper focuses on the monitor service of elastic search

MonitorService

elasticsearch-7.0.1/server/src/main/java/org/elasticsearch/monitor/MonitorService.java

public class MonitorService extends AbstractLifecycleComponent {

    private final JvmGcMonitorService jvmGcMonitorService;
    private final OsService osService;
    private final ProcessService processService;
    private final JvmService jvmService;
    private final FsService fsService;

    public MonitorService(Settings settings, NodeEnvironment nodeEnvironment, ThreadPool threadPool,
                          ClusterInfoService clusterInfoService) throws IOException {
        this.jvmGcMonitorService = new JvmGcMonitorService(settings, threadPool);
        this.osService = new OsService(settings);
        this.processService = new ProcessService(settings);
        this.jvmService = new JvmService(settings);
        this.fsService = new FsService(settings, nodeEnvironment, clusterInfoService);
    }

    public OsService osService() {
        return this.osService;
    }

    public ProcessService processService() {
        return this.processService;
    }

    public JvmService jvmService() {
        return this.jvmService;
    }

    public FsService fsService() {
        return this.fsService;
    }

    @Override
    protected void doStart() {
        jvmGcMonitorService.start();
    }

    @Override
    protected void doStop() {
        jvmGcMonitorService.stop();
    }

    @Override
    protected void doClose() {
        jvmGcMonitorService.close();
    }

}
  • The constructor of MonitorService creates the jvmGcMonitorService, osService, processService, jvmService and fsService. doStart, doStop and doClose call the start, stop and close methods of jvmGcMonitorService respectively

JvmGcMonitorService

elasticsearch-7.0.1/server/src/main/java/org/elasticsearch/monitor/jvm/JvmGcMonitorService.java

public class JvmGcMonitorService extends AbstractLifecycleComponent {
    private static final Logger logger = LogManager.getLogger(JvmGcMonitorService.class);

    private final ThreadPool threadPool;
    private final boolean enabled;
    private final TimeValue interval;
    private final Map<String, GcThreshold> gcThresholds;
    private final GcOverheadThreshold gcOverheadThreshold;

    private volatile Cancellable scheduledFuture;

    public static final Setting<Boolean> ENABLED_SETTING =
        Setting.boolSetting("monitor.jvm.gc.enabled", true, Property.NodeScope);
    public static final Setting<TimeValue> REFRESH_INTERVAL_SETTING =
        Setting.timeSetting("monitor.jvm.gc.refresh_interval", TimeValue.timeValueSeconds(1), TimeValue.timeValueSeconds(1),
            Property.NodeScope);

    private static String GC_COLLECTOR_PREFIX = "monitor.jvm.gc.collector.";
    public static final Setting<Settings> GC_SETTING = Setting.groupSetting(GC_COLLECTOR_PREFIX, Property.NodeScope);

    public static final Setting<Integer> GC_OVERHEAD_WARN_SETTING =
        Setting.intSetting("monitor.jvm.gc.overhead.warn", 50, 0, 100, Property.NodeScope);
    public static final Setting<Integer> GC_OVERHEAD_INFO_SETTING =
        Setting.intSetting("monitor.jvm.gc.overhead.info", 25, 0, 100, Property.NodeScope);
    public static final Setting<Integer> GC_OVERHEAD_DEBUG_SETTING =
        Setting.intSetting("monitor.jvm.gc.overhead.debug", 10, 0, 100, Property.NodeScope);

    //......

    public JvmGcMonitorService(Settings settings, ThreadPool threadPool) {
        this.threadPool = threadPool;

        this.enabled = ENABLED_SETTING.get(settings);
        this.interval = REFRESH_INTERVAL_SETTING.get(settings);

        Map<String, GcThreshold> gcThresholds = new HashMap<>();
        Map<String, Settings> gcThresholdGroups = GC_SETTING.get(settings).getAsGroups();
        for (Map.Entry<String, Settings> entry : gcThresholdGroups.entrySet()) {
            String name = entry.getKey();
            TimeValue warn = getValidThreshold(entry.getValue(), entry.getKey(), "warn");
            TimeValue info = getValidThreshold(entry.getValue(), entry.getKey(), "info");
            TimeValue debug = getValidThreshold(entry.getValue(), entry.getKey(), "debug");
            gcThresholds.put(name, new GcThreshold(name, warn.millis(), info.millis(), debug.millis()));
        }
        gcThresholds.putIfAbsent(GcNames.YOUNG, new GcThreshold(GcNames.YOUNG, 1000, 700, 400));
        gcThresholds.putIfAbsent(GcNames.OLD, new GcThreshold(GcNames.OLD, 10000, 5000, 2000));
        gcThresholds.putIfAbsent("default", new GcThreshold("default", 10000, 5000, 2000));
        this.gcThresholds = unmodifiableMap(gcThresholds);

        if (GC_OVERHEAD_WARN_SETTING.get(settings) <= GC_OVERHEAD_INFO_SETTING.get(settings)) {
            final String message =
                String.format(
                    Locale.ROOT,
                    "[%s] must be greater than [%s] [%d] but was [%d]",
                    GC_OVERHEAD_WARN_SETTING.getKey(),
                    GC_OVERHEAD_INFO_SETTING.getKey(),
                    GC_OVERHEAD_INFO_SETTING.get(settings),
                    GC_OVERHEAD_WARN_SETTING.get(settings));
            throw new IllegalArgumentException(message);
        }
        if (GC_OVERHEAD_INFO_SETTING.get(settings) <= GC_OVERHEAD_DEBUG_SETTING.get(settings)) {
            final String message =
                String.format(
                    Locale.ROOT,
                    "[%s] must be greater than [%s] [%d] but was [%d]",
                    GC_OVERHEAD_INFO_SETTING.getKey(),
                    GC_OVERHEAD_DEBUG_SETTING.getKey(),
                    GC_OVERHEAD_DEBUG_SETTING.get(settings),
                    GC_OVERHEAD_INFO_SETTING.get(settings));
            throw new IllegalArgumentException(message);
        }

        this.gcOverheadThreshold = new GcOverheadThreshold(
            GC_OVERHEAD_WARN_SETTING.get(settings),
            GC_OVERHEAD_INFO_SETTING.get(settings),
            GC_OVERHEAD_DEBUG_SETTING.get(settings));

        logger.debug(
            "enabled [{}], interval [{}], gc_threshold [{}], overhead [{}, {}, {}]",
            this.enabled,
            this.interval,
            this.gcThresholds,
            this.gcOverheadThreshold.warnThreshold,
            this.gcOverheadThreshold.infoThreshold,
            this.gcOverheadThreshold.debugThreshold);
    }

    @Override
    protected void doStart() {
        if (!enabled) {
            return;
        }
        scheduledFuture = threadPool.scheduleWithFixedDelay(new JvmMonitor(gcThresholds, gcOverheadThreshold) {
            @Override
            void onMonitorFailure(Exception e) {
                logger.debug("failed to monitor", e);
            }

            @Override
            void onSlowGc(final Threshold threshold, final long seq, final SlowGcEvent slowGcEvent) {
                logSlowGc(logger, threshold, seq, slowGcEvent, JvmGcMonitorService::buildPools);
            }

            @Override
            void onGcOverhead(final Threshold threshold, final long current, final long elapsed, final long seq) {
                logGcOverhead(logger, threshold, current, elapsed, seq);
            }
        }, interval, Names.SAME);
    }

 	@Override
    protected void doStop() {
        if (!enabled) {
            return;
        }
        scheduledFuture.cancel();
    }

    @Override
    protected void doClose() {
    }

    //......
}
  • The doStart method of JvmGcMonitorService registers the timing task of JvmMonitor through scheduleWithFixedDelay, and its doStop method is to cancel the timing task

JvmMonitor

elasticsearch-7.0.1/server/src/main/java/org/elasticsearch/monitor/jvm/JvmGcMonitorService.java

    abstract static class JvmMonitor implements Runnable {

        enum Threshold { DEBUG, INFO, WARN }

        static class SlowGcEvent {

            final GarbageCollector currentGc;
            final long collectionCount;
            final TimeValue collectionTime;
            final long elapsed;
            final JvmStats lastJvmStats;
            final JvmStats currentJvmStats;
            final ByteSizeValue maxHeapUsed;

            SlowGcEvent(
                final GarbageCollector currentGc,
                final long collectionCount,
                final TimeValue collectionTime,
                final long elapsed,
                final JvmStats lastJvmStats,
                final JvmStats currentJvmStats,
                final ByteSizeValue maxHeapUsed) {
                this.currentGc = currentGc;
                this.collectionCount = collectionCount;
                this.collectionTime = collectionTime;
                this.elapsed = elapsed;
                this.lastJvmStats = lastJvmStats;
                this.currentJvmStats = currentJvmStats;
                this.maxHeapUsed = maxHeapUsed;
            }

        }

        private long lastTime = now();
        private JvmStats lastJvmStats = jvmStats();
        private long seq = 0;
        private final Map<String, JvmGcMonitorService.GcThreshold> gcThresholds;
        final GcOverheadThreshold gcOverheadThreshold;

        JvmMonitor(final Map<String, GcThreshold> gcThresholds, final GcOverheadThreshold gcOverheadThreshold) {
            this.gcThresholds = Objects.requireNonNull(gcThresholds);
            this.gcOverheadThreshold = Objects.requireNonNull(gcOverheadThreshold);
        }

        @Override
        public void run() {
            try {
                monitorGc();
            } catch (Exception e) {
                onMonitorFailure(e);
            }
        }

        abstract void onMonitorFailure(Exception e);

        synchronized void monitorGc() {
            seq++;
            final long currentTime = now();
            JvmStats currentJvmStats = jvmStats();

            final long elapsed = TimeUnit.NANOSECONDS.toMillis(currentTime - lastTime);

            monitorSlowGc(currentJvmStats, elapsed);
            monitorGcOverhead(currentJvmStats, elapsed);

            lastTime = currentTime;
            lastJvmStats = currentJvmStats;
        }

        final void monitorSlowGc(JvmStats currentJvmStats, long elapsed) {
            for (int i = 0; i < currentJvmStats.getGc().getCollectors().length; i++) {
                GarbageCollector gc = currentJvmStats.getGc().getCollectors()[i];
                GarbageCollector prevGc = lastJvmStats.getGc().getCollectors()[i];

                // no collection has happened
                long collections = gc.getCollectionCount() - prevGc.getCollectionCount();
                if (collections == 0) {
                    continue;
                }
                long collectionTime = gc.getCollectionTime().millis() - prevGc.getCollectionTime().millis();
                if (collectionTime == 0) {
                    continue;
                }

                GcThreshold gcThreshold = gcThresholds.get(gc.getName());
                if (gcThreshold == null) {
                    gcThreshold = gcThresholds.get("default");
                }

                long avgCollectionTime = collectionTime / collections;

                Threshold threshold = null;
                if (avgCollectionTime > gcThreshold.warnThreshold) {
                    threshold = Threshold.WARN;
                } else if (avgCollectionTime > gcThreshold.infoThreshold) {
                    threshold = Threshold.INFO;
                } else if (avgCollectionTime > gcThreshold.debugThreshold) {
                    threshold = Threshold.DEBUG;
                }
                if (threshold != null) {
                    onSlowGc(threshold, seq, new SlowGcEvent(
                        gc,
                        collections,
                        TimeValue.timeValueMillis(collectionTime),
                        elapsed,
                        lastJvmStats,
                        currentJvmStats,
                        JvmInfo.jvmInfo().getMem().getHeapMax()));
                }
            }
        }

        final void monitorGcOverhead(final JvmStats currentJvmStats, final long elapsed) {
            long current = 0;
            for (int i = 0; i < currentJvmStats.getGc().getCollectors().length; i++) {
                GarbageCollector gc = currentJvmStats.getGc().getCollectors()[i];
                GarbageCollector prevGc = lastJvmStats.getGc().getCollectors()[i];
                current += gc.getCollectionTime().millis() - prevGc.getCollectionTime().millis();
            }
            checkGcOverhead(current, elapsed, seq);
        }

        void checkGcOverhead(final long current, final long elapsed, final long seq) {
            final int fraction = (int) ((100 * current) / (double) elapsed);
            Threshold overheadThreshold = null;
            if (fraction >= gcOverheadThreshold.warnThreshold) {
                overheadThreshold = Threshold.WARN;
            } else if (fraction >= gcOverheadThreshold.infoThreshold) {
                overheadThreshold = Threshold.INFO;
            } else if (fraction >= gcOverheadThreshold.debugThreshold) {
                overheadThreshold = Threshold.DEBUG;
            }
            if (overheadThreshold != null) {
                onGcOverhead(overheadThreshold, current, elapsed, seq);
            }
        }

        JvmStats jvmStats() {
            return JvmStats.jvmStats();
        }

        long now() {
            return System.nanoTime();
        }

        abstract void onSlowGc(Threshold threshold, long seq, SlowGcEvent slowGcEvent);

        abstract void onGcOverhead(Threshold threshold, long total, long elapsed, long seq);

    }
  • JvmMonitor implements the Runnable interface, and its run method implements the monitorGc method, and the onMonitorFailure method when there is an exception. JvmMonitor also defines that the abstract methods of onMonitorFailure, onSlowGc, and ongcobehead need to be implemented by subclasses
  • The monitorGc method first obtains the currentJvmStats, and then executes monitorSlowGc and monitorGcOverhead
  • The monitorSlowGc method mainly calculates the avgCollectionTime, and then determines whether it exceeds the threshold value of the specified level. If it exceeds the threshold value, it will call back the onSlowGc method. The monitorGcOverhead method mainly calculates the proportion of gc time. If it exceeds the threshold value of the specified level, it will call back the ongcioverhead method

Summary

  • The constructor of MonitorService creates the jvmGcMonitorService, osService, processService, jvmService and fsService. doStart, doStop and doClose call the start, stop and close methods of jvmGcMonitorService respectively
  • The doStart method of JvmGcMonitorService registers the timing task of JvmMonitor through scheduleWithFixedDelay, and its doStop method is to cancel the timing task
  • JvmMonitor implements the Runnable interface, and its run method implements the monitorGc method, and the onMonitorFailure method when there is an exception. JvmMonitor also defines that the abstract methods of onMonitorFailure, onSlowGc, and ongcobehead need to be implemented by subclasses

doc

Tags: Programming jvm ElasticSearch Java

Posted on Mon, 04 Nov 2019 10:48:30 -0800 by Jeroen_nld