With Ganglia, graphing a large number of servers has never been so easy… Ganglia is a scalable distributed monitoring system for high-performance computing systems such as clusters and Grids. Ganglia let you create any kind of module in C/C++ or Python. You can also use the command line tool Gmetric and then the scripting language of your choice. The problem with Gmetrics is that you can’t keep your data organized by group and it’s getting harder to poll values in an efficient way. Few months ago I needed to monitor some JMX values returned by a Java daemon.

This post is quite succinct as I consider you already know Ganglia, Python and Java.

To monitor Java JMX values with Ganglia, I used JPype. JPype is an effort to allow python programs full access to java class libraries. This allowed me to port easily the JMX Munin plugin, you can get this plugin on Munin Exchange. The port, is really basic as I just converted few method to make sure I can publicly access them from the Python module. You can take a look at the [jmxquery.patch][1] or download [jmxquery.jar][2].

diff -urBN jmxquery.a/src/jmxquery/src/org/munin/JMXQuery.java jmxquery.b/src/jmxquery/src/org/munin/JMXQuery.java
--- jmxquery.a/src/jmxquery/src/org/munin/JMXQuery.java 2007-05-14 01:07:44.000000000 -0700
+++ jmxquery.b/src/jmxquery/src/org/munin/JMXQuery.java 2010-01-15 15:40:00.000000000 -0800
@@ -61,7 +61,7 @@
        this.password = password;
    }

-   private void connect() throws IOException
+   public void connect() throws IOException
    {
        Map environment = null;
        if (username != null && password != null)
@@ -78,7 +78,18 @@
        connection = connector.getMBeanServerConnection();
    }

-   private void list() throws IOException, InstanceNotFoundException, IntrospectionException, ReflectionException
+   public void ping() throws IOException
+   {
+     try {
+       MBeanServerConnection testConnection = connector.getMBeanServerConnection();
+       if (testConnection == null)
+          this.connect();
+     } catch (IOException e) {
+       this.connect();
+     }
+   }
+
+   public void list() throws IOException, InstanceNotFoundException, IntrospectionException, ReflectionException
    {
        if (config == null)
        {
@@ -90,7 +101,7 @@
        }
    }

-   private void listConfig()
+   public void listConfig()
    {
        for (FieldProperties field : config.getFields())
        {
@@ -107,7 +118,22 @@
        }
    }

-   private void output(String name, Object attr, String key)
+   public String output(String name, String JmxObjectName, String JmxAttributeName, String Key)
+   {
+    try
+    {
+      Object value = connection.getAttribute(new ObjectName(JmxObjectName), JmxAttributeName);
+      return output(name, value, Key);
+    }
+    catch (Exception e)
+    {
+      System.err.println("Fail to output " + name);
+      e.printStackTrace();
+    }
+    return null;
+   }
+
+   public String output(String name, Object attr, String key)
    {
        if (attr instanceof CompositeDataSupport)
        {
@@ -116,15 +142,17 @@
            {
                throw new IllegalArgumentException("Key is null for composed data " + name);
            }
-           System.out.println(name + ".value " + format(cds.get(key)));
+           //System.out.println(name + ".value " + format(cds.get(key)));
+           return format(cds.get(key));
        }
        else
        {
-           System.out.println(name + ".value " + format(attr));
+           //System.out.println(name + ".value " + format(attr));
+         return format(attr);
        }
    }

-   private void output(String name, Object attr)
+   public void output(String name, Object attr)
    {
        if (attr instanceof CompositeDataSupport)
        {
@@ -142,7 +170,7 @@
    }

    @SuppressWarnings("unchecked")
-   private void listAll() throws IOException, InstanceNotFoundException, IntrospectionException, ReflectionException
+   public void listAll() throws IOException, InstanceNotFoundException, IntrospectionException, ReflectionException
    {
        Set

Once JPype is installed, make sure your Ganglia is configured with the python module.

# The modules section describes the module
#  that should be loaded.
#   name - module name
#   path - load path of the .so
#   params - path to the directory where mod_python
#             should look for python metric modules
modules {
  module {
    name = "python_module"
    path = "modpython.so"
    params = "/opt/ganglia/python_modules"
  }
}

include ('/opt/ganglia/etc/conf.d/*.pyconf')

Then you can use this basic [jmx python][3] module:

"""
Ganglia Module to graph JMX values using JPype
Author: Nicolas Brousse
"""
from jpype import *

import os, platform, sys, time

debug = 0

_jmx = None
_jmx_cur_status = {}
_jmx_pre_status = {}
_jmx_pre_time = {}
_eta = 0
_jmx_params = {'User': '',
              'Password': '',
              'Host': '127.0.0.1',
              'Port': '8989',
              'JMXWrapper': '/opt/ganglia/jmxquery.jar'}

descriptors = []

gauge_metrics = {}

def Metric_Handler(name):
    global _jmx

    if _jmx is None:
        _jmx = initJVM()

    pre = None
    cur = 0
    eta = 0

    if debug:
        print "[ DEBUG ] _jmx_cur_status length: %d" % len(_jmx_cur_status)
        print "[ DEBUG ] _jmx_pre_status length: %d" % len(_jmx_pre_status)

    if _jmx is None:
        raise TypeError("No valid JMX Object !")

    if name in _jmx_cur_status:
        _jmx_pre_status[name] = _jmx_cur_status[name]

    for d in descriptors:
        if d['name'] == name:
            break

    try:
        if "jmxKeyName" in d:
            key = d['jmxKeyName']
        else:
            key = None

        if debug:
            print "[ DEBUG ] _jmx.output(%s, %s, %s, %s)" % \
                (d['name'], d['jmxObjectName'], d['jmxAttributeName'], key)

        _jmx.ping()
        result = _jmx.output(d['name'], d['jmxObjectName'], d['jmxAttributeName'], key)
    except:
        print "Error: ", sys.exc_info()[1]
        raise

    if result.isdigit():
        if debug: print "[ DEBUG ] %s: %s" % (name, result)
        _jmx_cur_status[name] = result

        cur_time = time.time()
        if name in _jmx_pre_time:
            eta = cur_time - _jmx_pre_time[name]
            if debug:
                print '[ DEBUG ] ETA: %d, Last time: %d, Cur time: %d' % (eta, _jmx_pre_time[name], cur_time)

        _jmx_pre_time[name] = cur_time

    if name in _jmx_cur_status:
        cur = _jmx_cur_status[name]
    else:
        cur = 0
    if len(_jmx_pre_status) > 0 and name in _jmx_pre_status:
        pre = _jmx_pre_status[name]
    else:
        pre = 0
    eta = int(eta)

    if name in gauge_metrics:
        if pre is not None and eta > 0:
            ret = (long(cur) - long(pre)) / eta
        else:
            ret = 0
    else:
        ret = cur

    if debug:
        print "[ DEBUG ] Metric_Handler(%s): %s (eta: %s, cur: %s, pre: %s)" % (name, ret, eta, cur, pre)

    return long(ret)

def Init_Metric (name, jmxObjectName, jmxAttributeName, jmxKeyName, tmax, type, slope, units, fmt, handler):
    '''Create a metric definition dictionary object.'''
    d = {'name': name.lower(),
        'jmxObjectName': jmxObjectName,
        'jmxAttributeName': jmxAttributeName,
        'jmxKeyName': jmxKeyName,
        'call_back': handler,
        'time_max': tmax,
        'value_type': type,
        'units': units,
        'slope': slope,
        'format': fmt,
        'description': 'JMX '+name.replace("_"," "),
        'groups': 'jmx'}
    return d

def metric_init(params):
    global _jmx, _jmx_params

    if debug: print "[ DEBUG ] metric_init()"

    if 'User' in params:
        _jmx_params['User'] = params['User']

    if 'Password' in params:
        _jmx_params['Password'] = params['Password']

    if 'Host' in params:
        _jmx_params['Host'] = params['Host']

    if 'Port' in params:
        _jmx_params['Port'] = int(params['Port'])

    if 'JMXWrapper' in params:
        _jmx_params['JMXWrapper'] = params['JMXWrapper']

    return descriptors

def initJVM():
    if debug: print "[ DEBUG ] initJVM()"

    if platform.architecture()[0] == "32bit":
        arch="i386"
    else:
        arch="amd64"

    jvm="/usr/java/jdk/jre/lib/%s/client/libjvm.so" % arch
    if not os.path.isfile(jvm):
        jvm="/usr/java/jdk/jre/lib/%s/server/libjvm.so" % arch
        if not os.path.isfile(jvm):
            raise IOError("Can't find the libjvm.so (%s)" % jvm)

    if not os.path.isfile(_jmx_params['JMXWrapper']):
        raise IOError("Can't find the JMX Wrapper at'%s'" % \
            _jmx_params['JMXWrapper'])

    try:
        startJVM(jvm, "-Djava.class.path=%s" % _jmx_params['JMXWrapper'])

        url = "service:jmx:rmi:///jndi/rmi://%s:%d/jmxrmi" % \
               (_jmx_params['Host'], int(_jmx_params['Port']))

        JMXQuery = JClass("org.munin.JMXQuery")

        if _jmx_params['Password'] is not '':
            _jmx = JMXQuery(url, username, password)
        else:
            _jmx = JMXQuery(url)

        _jmx.connect()
    except:
        print "Error: ", sys.exc_info()[1]
        raise

    return _jmx

def metric_cleanup():
    '''Clean up the metric module.'''
    #shutdownJVM()

def Build_Conf():
    print "modules {\n module {\n  name = \"jmx\"\n  language = \"python\"\n }\n}\n"
    print "collection_group {\n collect_every = 30\n time_threshold = 60"
    for d in descriptors:
        print " metric {\n  name = \""+d['name']+"\"\n  title = \""+d['description']+"\"\n  value_threshold = 1.0\n }"
    print "}\n"

descriptors.append(
    Init_Metric("java_cpu_time", "java.lang:type=Threading", "CurrentThreadCpuTime", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_cpu_user_time", "java.lang:type=Threading", "CurrentThreadUserTime", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_nonheap_committed", "java.lang:type=Memory", "NonHeapMemoryUsage", "committed",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_nonheap_max", "java.lang:type=Memory", "NonHeapMemoryUsage", "max",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_nonheap_used", "java.lang:type=Memory", "NonHeapMemoryUsage", "used",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_heap_committed", "java.lang:type=Memory", "HeapMemoryUsage", "committed",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_heap_used", "java.lang:type=Memory", "HeapMemoryUsage", "used",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_memory_heap_max", "java.lang:type=Memory", "HeapMemoryUsage", "max",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("os_memory_physical", "java.lang:type=OperatingSystem", "FreePhysicalMemorySize", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("os_memory_vm", "java.lang:type=OperatingSystem", "CommittedVirtualMemorySize", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_thread_count", "java.lang:type=Threading", "ThreadCount", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))
descriptors.append(
    Init_Metric("java_thread_count_peak", "java.lang:type=Threading", "PeakThreadCount", "",
        int(300), 'uint', 'both', '', '%u', Metric_Handler))

#This code is for debugging and unit testing
if __name__ == '__main__':
    try:
        if len(sys.argv) <= 1:
            debug = 1

        metric_init(_jmx_params)

        if len(sys.argv) <= 1:
            while True:
                for d in descriptors:
                    v = d['call_back'](d['name'])
                time.sleep(5)
        elif sys.argv[1] == "config":
            Build_Conf()
            metric_cleanup()

    except KeyboardInterrupt:
        print "Process interrupted."
        if _JMX_WorkerThread.running and not _JMX_WorkerThread.shuttingdown:
            _JMX_WorkerThread.shutdown()
        time.sleep(0.2)
        sys.exit(1)

You can simply run “python jmx.py config” to generate the jmx.pyconf file. Also, you’d probably need to edit the plugin to check for your own JMX values. A lot of improvement could be done to this plugin, but all the basics are there and it can give you some awesome graph to monitor your Java application.

Comments