Wednesday, December 7, 2011

WebLogic cluster monitor script

# Script to monitor a running WebLogic cluster.
# At regular intervals it writes thread dumps and other cluster diagnostic info to a timestamped log file.

#Vernetto 2011-11-11

# Star import kept for backward compatibility; it is listed first so that the
# explicit imports below win if any name overlaps.
from os import *

import datetime
import os
import sys
import zipfile
import zlib
from time import sleep

from java.io import File
from java.io import FileInputStream
from java.io import FileOutputStream
from java.util import ArrayList
from java.util import HashMap
from java.util import HashSet
from java.util import Properties

# Configuration loaded by loadProps(); None until loadProps() has been called.
configFileProperties = None
# Connection flag that monitorServer() sets after connect() and resets on error.
# NOTE(review): lowercase `false` is not a Python builtin -- presumably it is
# supplied by the WLST runtime; confirm before running under another interpreter.
isConnected = false
# NOTE(review): monitorServer() assigns its own local `previousStdout` (it only
# declares `isConnected` as global), so this module-level value appears unused
# in the visible code.
previousStdout = None

#=======================================================================================
# Utility function to load properties from a config file
#=======================================================================================

def loadProps(configPropFile):
    """Load the monitor configuration from a Java properties file.

    The parsed java.util.Properties object is stored in the module-level
    ``configFileProperties`` variable, which the other functions read.

    configPropFile -- path of the properties file to read.

    Any exception raised while opening or parsing the file propagates to
    the caller; the input stream is closed in either case.
    """
    global configFileProperties
    propInputStream = FileInputStream(configPropFile)
    try:
        configFileProperties = Properties()
        configFileProperties.load(propInputStream)
    finally:
        # The original never closed the stream, leaking a file handle.
        propInputStream.close()
    

def appendToAlarmLog(alarmmessage):
    """Append one timestamped line to the alarm log.

    The log path is read from the 'alarmsfile' key of ``configFileProperties``
    and the line is written as '<timestamp> <alarmmessage>'.

    alarmmessage -- text of the alarm to record.
    """
    alarmsfile = configFileProperties.get('alarmsfile')
    # NOTE(review): this file also does `from os import *`; on interpreters
    # whose os module exports `open`, that would shadow the builtin used
    # here -- confirm on the target WLST/Jython version.
    alarmFile = open(alarmsfile, 'a')
    try:
        alarmFile.write(getNowTimestamp() + ' ' + alarmmessage + '\n')
    finally:
        # Close even when the write fails so the handle is not leaked.
        alarmFile.close()


def getNowTimestamp():
    """Return the current time formatted as YYYYMMDDTHHMMSS (e.g. 20111207T153000)."""
    return datetime.datetime.now().strftime("%Y%m%dT%H%M%S")
 
def monitorServer():
    global isConnected
    fos = None    
    previousStdout = theInterpreter.getOut()
    try:
  #initialize variables
  serverURL=configFileProperties.get('serverURL')
  serverName=configFileProperties.get('serverName')
  username=configFileProperties.get('username')
  password=configFileProperties.get('password')
  logFile=configFileProperties.get('logFile')
  aliveServerCountExpected=configFileProperties.get('aliveServerCountExpected')
  interval=configFileProperties.get('interval')

  #initialize timestamp to append to log filename
  nowtimestamp = getNowTimestamp()


  #save stdout handle 
  logFileNameWithTimestamp = logFile + '.' + nowtimestamp

  #set new stdout

  print "start the script"
  #easeSyntax()

  if (not isConnected) :
      connect(username, password, serverURL)
      isConnected = true

  serverRuntime()
  
  #redirect output to log file
  f = File(logFileNameWithTimestamp)
  fos = FileOutputStream(f)
  theInterpreter.setOut(fos)

  cd('/')
  print 'Health', cmo.getHealthState()

  cd('/ClusterRuntime/myCluster/')
  print 'AliveServerCount=', cmo.getAliveServerCount(), ' ServerNames', cmo.getServerNames()

  if (aliveServerCountExpected != cmo.getAliveServerCount()) :
     alarmmessage = 'MONITORALARM, we were expecting AliveServerCount ' + aliveServerCountExpected + ' and we have instead %d , see file %s' % (cmo.getAliveServerCount() , logFileNameWithTimestamp, )
     print alarmmessage 
     appendToAlarmLog(alarmmessage)
     
     
  cd('/ClusterRuntime/myCluster/UnicastMessaging/UnicastMessagingRuntime')

  print 'DiscoveredGroupLeaders=',  cmo.getDiscoveredGroupLeaders(), ' Groups=', cmo.getGroups(), ' LocalGroupLeaderName=',  cmo.getLocalGroupLeaderName(), ' RemoteGroupsDiscoveredCount=',  cmo.getRemoteGroupsDiscoveredCount(), ' TotalGroupsCount=', cmo.getTotalGroupsCount()

  cd('/ServerChannelRuntimes/unicastChannel')
  print 'AcceptCount=' , cmo.getAcceptCount() , ' MessagesReceivedCount=' , cmo.getMessagesReceivedCount() , ' MessagesSentCount=' , cmo.getMessagesSentCount()
  scr = cmo.getServerConnectionRuntimes()
  #scr is an array of weblogic.server.channels.ServerConnectionRuntimeImpl$SerializableConnectionRuntime
  for myscr in scr:
   print "BytesReceivedCount=", myscr.getBytesReceivedCount(), " BytesSentCount=", myscr.getBytesSentCount(), " ConnectTime=", myscr.getConnectTime()," MessagesReceivedCount=",  myscr.getMessagesReceivedCount(), " MessagesSentCount=", myscr.getMessagesSentCount()

  print ""

  threadDump()


  cd('/JVMRuntime/' + serverName)
  #this is valid for JRockit
  #print "HeapFreeCurrent=", cmo.getHeapFreeCurrent(), " TotalGarbageCollectionTime", cmo.getTotalGarbageCollectionTime(), " TotalNumberOfThreads=", cmo.getTotalNumberOfThreads()
  #this is valid for JRockit
  print "HeapFreeCurrent=", cmo.getHeapFreeCurrent(), ' HeapSizeCurrent=', cmo.getHeapSizeCurrent()

  cd('/ThreadPoolRuntime/ThreadPoolRuntime')

  print 'HoggingThreadCount=', cmo.getHoggingThreadCount(), ' PendingUserRequestCount', cmo.getPendingUserRequestCount(), ' StandbyThreadCount' , cmo.getStandbyThreadCount()
  print 'CompletedRequestCount=', cmo.getCompletedRequestCount(), ' ExecuteThreadIdleCount=', cmo.getExecuteThreadIdleCount(), ' ExecuteThreadTotalCount=', cmo.getExecuteThreadTotalCount()

  #restore stdout
  theInterpreter.setOut(previousStdout)

  fos.close()
  #now zip the report
  zipfileLog = zipfile.ZipFile(logFileNameWithTimestamp + '.zip', 'w')
  zipfileLog.write(logFileNameWithTimestamp, compress_type=zipfile.ZIP_DEFLATED)
  zipfileLog.close()
  os.remove(logFileNameWithTimestamp)

  
    except:
        isConnected = false
        theInterpreter.setOut(previousStdout)
        if (fos != None) :
            fos.close()
        print "Unexpected error:", sys.exc_info()[0]
        raise
  

  
  

# monitor script init
try:
    # sys.argv[1] is the config properties file
 configFile = sys.argv[1]
 print 'Loading config from :', configFile
 loadProps(configFile)
 interval = configFileProperties.getProperty('interval')

 while True:
  try:
   sleep(float(interval))
   monitorServer()
  except:
   errorMessage = "ERROR_QUERYING_SERVER %s - %s - %s"% (sys.exc_info()[0], sys.exc_info()[1], sys.exc_info()[2], )
   appendToAlarmLog(errorMessage)
   dumpStack()


except:
    print "Unexpected error: ", sys.exc_info()[0]
    dumpStack()
    raise
 






The properties file passed as the script's first argument contains:


serverURL=t3://pierrepc:7031
serverName=ms3
username=weblogic
password=welcome1
logFile=C:/pierre/clustermonitor/clustermonitorinfo3.log
alarmsfile=C:/pierre/clustermonitor/clustermonitoralarms3.log
aliveServerCountExpected=3
interval=10

No comments: