4 Small utility to continuously trace an AFS client into files, each storing x minutes of trace data, and to keep those files if something has happened
7 import subprocess, time, getopt, sys
8 from multiprocessing import Process
9 from shutil import copyfile
# Base name of the trace files; the rotating files are named "<Output>-<n>".
12 Output="afsclient.trace"
# Prefix of the process-table snapshot written by saveLogs ("<prefix>-<timestamp>").
13 ProcessTableFilename="proctable"
# Path of the fstrace binary that is invoked through subprocess.
16 FSTRACEBIN="/usr/sbin/fstrace"
19 TraceBufferSize=8192 # buffer size in k, passed to "fstrace setlog cmfx -buffersize"; must be big enough to hold the data produced during one Dumpinterval
20 Dumpinterval = 1 # passed as "-sleep" to "fstrace dump -follow": interval at which the buffer is dumped to file (seconds, per the usage text)
# Two slots for the dump processes; the slot index alternates so that the next
# dump process is started before the previous one is terminated.
21 DumpProcs=[None, None]
22 EventCheckInterval = 1 # seconds to sleep between two checks for an external event
23 RotationInterval=5 # the wait loop runs while its counter is below this value, then the logs are rotated; per the usage text the unit is EventCheckInterval
24 EventFile="./fstrace_trigger" # file whose existence is tested by externalEvent()
33 if verbose : print "Clearing log"
35 subprocess.check_call([FSTRACEBIN,"clear", "cm"])
36 except subprocess.CalledProcessError:
37 print "Unable to run fstrace-binary at %s." % FSTRACEBIN
39 if verbose : print "setting eventset cm active"
40 subprocess.check_call([FSTRACEBIN,"setset","cm","-active"])
41 if verbose : print "Setting buffersize of log cmfx"
42 subprocess.check_call([FSTRACEBIN,"setlog","cmfx","-buffersize", "%s" % TraceBufferSize])
def dumpFSTrace(FileName):
    """Run ``fstrace dump -follow`` on the ``cmfx`` log, writing to FileName.

    The call blocks until the fstrace process exits, which is why callers
    start it as the target of a separate ``Process``.  The module-level
    ``Dumpinterval`` is handed to fstrace as its ``-sleep`` argument.

    Raises subprocess.CalledProcessError if fstrace exits with a non-zero
    status.
    """
    dump_cmd = [
        FSTRACEBIN, "dump", "-follow", "cmfx",
        "-file", FileName,
        "-sleep", "%s" % Dumpinterval,
    ]
    subprocess.check_call(dump_cmd)
49 def saveLogs(ProcessTable, TraceFileNo):
50 NowStr=time.strftime("%H:%M:%S_%d.%m-%Y", time.localtime())
51 for counter in range(numTraceFiles) :
52 thisTraceFileNo=(TraceFileNo+counter) % numTraceFiles
53 print "current TraceFileNo = %s" % TraceFileNo
54 if verbose : print "Saving log %s-%s to %s-%s-%s" % (Output,thisTraceFileNo, Output,counter, NowStr)
55 copyfile("%s-%s" % (Output, thisTraceFileNo), "%s-%s-%s" % ( Output,counter, NowStr))
56 # also save ProcessTable of this interval
57 if verbose : print "Saving ProcessTable to %s-%s" % (ProcessTableFilename,NowStr)
58 f=file("%s-%s" % (ProcessTableFilename,NowStr), "w+")
59 _PIDs=ProcessTable.keys()
65 f.write("%s %s %s\n" % (pid, ProcessTable["%s" % pid]["parentpid"],ProcessTable["%s" % pid]["cmd"]))
69 def updateProcessTable():
71 call=subprocess.Popen([PSBIN, "-eaf"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
72 line=call.stdout.readline() # ignore headerline
74 line=call.stdout.readline()
77 if len(tokens) < 8 : continue
80 cmd=string.join(tokens[7:], " ")
81 # we do make the assumption that a pid is not reused within a rotationinterval.
82 ProcessTable[pid] ={"cmd" : cmd}
83 ProcessTable[pid]["parentpid"] = parentpid
86 def getChildPid(pid,ProcessTable) :
88 for _pid in ProcessTable :
89 if ProcessTable[_pid]["parentpid"] == "%s" % pid :
91 pids=ProcessTable.keys()
95 print "%s : %s" % (p,ProcessTable[p])
98 def killPid(pid,signal="TERM") :
99 rc=subprocess.call([KILLBIN, "-%s" % signal,pid])
105 FileName=TraceFileNames[TraceFileNo]
107 if debug : print "Spawning dump-process to %s" % FileName
108 DumpProcs[DumpProcNum] = Process(target=dumpFSTrace, args=(FileName,))
109 DumpProcs[DumpProcNum].start()
110 if Mode == "single" : # single-shot mode
112 if debug : print "Waiting for %s until next check for external Event." % EventCheckInterval
113 ProcessTable=updateProcessTable()
114 if externalEvent(EventFile) :
115 DumpProcs[DumpProcNum].terminate()
116 ProcessTable=updateProcessTable()
118 print "Killing childpids of %s : %s " % (DumpProcs[DumpProcNum].pid,getChildPid("%s" % DumpProcs[DumpProcNum].pid,ProcessTable))
119 for kid in getChildPid("%s" % DumpProcs[DumpProcNum].pid,ProcessTable) :
121 saveLogs(ProcessTable)
123 time.sleep(EventCheckInterval)
128 while Waited < RotationInterval :
129 if debug : print "Waiting for %s until next check for external Event." % EventCheckInterval
130 time.sleep(EventCheckInterval)
132 ProcessTable=updateProcessTable()
134 print "Waitcount = %s" % Waited
135 print "RotationInterval %s" % RotationInterval
136 print "ProcessTable :"
137 print len(ProcessTable)
138 if externalEvent(EventFile) :
139 saveLogs(ProcessTable, TraceFileNo)
140 if debug : print "Rotating Logs"
141 TraceFileNo = (TraceFileNo+1) % (numTraceFiles)
142 FileName=TraceFileNames[TraceFileNo]
143 newDumpProcNum= (DumpProcNum +1) % 2
144 if debug : print "Spawning next dump-process no %s to %s" % (newDumpProcNum, FileName)
145 DumpProcs[newDumpProcNum] = Process(target=dumpFSTrace, args=(FileName,))
146 DumpProcs[newDumpProcNum].start()
147 ProcessTable=updateProcessTable()
148 if debug : print "Terminating last dump-process %s" % DumpProcNum
149 DumpProcs[DumpProcNum].terminate()
151 print "Killing childpids of %s : %s " % (DumpProcs[DumpProcNum].pid,getChildPid("%s" % DumpProcs[DumpProcNum].pid,ProcessTable))
152 for kid in getChildPid("%s" % DumpProcs[DumpProcNum].pid,ProcessTable) :
153 if debug : print ProcessTable[kid]
155 if debug: print DumpProcs
156 DumpProcNum=newDumpProcNum
160 # This function has to be replaced for new debugging cases
163 def externalEvent(EventFile):
164 if os.path.isfile(EventFile) :
166 if verbose : print "external event happened!"
168 if debug : print "no external event"
172 print "ClientTracing.py --dumpinterval=# --rotationinterval=# --buffersize=# --numfiles # --mode=[cont|single] --savemode=[lastonly|all] --verbose --debug"
173 print "--dumpinterval=# / secs : interval of fstrace in secs to dump information"
174 print "--eventcheckinterval=# / secs : How many secondes to wait between checking for an event"
175 print "--Eventfile=<name> : file to check if an external event happened"
176 print "--rotationinterval=# /units EventCheckInterval"
177 print "--buffersize=# buffer of fstrace log in memory"
178 print "--numfiles # how many logfiles to write"
179 print "--mode=[cont|single]"
181 print "An event is triggered by the existance of the file %s\n" % EventFile
182 print "You may also overwrite the function externalEvent()"
185 if __name__ == "__main__" :
187 opts, args = getopt.getopt(sys.argv[1:], "he:E:d:r:m:b:vdn:s:", ["help", "eventcheckinterval=","Eventfile=","dumpinterval=", "rotationinterval=", "mode=", "buffersize=","verbose", "debug","numfiles=", "savemode="])
188 except getopt.GetoptError, err:
189 # print help information and exit:
190 print str(err) # will print something like "option -a not recognized"
194 if o in ("-v", "--verbose"):
196 elif o in ("-d", "--debug") :
198 elif o in ("-h", "--help"):
201 elif o in ("-e", "--eventcheckinterval") :
202 EventCheckInterval = int(a)
203 elif o in ("-E", "--Eventfile") :
205 elif o in ("-d", "--dumpinterval") :
206 Dumpinterval = int(a)
207 elif o in ('-r', "--rotationinterval") :
208 RotationInterval = int(a)
209 elif o in ("-b", "--buffersize") :
210 TraceBufferSize=int(a)
211 elif o in ("-m", "--mode") :
213 if not Mode in ['cont','single'] :
214 print "mode takes only >cont< or >single< "
216 elif o in ("-o", "--output") :
218 elif o in ("-n", "--numfiles") :
221 print "Unknown option %s" % o
223 for i in range(int(numTraceFiles)) :
224 TraceFileNames.append("%s-%s"% (Output, i))
227 print "Buffersize: %s" % TraceBufferSize
228 print "Dumpinterval: %s" % Dumpinterval
229 print "number of Tracfiles : %s" % numTraceFiles
230 print "TraceFilenames: %s-#" % Output
231 print "Rotationinterval: %s" % RotationInterval
232 print "ProcessTableFilename=%s" % ProcessTableFilename
233 print "Mode: %s" % Mode