-
Notifications
You must be signed in to change notification settings - Fork 1
/
farmoutJobMonitor
executable file
·125 lines (103 loc) · 3.95 KB
/
farmoutJobMonitor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
#
# Essentially this runs condor_q <user name>
# once every minute and stores the job counts in a log file.
# Every other minute it runs a script that plots that log file.
#
# Michael Anderson
# May 19, 2008
##################################################
##################################################
# Configuration
#
# Account whose jobs are monitored: taken from FARMOUT_USER in the
# environment, falling back to $USER when that is unset or empty.
: "${FARMOUT_USER:=${USER}}"

# Log file that records the job counts.
logFile="/afs/hep.wisc.edu/home/${FARMOUT_USER}/jobMonitor.log"

# Upper bound (in minutes) on how long the monitor keeps running.
# NOTE: the monitor stops as soon as EITHER the queue holds 0 jobs
# OR it has been running longer than maxTimeToMonitor.
maxTimeToMonitor=7200

# OPTIONAL: directory that receives a copy of the plot image.
# Set to the empty string to skip the copy.
imageCopyDir="/cms/www/comp/farmout_users/userJobsPlots/"

# Load the setup script that puts the matching gnuplot build
# (32- or 64-bit) on the user's path; updateJobMonitorGraph.pl needs it.
source /afs/hep/cms/sw/gnuplot/setup.sh
##################################################
##################################################
# See if the job monitor is already running.
#
# There is no lock file: the log file's modification time is used as a
# heartbeat. If the log was written to within the last 10 minutes, another
# instance may still be alive, so this one quits without touching anything.

# Print the number of seconds since file $1 was last modified.
# Prints nothing and returns non-zero if the file cannot be examined.
seconds_since_modified() {
  local mtime
  # Ask stat for the mtime directly instead of counting columns of `ls -l`,
  # whose layout shifts when an owner or group name contains whitespace.
  mtime=$(stat -c %Y -- "$1" 2>/dev/null) || return 1
  [[ "$mtime" =~ ^[0-9]+$ ]] || return 1
  printf '%s\n' "$(( $(date +%s) - mtime ))"
}

# First see if the log file exists.
if [[ -f "$logFile" ]]; then
  # Has it been modified recently (less than 600 seconds ago)?
  if timeSinceLastMod=$(seconds_since_modified "$logFile") \
      && (( timeSinceLastMod < 600 )); then
    # It has, so a monitor could still be running: quit.
    # For less confusion, warn if that monitor is on another machine.
    # The log header holds a "#Hostname: <host>" line; field 2 is the host.
    lastHostname=$(awk '/Hostname/ { print $2; exit }' "$logFile")
    if [[ "$lastHostname" != "$HOSTNAME" ]]; then
      echo "farmoutJobMonitor appears to be running on another machine, $lastHostname"
      echo "A new instance cannot be started on this machine, $HOSTNAME"
      echo "See $logFile"
    fi
    exit
  fi
fi
##################################################
##################################################
# Set up the user's web directory (public_html), then start a fresh log.
publichtmlDir="/afs/hep.wisc.edu/home/${FARMOUT_USER}/public_html"
wwwDir="/afs/hep.wisc.edu/home/${FARMOUT_USER}/www"

# Do they have a public_html?
if [[ ! -d "$publichtmlDir" ]]; then
  # No public_html. Do they have a www?
  if [[ -d "$wwwDir" ]]; then
    # Yes www. Make public_html a soft-link to www.
    ln -s -- "$wwwDir" "$publichtmlDir"
  else
    # No www. Make public_html.
    mkdir -p -- "$publichtmlDir"
  fi
fi

# Write the log header to file $1, replacing any previous contents.
# The header is four '#' lines (PID, description, hostname, start time)
# followed by the column-title line for the per-minute data rows.
write_log_header() {
  {
    echo "#PID $$"
    echo "#Log of jobs running on condor from this machine,"
    echo "#Hostname: $HOSTNAME"
    # Word-splitting of the date output is intentional: it collapses
    # date's padding blanks into single spaces in the log line.
    # shellcheck disable=SC2046
    echo "#Started:" $(date)
    echo "MonthDayHourMin Total Idle Running Held"
  } > "$1"
}

# Create new log file
write_log_header "$logFile"
##################################################
##################################################
# Main monitoring loop: once a minute, append the current job counts to the
# log; every second minute also refresh the plot and check whether to quit.

# Print "<total> <idle> <running> <held>" taken from words 1, 3, 5 and 7 of
# the condor_q summary line passed as $1, e.g.
#   "10 jobs; 1 idle, 8 running, 1 held"  ->  "10 1 8 1"
# Missing words are left out, so an empty line yields an empty result.
summary_counts() {
  local total idle running held counts
  # The odd-numbered words are the counts; the words in between are labels.
  read -r total _ idle _ running _ held _ <<< "$1"
  counts="${total} ${idle} ${running} ${held}"
  # Strip the trailing blanks left behind by missing fields.
  counts=${counts%"${counts##*[! ]}"}
  printf '%s\n' "$counts"
}

# Run for a certain number of minutes
for ((i=0;i<=$maxTimeToMonitor;i+=1)); do
  # Fetch the last line of the condor_q output. This will look something like:
  # "10 jobs; 1 idle, 8 running, 1 held"
  # (The user argument is omitted entirely when FARMOUT_USER is empty.)
  jobString=$(condor_q ${FARMOUT_USER:+"$FARMOUT_USER"} | tail -1)
  jobCounts=$(summary_counts "$jobString")
  totalJobs=${jobCounts%% *}

  # This writes a string to the log file that looks like
  # <month>-<day>-<hour>:<min> <total> <idle> <running> <held>
  # example: "05-19-15:36 10 1 8 1"
  # The line is written even when condor_q gave no usable summary, so the
  # log's modification time keeps advancing while the monitor is alive.
  echo "$(date +%m-%d-%H:%M) ${jobCounts}" >> "$logFile"

  # Run the plot updater every 2 minutes
  if (( i % 2 == 1 )); then
    /cms/cmsprod/bin/updateJobMonitorGraph.pl

    # Copy the user's job plot
    if [[ -n "$imageCopyDir" ]]; then
      cp -- "/afs/hep.wisc.edu/home/${FARMOUT_USER}/public_html/jobMonitor.png" \
        "${imageCopyDir}/${FARMOUT_USER}.png"
    fi

    # If there are no jobs left, quit. The pattern match only succeeds for
    # a numeric zero, so an empty or garbled summary (e.g. condor_q failed)
    # neither stops the monitor nor triggers a test error.
    if [[ "$totalJobs" =~ ^0+$ ]]; then
      echo "# totalJobs=${totalJobs}, quitting." >> "$logFile"
      exit
    fi

    # It takes a little bit for the updater to finish,
    # so sleep less than normal.
    sleep 59
  else
    # Sleep for 60 seconds
    sleep 60
  fi
done
##################################################