-
Notifications
You must be signed in to change notification settings - Fork 24
/
TimedBurnin.rb
149 lines (110 loc) · 4.44 KB
/
TimedBurnin.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# Genesis task that runs the breakin burn-in suite for a fixed number of hours.
#
# The burn-in itself is started in a detached screen session; this task then
# polls the breakin state file, keeps the chassis identify LED blinking while
# tests pass, and stops (LED solid) as soon as any test reports a failure.
# Once the configured duration has elapsed without failures the asset is
# flagged BURNIN_COMPLETE in Collins and the machine is shut down.
class TimedBurnin
  include Genesis::Framework::Task

  description "Performs burnin for a specified duration"

  precondition "has asset tag?" do
    !facter['asset_tag'].nil?
  end

  precondition "is running in burnin mode?" do
    ENV['GENESIS_MODE'] == "burnin" || ENV['GENESIS_MODE'] == "classic"
  end

  init do
    install :rpm, "breakin", "hpl", "screen"
  end

  run do
    # total burn-in time, in hours
    burnin_duration = 48
    burnin_cmd = "/usr/bin/screen -dmS genesis_burnin /etc/breakin/startup.sh"
    log "Executing new system burnin for #{burnin_duration} hours..."
    start = Time.now

    begin
      log "Burnin starting via command: #{burnin_cmd}"
      # this is a print rather than a log as Collins has timestamps already
      print "Burnin started at: #{Time.now}"
      # turning on the IPMI light to blinking
      run_cmd("/usr/bin/ipmitool chassis identify 255")
      # begin burnin process
      run_cmd(burnin_cmd)

      # for a simple logging buffer, we log every logging_interval
      # iterations of the polling loop below
      # (40 iterations * 3 minute sleep = every 2 hours)
      logging_interval = 40
      # starting one below the interval so the first progress message is
      # written right after the first sleep
      logging_counter = 39
      # this has to stay below ~200 seconds to be safe as the max time we
      # can pass to the ipmitool is 255 for it to keep the light blinking
      sleep_duration = 3 * 60

      loop do
        sleep sleep_duration
        logging_counter += 1
        runtime_seconds = Time.now - start
        # whole hours elapsed (integer division truncates)
        runtime_hours = runtime_seconds.to_i / 3600

        self.parse_breakin_state
        successful_tests = self.get_breakin_success_count
        failed_tests = self.get_breakin_failure_count

        if failed_tests > 0
          # any failure ends the burn-in: light on solid, report, stop polling
          run_cmd("/usr/bin/ipmitool chassis identify force")
          self.log_failed_tests
          break
        end

        # refresh the IPMI blinker fluid
        run_cmd("/usr/bin/ipmitool chassis identify 255")

        if logging_counter >= logging_interval
          begin
            log "Burnin has been executing for #{runtime_hours} hours with #{successful_tests} tests ran successfully..."
          rescue StandardError
            # if collins is down, don't blow up the Burnin process
            # we only rescue this one and not the other logging lines
            # as this one is the only one which repeats multiple times
            # during execution and we don't "want" it to break on failure.
            # Only StandardError is swallowed so that an Interrupt (ctrl-c)
            # or SystemExit raised here still reaches the outer handler.
          end
          logging_counter = 0
        end

        if runtime_hours >= burnin_duration
          # turn off the IPMI light
          run_cmd("/usr/bin/ipmitool chassis identify 0")
          # tell Collins we are done so we don't run Burnin automatically again on reboot
          collins.set_attribute!(facter['asset_tag'], :BURNIN_COMPLETE, true)
          log "Burnin complete! Machine will power off in 30 seconds (unless you hit cntrl-c now)..."
          sleep 30
          run_cmd("shutdown -h now")
          # NOTE(review): nothing leaves the loop here; presumably the
          # shutdown ends this process before the next iteration - confirm.
        end
      end
    rescue Exception => e
      # NOTE: Exception (not just StandardError) is rescued here, so an
      # Interrupt from ctrl-c or a SystemExit raised anywhere above also
      # lands in this handler.
      # turn the IPMI light on solid
      run_cmd("/usr/bin/ipmitool chassis identify force")
      log "The burnin process threw an exception... #{e.message}"
    end
  end

  # Reads /var/run/breakin.dat and stores its KEY="value" lines as a Hash in
  # @@breakin_state. Lines are matched one at a time, so the last line is
  # picked up even when the file has no trailing newline.
  #
  # If reading or parsing fails, the error is logged and the previously
  # parsed state is kept (or an empty Hash when nothing was parsed yet).
  def self.parse_breakin_state
    breakin_data_file = '/var/run/breakin.dat'
    @@breakin_state = Hash[File.read(breakin_data_file).scan(/^(.+?)="(.*)"$/)]
  rescue StandardError => e
    log "Caught exception #{e.message} while trying to parse breakin state file!"
    @@breakin_state ||= {}
  end

  # Returns the sum of BURNIN_<n>_PASS_QTY over test ids 0...BURNIN_QTY.
  # Missing or non-numeric entries count as 0 (String#to_i / nil.to_i).
  def self.get_breakin_success_count
    test_count = @@breakin_state["BURNIN_QTY"].to_i
    (0...test_count).inject(0) do |total, test_id|
      total + @@breakin_state["BURNIN_#{test_id}_PASS_QTY"].to_i
    end
  end

  # Returns BURNIN_TOTAL_FAIL_QTY from the parsed state as an Integer
  # (0 when the key is absent).
  def self.get_breakin_failure_count
    @@breakin_state["BURNIN_TOTAL_FAIL_QTY"].to_i
  end

  # Logs one line for every test id in 0...BURNIN_QTY whose
  # BURNIN_<n>_FAIL_QTY is greater than zero, including its name and its
  # pass count.
  def self.log_failed_tests
    test_count = @@breakin_state["BURNIN_QTY"].to_i
    (0...test_count).each do |test_id|
      test_failures = @@breakin_state["BURNIN_#{test_id}_FAIL_QTY"].to_i
      next unless test_failures > 0

      test_name = @@breakin_state["BURNIN_#{test_id}_NAME"]
      test_successes = @@breakin_state["BURNIN_#{test_id}_PASS_QTY"].to_i
      log "Burnin test #{test_name} failed #{test_failures} times and succeeded #{test_successes} times..."
    end
  end
end