使用monit监控rails应用

monit的安装,ubuntu可以直接安装

1
sudo apt-get install monit

这样就安装好了.然后我们看下配置文件,在 /etc/monit/monitrc :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
###############################################################################
## Monit control file
###############################################################################
##
## Comments begin with a '#' and extend through the end of the line. Keywords
## are case insensitive. All path's MUST BE FULLY QUALIFIED, starting with '/'.
##
## Below you will find examples of some frequently used statements. For
## information about the control file and a complete list of statements and
## options, please have a look in the Monit manual.
##
##
###############################################################################
## Global section
###############################################################################
##
## Start Monit in the background (run as a daemon):
#
set daemon 30 # check services at 2-minute intervals, current is 30 second
# with start delay 240 # optional: delay the first check by 4-minutes (by
# # default Monit check immediately after Monit start)
#
#
## Set syslog logging with the 'daemon' facility. If the FACILITY option is
## omitted, Monit will use 'user' facility by default. If you want to log to
## a standalone log file instead, specify the full path to the log file
#
# set logfile syslog facility log_daemon
set logfile /var/log/monit.log
# 下面的是163的邮箱服务器,请注意端口是ssl或者非ssl情况.
# set mailserver smtp.163.com username "name@163.com" password "pwd" using sslv3
# 下面的这个是万网的邮箱服务器
set mailserver smtp.mxhichina.com username "name@domain.com" password "pwd" using sslv3 with timeout 30 seconds
# 这个是邮件接收者
set alert receive@mail.com
# 这个是邮件格式,可以自己定义.
set mail-format {
from: name@domain.com
subject: monit alert -- $EVENT $SERVICE
message: $EVENT Service $SERVICE
Date: $DATE
Action: $ACTION
Host: $HOST
Description: $DESCRIPTION
Your faithful employee,
Monit
}
## Set the location of the Monit id file which stores the unique id for the
## Monit instance. The id is generated and stored on first Monit start. By
## default the file is placed in $HOME/.monit.id.
#
# set idfile /var/.monit.id
set idfile /var/lib/monit/id
#
## Set the location of the Monit state file which saves monitoring states
## on each cycle. By default the file is placed in $HOME/.monit.state. If
## the state file is stored on a persistent filesystem, Monit will recover
## the monitoring state across reboots. If it is on temporary filesystem, the
## state will be lost on reboot which may be convenient in some situations.
#
set statefile /var/lib/monit/state
#
## Set the list of mail servers for alert delivery. Multiple servers may be
## specified using a comma separator. If the first mail server fails, Monit
# will use the second mail server in the list and so on. By default Monit uses
# port 25 - it is possible to override this with the PORT option.
## By default Monit will drop alert events if no mail servers are available.
## If you want to keep the alerts for later delivery retry, you can use the
## EVENTQUEUE statement. The base directory where undelivered alerts will be
## stored is specified by the BASEDIR option. You can limit the maximal queue
## size using the SLOTS option (if omitted, the queue is limited by space
## available in the back end filesystem).
#
set eventqueue
basedir /var/lib/monit/events # set the base directory where events will be stored
slots 100 # optionally limit the queue size
#
## Monit by default uses the following format for alerts if the the mail-format
## statement is missing::
## --8<--
## --8<--
##
## You can override this message format or parts of it, such as subject
## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc.
## are expanded at runtime. For example, to override the sender, use:
#
# set mail-format { from: monit@foo.bar }
# 监控网页应用
check system domain.com
if loadavg (1min) > 4 then alert
if loadavg (5min) > 2 then alert
if memory usage > 75% then alert
if swap usage > 25% then alert
if cpu usage (user) > 70% then alert
if cpu usage (system) > 30% then alert
if cpu usage (wait) > 20% then alert
#
## You can set alert recipients whom will receive alerts if/when a
## service defined in this file has errors. Alerts may be restricted on
## events by using a filter as in the second example below.
#
# set alert didmehh@163.com # receive all alerts
## Do not alert when Monit start,stop or perform a user initiated action
# set alert manager@foo.bar not on { instance, action }
#
#
## Monit has an embedded web server which can be used to view status of
## services monitored and manage services from a web interface. See the
## Monit Wiki if you want to enable SSL for the web server.
# 监控情况在网页上可视
set httpd port 2812 and
# use address localhost # only accept connection from localhost
# allow localhost # allow localhost to connect to the server and
# allow admin:monit # require user 'admin' with password 'monit'
allow admin:monit # allow users of group 'monit' to connect (rw)
# allow @users readonly # allow users of group 'users' to connect readonly
#
# monit postgresql
check process web_postgresql with pidfile /var/run/postgresql/9.3-main.pid
start program = "/etc/init.d/postgresql start" with timeout 60 seconds
stop program = "/etc/init.d/postgresql stop"
# monit nginx
check process web_nginx with pidfile /run/nginx.pid
start program = "/etc/init.d/nginx start" with timeout 60 seconds
stop program = "/etc/init.d/nginx stop"
# monit redis
check process web_redis with pidfile /var/run/redis_server.pid
start program = "/etc/init.d/redis_server start" with timeout 60 seconds
stop program = "/etc/init.d/redis_server stop"
# monit puma
check process web_puma with pidfile /home/user/www/app/shared/tmp/pids/puma.pid
# monit sidekiq
check process web_sidekiq with pidfile /home/user/www/app/shared/tmp/pids/sidekiq.pid
# 监控硬盘
check filesystem datafs with path /dev/xvda1
start program = "/bin/mount /data"
stop program = "/bin/umount /data"
if failed permission 660 then unmonitor
if failed uid root then unmonitor
if failed gid disk then unmonitor
if space usage > 80% for 5 times within 15 cycles then alert
if space usage > 99% then stop
if inode usage > 30000 then alert
if inode usage > 99% then stop
group server
# 监控home空间
#check device home with path /home/pinee
# if SPACE usage > 0% then alert
###############################################################################
## Services
###############################################################################
##
## Check general system resources such as load average, cpu and memory
## usage. Each test specifies a resource, conditions and the action to be
## performed should a test fail.
#
# check system myhost.mydomain.tld
# if loadavg (1min) > 4 then alert
# if loadavg (5min) > 2 then alert
# if memory usage > 75% then alert
# if swap usage > 25% then alert
# if cpu usage (user) > 70% then alert
# if cpu usage (system) > 30% then alert
# if cpu usage (wait) > 20% then alert
#
#
## Check if a file exists, checksum, permissions, uid and gid. In addition
## to alert recipients in the global section, customized alert can be sent to
## additional recipients by specifying a local alert handler. The service may
## be grouped using the GROUP option. More than one group can be specified by
## repeating the 'group name' statement.
#
# check file apache_bin with path /usr/local/apache/bin/httpd
# if failed checksum and
# expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor
# if failed permission 755 then unmonitor
# if failed uid root then unmonitor
# if failed gid root then unmonitor
# alert security@foo.bar on {
# checksum, permission, uid, gid, unmonitor
# } with the mail-format { subject: Alarm! }
# group server
#
#
## Check that a process is running, in this case Apache, and that it respond
## to HTTP and HTTPS requests. Check its resource usage such as cpu and memory,
## and number of children. If the process is not running, Monit will restart
## it by default. In case the service is restarted very often and the
## problem remains, it is possible to disable monitoring using the TIMEOUT
## statement. This service depends on another service (apache_bin) which
## is defined above.
#
# check process apache with pidfile /usr/local/apache/logs/httpd.pid
# start program = "/etc/init.d/httpd start" with timeout 60 seconds
# stop program = "/etc/init.d/httpd stop"
# if cpu > 60% for 2 cycles then alert
# if cpu > 80% for 5 cycles then restart
# if totalmem > 200.0 MB for 5 cycles then restart
# if children > 250 then restart
# if loadavg(5min) greater than 10 for 8 cycles then stop
# if failed host www.tildeslash.com port 80 protocol http
# and request "/somefile.html"
# then restart
# if failed port 443 type tcpssl protocol http
# with timeout 15 seconds
# then restart
# if 3 restarts within 5 cycles then timeout
# depends on apache_bin
# group server
#
#
## Check filesystem permissions, uid, gid, space and inode usage. Other services,
## such as databases, may depend on this resource and an automatically graceful
## stop may be cascaded to them before the filesystem will become full and data
## lost.
#
# check filesystem datafs with path /dev/sdb1
# start program = "/bin/mount /data"
# stop program = "/bin/umount /data"
# if failed permission 660 then unmonitor
# if failed uid root then unmonitor
# if failed gid disk then unmonitor
# if space usage > 80% for 5 times within 15 cycles then alert
# if space usage > 99% then stop
# if inode usage > 30000 then alert
# if inode usage > 99% then stop
# group server
#
#
## Check a file's timestamp. In this example, we test if a file is older
## than 15 minutes and assume something is wrong if its not updated. Also,
## if the file size exceed a given limit, execute a script
#
# check file database with path /data/mydatabase.db
# if failed permission 700 then alert
# if failed uid data then alert
# if failed gid data then alert
# if timestamp > 15 minutes then alert
# if size > 100 MB then exec "/my/cleanup/script" as uid dba and gid dba
#
#
## Check directory permission, uid and gid. An event is triggered if the
## directory does not belong to the user with uid 0 and gid 0. In addition,
## the permissions have to match the octal description of 755 (see chmod(1)).
#
## Check a remote host availability by issuing a ping test and check the
## content of a response from a web server. Up to three pings are sent and
## connection to a port and an application level network check is performed.
#
# check host myserver with address 192.168.1.1
# if failed icmp type echo count 3 with timeout 3 seconds then alert
# if failed port 3306 protocol mysql with timeout 15 seconds then alert
# if failed url http://user:password@www.foo.bar:8080/?querystring
# and content == 'action="j_security_check"'
# then alert
#
#
###############################################################################
## Includes
###############################################################################
##
## It is possible to include additional configuration parts from other files or
## directories.
#
include /etc/monit/conf.d/*
#
  • 上面这些是基本的配置.然后我们得重新加载monit
1
sudo service monit reload

- 然后重启一下

1
sudo service monit restart