求助:RHCS的集群服務就是切換不了!
我兩台機器,做ftp的集群,集群配置文件如下:
# more /etc/hosts (vm001和vm002內容一樣的)
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost
192.168.0.201 vm001
192.168.0.202 vm002
# ifconfig
bond0 Link encap:Ethernet HWaddr 00:0C:29:58:FC:EB
inet addr:192.168.1.10 Bcast:192.168.1.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fe58:fceb/64 Scope:Link
UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
RX packets:5140 errors:0 dropped:0 overruns:0 frame:0
TX packets:25 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:473116 (462.0 KiB) TX bytes:4127 (4.0 KiB)
eth0 Link encap:Ethernet HWaddr 00:0C:29:58:FC:E1
inet addr:192.168.0.201 Bcast:192.168.0.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fe58:fce1/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:5363 errors:0 dropped:0 overruns:0 frame:0
TX packets:1721 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:506374 (494.5 KiB) TX bytes:148120 (144.6 KiB)
Interrupt:177 Base address:0x1400
eth1 Link encap:Ethernet HWaddr 00:0C:29:58:FC:EB
inet6 addr: fe80::20c:29ff:fe58:fceb/64 Scope:Link
UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
RX packets:5140 errors:0 dropped:0 overruns:0 frame:0
TX packets:25 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:473116 (462.0 KiB) TX bytes:4127 (4.0 KiB)
Interrupt:185 Base address:0x1480
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:116 errors:0 dropped:0 overruns:0 frame:0
TX packets:116 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:8618 (8.4 KiB) TX bytes:8618 (8.4 KiB)
# ifconfig
bond0 Link encap:Ethernet HWaddr 00:0C:29:7D:5B:8C
inet addr:192.168.1.10 Bcast:192.168.1.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fe7d:5b8c/64 Scope:Link
UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
RX packets:5134 errors:0 dropped:0 overruns:0 frame:0
TX packets:34 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:473908 (462.8 KiB) TX bytes:3261 (3.1 KiB)
eth0 Link encap:Ethernet HWaddr 00:0C:29:7D:5B:82
inet addr:192.168.0.202 Bcast:192.168.0.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fe7d:5b82/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:6184 errors:0 dropped:0 overruns:0 frame:0
TX packets:3294 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:559433 (546.3 KiB) TX bytes:299933 (292.9 KiB)
Interrupt:177 Base address:0x1400
eth1 Link encap:Ethernet HWaddr 00:0C:29:7D:5B:8C
inet6 addr: fe80::20c:29ff:fe7d:5b8c/64 Scope:Link
UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
RX packets:5134 errors:0 dropped:0 overruns:0 frame:0
TX packets:34 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:473908 (462.8 KiB) TX bytes:3261 (3.1 KiB)
Interrupt:185 Base address:0x1480
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:14703 errors:0 dropped:0 overruns:0 frame:0
TX packets:14703 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:998684 (975.2 KiB) TX bytes:998684 (975.2 KiB)
# fdisk -l
Disk /dev/sda: 3221 MB, 3221225472 bytes
255 heads, 63 sectors/track, 391 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Device Boot Start End Blocks Id System
/dev/sda1 * 1 13 104391 83 Linux
/dev/sda2 14 359 2779245 83 Linux
/dev/sda3 360 391 257040 82 Linux swap
Disk /dev/sdb: 214 MB, 214748160 bytes
64 heads, 32 sectors/track, 204 cylinders
Units = cylinders of 2048 * 512 = 1048576 bytes
Device Boot Start End Blocks Id System
/dev/sdb1 1 49 50160 83 Linux
/dev/sdb2 50 98 50176 83 Linux
# more /etc/cluster/cluster.conf 兩台機器配置一樣
<?xml version="1.0" ?>
<cluster alias="zcbcluster" config_version="21" name="alpha_cluster">
<fence_daemon post_fail_delay="0" post_join_delay="3"/>
<clusternodes>
<clusternode name="vm001" votes="1">
<fence>
<method name="1">
<device name="clusterfence" nodename="vm001"/>
</method>
</fence>
</clusternode>
<clusternode name="vm002" votes="1">
<fence>
<method name="1">
<device name="clusterfence" nodename="vm002"/>
</method>
</fence>
</clusternode>
</clusternodes>
<cman expected_votes="1" two_node="1"/>
<fencedevices>
<fencedevice agent="fence_manual" name="clusterfence"/>
</fencedevices>
<rm>
<failoverdomains>
<failoverdomain name="ftp-domain" ordered="1" restricted="1">
<failoverdomainnode name="vm001" priority="1"/>
<failoverdomainnode name="vm002" priority="2"/>
</failoverdomain>
</failoverdomains>
<resources/>
<service autostart="1" domain="ftp-domain" name="ftpservice" recovery="relocate">
<ip address="192.168.0.203" monitor_link="1"/>
<fs device="/dev/sdb1" force_fsck="1" force_unmount="1" fsid="37872" fstype="ext3" mo
untpoint="/ftp" name="ftp-content" options="rw" self_fence="1"/>
<script file="/etc/rc.d/init.d/vsftpdHA.sh" name="ftpdHA"/>
</service>
</rm>
</cluster>
# more /etc/init.d/vsftpdHA.sh
#!/bin/bash
#
# vsftpd This shell script takes care of starting and stopping
# standalone vsftpd.
#
# chkconfig: - 60 50
# description: Vsftpd is a ftp daemon, which is the program \
# that answers incoming ftp service requests.
# processname: vsftpd
# config: /etc/vsftpd/vsftpd.conf
# Source function library.
. /etc/rc.d/init.d/functions
# Source networking configuration.
. /etc/sysconfig/network
# Check that networking is up.
[ ${NETWORKING} = "no" ] && exit 0
[ -x /usr/sbin/vsftpd ] || exit 0
RETVAL=0
prog="vsftpd"
start() {
# Start daemons.
if [ -d /etc/vsftpd ] ; then
for i in `ls /etc/vsftpd/*.conf`; do
site=`basename $i .conf`
echo -n $"Starting $prog for $site: "
/usr/sbin/vsftpd $i &
RETVAL=$?
[ $RETVAL -eq 0 ] && {
touch /var/lock/subsys/$prog
success $"$prog $site"
}
echo
done
else
RETVAL=1
fi
return $RETVAL
}
stop() {
# Stop daemons.
echo -n $"Shutting down $prog: "
killproc $prog
RETVAL=$?
echo
[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/$prog
return $RETVAL
}
# See how we were called.
case "$1" in
start)
start
;;
stop)
stop
;;
restart|reload)
stop
start
RETVAL=$?
;;
condrestart)
if [ -f /var/lock/subsys/$prog ]; then
stop
start
RETVAL=$?
fi
;;
status)
status $prog
RETVAL=$?
;;
*)
echo $"Usage: $0 {start|stop|restart|condrestart|status}"
exit 1
esac
exit $RETVAL
兩台機器都啟動的時候,第一台機器起了ftp服務,
# clustat -i 3
Member Status: Quorate
Member Name Status
------ ---- ------
vm001 Online, Local, rgmanager
vm002 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
ftpservice vm001 started
可是我service vsftpdHA.sh stop的時候,第2台機器,就不能起ftp服務,但是第一台機器已經把浮動IP、掛接都停下來了,就是切換不到第2台機器?
# ps -ef |grep ftp
root 3895 1 0 10:16 ? 00:00:00 /usr/sbin/vsftpd /etc/vsftpd/vsftpd.conf
root 4063 2801 0 10:17 pts/0 00:00:00 grep ftp
# service vsftpdHA.sh stop
Shutting down vsftpd: [ OK ]
# ps -ef |grep ftp
root 4077 2801 0 10:17 pts/0 00:00:00 grep ftp
# clustat -i 3
Member Status: Quorate
Member Name Status
------ ---- ------
vm001 Online, rgmanager
vm002 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
ftpservice (none) recoverable
過了一段時間之後,就會出現:
Member Status: Quorate
Member Name Status
------ ---- ------
vm001 Online, rgmanager
vm002 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
ftpservice (vm001) failed
vm002就是起不來ftp服務,請問為什麼啊?
[ 本帖最後由 SUNfan 於 2006-12-21 14:53 編輯 ]
《解決方案》
日誌?
《解決方案》
# ps -ef |grep ftp
root 3909 1 0 13:01 ? 00:00:00 /usr/sbin/vsftpd /etc/vsftpd/vsftpd.conf
root 4230 4158 0 13:01 pts/0 00:00:00 grep ftp
# service vsftpdHA.sh stop
Shutting down vsftpd: [ OK ]
# ps -ef |grep ftp
root 4275 4158 0 13:02 pts/0 00:00:00 grep ftp
#
# tail -30 messages
Dec 21 13:00:55 vm001 vsftpdHA.sh: vsftpd shutdown failed
Dec 21 13:00:55 vm001 clurgmgrd: <notice> stop on script "ftpHA" returned 1 (generic error)
Dec 21 13:00:55 vm001 clurgmgrd: <info> Services Initialized
Dec 21 13:00:55 vm001 clurgmgrd: <info> Logged in SG "usrm::manager"
Dec 21 13:00:55 vm001 clurgmgrd: <info> Magma Event: Membership Change
Dec 21 13:00:55 vm001 clurgmgrd: <info> State change: Local UP
Dec 21 13:00:58 vm001 clurgmgrd: <info> Magma Event: Membership Change
Dec 21 13:00:58 vm001 clurgmgrd: <info> State change: vm002 UP
Dec 21 13:00:59 vm001 clurgmgrd: <notice> Starting stopped service ftpservice
Dec 21 13:00:59 vm001 clurgmgrd: : <info> Adding IPv4 address 192.168.0.203 to eth0
Dec 21 13:01:01 vm001 clurgmgrd: : <info> mounting /dev/sdb1 on /ftp
Dec 21 13:01:01 vm001 kernel: kjournald starting. Commit interval 5 seconds
Dec 21 13:01:01 vm001 kernel: EXT3 FS on sdb1, internal journal
Dec 21 13:01:01 vm001 kernel: EXT3-fs: mounted filesystem with ordered data mode.
Dec 21 13:01:01 vm001 clurgmgrd: : <info> Executing /etc/rc.d/init.d/vsftpdHA.sh start
Dec 21 13:01:01 vm001 vsftpdHA.sh: vsftpd vsftpd succeeded
Dec 21 13:01:01 vm001 clurgmgrd: <notice> Service ftpservice started
Dec 21 13:01:07 vm001 clurgmgrd: : <info> Executing /etc/rc.d/init.d/vsftpdHA.sh status
Dec 21 13:01:37 vm001 clurgmgrd: : <info> Executing /etc/rc.d/init.d/vsftpdHA.sh status
Dec 21 13:01:39 vm001 sshd(pam_unix): session opened for user root by root(uid=0)
Dec 21 13:01:59 vm001 vsftpdHA.sh: vsftpd shutdown succeeded
Dec 21 13:02:07 vm001 clurgmgrd: : <info> Executing /etc/rc.d/init.d/vsftpdHA.sh status
Dec 21 13:02:07 vm001 clurgmgrd: <notice> status on script "ftpHA" returned 3 (function not implemented)
Dec 21 13:02:07 vm001 clurgmgrd: <notice> Stopping service ftpservice
Dec 21 13:02:08 vm001 clurgmgrd: : <info> unmounting /ftp
Dec 21 13:02:08 vm001 clurgmgrd: : <info> Executing /etc/rc.d/init.d/vsftpdHA.sh stop
Dec 21 13:02:08 vm001 vsftpdHA.sh: vsftpd shutdown failed
Dec 21 13:02:08 vm001 clurgmgrd: <notice> stop on script "ftpHA" returned 1 (generic error)
Dec 21 13:02:08 vm001 clurgmgrd: <crit> #12: RG ftpservice failed to stop; intervention required
Dec 21 13:02:08 vm001 clurgmgrd: <notice> Service ftpservice is failed
《解決方案》
case "$1" in
start)
start
;;
stop)
stop
;;
改成
stop)
stop
exit 0
;;
就可以了
《解決方案》
呵呵,是可以重啟服務,謝謝!