Aacraid monitoring: различия между версиями
Sirmax (обсуждение | вклад) |
Sirmax (обсуждение | вклад) |
||
| (не показана 21 промежуточная версия этого же участника) | |||
| Строка 1: | Строка 1: | ||
| + | [[Категория:Linux]] |
||
| + | [[Категория:Hardware]] |
||
| + | [[Категория:Raid]] |
||
| + | [[Категория:Nagios]] |
||
| + | [[Категория:Мониторинг]] |
||
| + | [[Категория:Monitoring]] |
||
| + | [[Категория:SNMP]] |
||
=Adaptec Raid Monitoring= |
=Adaptec Raid Monitoring= |
||
| − | После года |
+ | После года эксплуатации мне пришла в голову мысль, что никак кроме как по состоянию "лампочек" я не узнаю, живые ли винты в рейде. |
А их у меня аж 2, в шасси от SuperMicro |
А их у меня аж 2, в шасси от SuperMicro |
||
<PRE> |
<PRE> |
||
| Строка 147: | Строка 154: | ||
==Использование== |
==Использование== |
||
| + | Всё более-менее очевидно; думаю, результат работы команды не требует особых пояснений. |
||
| + | |||
| + | <PRE> |
||
| + | mordred ~ # arcconf GETCONFIG 1 AL |
||
| + | Controllers found: 1 |
||
| + | ---------------------------------------------------------------------- |
||
| + | Controller information |
||
| + | ---------------------------------------------------------------------- |
||
| + | Controller Status : Optimal |
||
| + | Channel description : SAS/SATA |
||
| + | Controller Model : Adaptec 4000SAS |
||
| + | Controller Serial Number : BAD0 |
||
| + | Physical Slot : 1 |
||
| + | Installed memory : 256 MB |
||
| + | Copyback : Disabled |
||
| + | Background consistency check : Disabled |
||
| + | Automatic Failover : Enabled |
||
| + | Defunct disk drive count : 0 |
||
| + | Logical devices/Failed/Degraded : 1/0/0 |
||
| + | -------------------------------------------------------- |
||
| + | Controller Version Information |
||
| + | -------------------------------------------------------- |
||
| + | BIOS : 5.1-0 (8461) |
||
| + | Firmware : 5.1-0 (8461) |
||
| + | Driver : 1.1-5 (2449) |
||
| + | Boot Flash : 0.0-0 (0) |
||
| + | -------------------------------------------------------- |
||
| + | Controller Battery Information |
||
| + | -------------------------------------------------------- |
||
| + | Status : Not Installed |
||
| + | |||
| + | ---------------------------------------------------------------------- |
||
| + | Logical device information |
||
| + | ---------------------------------------------------------------------- |
||
| + | Logical device number 0 |
||
| + | Logical device name : System |
||
| + | RAID level : 10 |
||
| + | Status of logical device : Optimal |
||
| + | Size : 279600 MB |
||
| + | Stripe-unit size : 256 KB |
||
| + | Read-cache mode : Enabled |
||
| + | Write-cache mode : Enabled (write-back) |
||
| + | Write-cache setting : Enabled (write-back) |
||
| + | Partitioned : Yes |
||
| + | Protected by Hot-Spare : No |
||
| + | Bootable : Yes |
||
| + | Failed stripes : No |
||
| + | -------------------------------------------------------- |
||
| + | Logical device segment information |
||
| + | -------------------------------------------------------- |
||
| + | Group 0, Segment 0 : Present (0,0) 3LQ1GXXX |
||
| + | Group 0, Segment 1 : Present (0,4) 3LQ1GXXX |
||
| + | Group 1, Segment 0 : Present (0,1) 3LQ1GXXX |
||
| + | Group 1, Segment 1 : Present (0,5) 3LQ1LXXX |
||
| + | Group 2, Segment 0 : Present (0,2) 3LQ1GXXX |
||
| + | Group 2, Segment 1 : Present (0,6) 3LQ1NXXX |
||
| + | Group 3, Segment 0 : Present (0,3) 3LQ1NXXX |
||
| + | Group 3, Segment 1 : Present (0,7) 3LQ1NXXX |
||
| + | |||
| + | |||
| + | ---------------------------------------------------------------------- |
||
| + | Physical Device information |
||
| + | ---------------------------------------------------------------------- |
||
| + | Device #0 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,0 |
||
| + | Reported Location : Enclosure 0, Slot 0 |
||
| + | Reported ESD : 2,0 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST373455SS |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1GXXX |
||
| + | World-wide name : 5000C50006493XXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #1 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,1 |
||
| + | Reported Location : Enclosure 0, Slot 1 |
||
| + | Reported ESD : 2,0 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1GZ79 |
||
| + | World-wide name : 5000C50006499XXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #2 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,2 |
||
| + | Reported Location : Enclosure 0, Slot 2 |
||
| + | Reported ESD : 2,0 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1GYDP |
||
| + | World-wide name : 5000C50006492XXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #3 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,3 |
||
| + | Reported Location : Enclosure 0, Slot 3 |
||
| + | Reported ESD : 2,0 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1NE5D |
||
| + | World-wide name : 5000C500064AFXXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #4 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,4 |
||
| + | Reported Location : Enclosure 1, Slot 4 |
||
| + | Reported ESD : 2,1 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1GXCS |
||
| + | World-wide name : 5000C50006493XXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #5 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,5 |
||
| + | Reported Location : Enclosure 1, Slot 5 |
||
| + | Reported ESD : 2,1 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1LMCV |
||
| + | World-wide name : 5000C5000649AXXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #6 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,6 |
||
| + | Reported Location : Enclosure 1, Slot 6 |
||
| + | Reported ESD : 2,1 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1NNSB |
||
| + | World-wide name : 5000C5000649AXXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #7 |
||
| + | Device is a Hard drive |
||
| + | State : Online |
||
| + | Supported : Yes |
||
| + | Transfer Speed : SAS 3.0 Gb/s |
||
| + | Reported Channel,Device : 0,7 |
||
| + | Reported Location : Enclosure 1, Slot 7 |
||
| + | Reported ESD : 2,1 |
||
| + | Vendor : SEAGATE |
||
| + | Model : ST37345XXX |
||
| + | Firmware : S515 |
||
| + | Serial number : 3LQ1NE9Q |
||
| + | World-wide name : 5000C500064B0XXX |
||
| + | Size : 70007 MB |
||
| + | Write Cache : Unknown |
||
| + | FRU : None |
||
| + | S.M.A.R.T. : No |
||
| + | Device #8 |
||
| + | Device is an Enclosure services device |
||
| + | Reported Channel,Device : 2,0 |
||
| + | Enclosure ID : 0 |
||
| + | Type : SES2 |
||
| + | Vendor : AMI |
||
| + | Model : MG9072 |
||
| + | Firmware : 0005 |
||
| + | Status of Enclosure services device |
||
| + | Temperature : Normal |
||
| + | Device #9 |
||
| + | Device is an Enclosure services device |
||
| + | Reported Channel,Device : 2,1 |
||
| + | Enclosure ID : 1 |
||
| + | Type : SES2 |
||
| + | Vendor : AMI |
||
| + | Model : MG9072 |
||
| + | Firmware : 0005 |
||
| + | Status of Enclosure services device |
||
| + | Temperature : Normal |
||
| + | |||
| + | |||
| + | Command completed successfully. |
||
| + | </PRE> |
||
| + | ==Интеграция с Nagios== |
||
| + | |||
| + | Для того чтобы удалённо мониторить состояние массива, я решил раз в 5 минут (cron) запускать команду |
||
| + | <PRE> |
||
| + | mordred# arcconf GETCONFIG 1 AL |
||
| + | </PRE> |
||
| + | и разбирать ее результат. Значения возвращать через snmpd. |
||
| + | Думаю, нет смысла возвращать значения сколько винтов работает, или сколько не работает, единственная ошибка - уже повод принимать какие-то меры.<BR> |
||
| + | |||
| + | Написал небольшой скриптик, который запускается раз в 5 минут, и сохраняет результаты своей работы в файлах. |
||
| + | |||
| + | <PRE> |
||
| + | #!/bin/bash |
||
| + | |||
| + | ARCCONF=`/opt/bin/arcconf` |
||
| + | SUFFIX=`date +%d%m%Y%s` |
||
| + | TMP_FILE="/tmp/aaraid_status_"$SUFFIX".tmp" |
||
| + | rm -f $TMP_FILE >/dev/null 2>/dev/null |
||
| + | |||
| + | # Get data from contriller 1 |
||
| + | if $ARCCONF GETCONFIG 111 AL > $TMP_FILE |
||
| + | then |
||
| + | echo $? |
||
| + | /bin/cat $TMP_FILE | logger -i -t "aac_mon.sh" |
||
| + | LOGICAL_DEV=`/bin/cat $TMP_FILE | /bin/grep "Logical devices/Failed/Degraded" | /usr/bin/awk '{print $4}' | /usr/bin/awk -F"/" '{ print $1}'` |
||
| + | FAILED_DEV=`/bin/cat $TMP_FILE | /bin/grep "Logical devices/Failed/Degraded" | /usr/bin/awk '{print $4}' | /usr/bin/awk -F"/" '{ print $2}'` |
||
| + | DEGRADED_DEV=`/bin/cat $TMP_FILE | /bin/grep "Logical devices/Failed/Degraded" | /usr/bin/awk '{print $4}' | /usr/bin/awk -F"/" '{ print $3}'` |
||
| + | DEV_OK=`/bin/cat $TMP_FILE | /bin/grep "Online" | /usr/bin/wc -l` |
||
| + | echo $LOGICAL_DEV > /tmp/logical_dev_number |
||
| + | echo $FAILED_DEV > /tmp/failed_dev_number |
||
| + | echo $DEGRADED_DEV > /tmp/degraded_dev_number |
||
| + | echo $DEV_OK > /tmp/online_dev_number |
||
| + | else |
||
| + | # Set "special" error number |
||
| + | echo 101 > /tmp/logical_dev_number |
||
| + | echo 101 > /tmp/failed_dev_number |
||
| + | echo 101 > /tmp/degraded_dev_number |
||
| + | echo 101 > /tmp/online_dev_number |
||
| + | fi |
||
| + | rm -f $TMP_FILE >/dev/null 2>/dev/null |
||
| + | </PRE> |
||
| + | |||
| + | Отдаю эти значения через snmpd |
||
| + | <PRE> |
||
| + | /etc/snmpd.conf |
||
| + | ... |
||
| + | pass .1.3.6.1.4.1.2022.1 /etc/snmp/aac_raidmon_snmp.sh |
||
| + | pass .1.3.6.1.4.1.2022.2 /etc/snmp/aac_raidmon_snmp.sh |
||
| + | pass .1.3.6.1.4.1.2022.3 /etc/snmp/aac_raidmon_snmp.sh |
||
| + | pass .1.3.6.1.4.1.2022.4 /etc/snmp/aac_raidmon_snmp.sh |
||
| + | ... |
||
| + | </PRE> |
||
| + | |||
| + | И простой скрипт для выдачи значений из файла (должен возвращать 3 строки - OID, тип и значение и обрабатывать аргументы -g, -n и -s (get, getnext и set) например ) |
||
| + | <PRE> |
||
| + | # ./aac_raidmon_snmp.sh -g .1.3.6.1.4.1.2022.2 |
||
| + | .1.3.6.1.4.1.2022.2 |
||
| + | integer |
||
| + | 0 |
||
| + | </PRE> |
||
| + | |||
| + | |||
| + | <PRE> |
||
| + | #!/bin/sh |
||
| + | |||
| + | case "$1" in |
||
| + | "-g") |
||
| + | #получить значение OID |
||
| + | echo $2 |
||
| + | echo integer |
||
| + | case "$2" in |
||
| + | ".1.3.6.1.4.1.2022.1") |
||
| + | # Logical dev number |
||
| + | cat /tmp/logical_dev_number |
||
| + | ;; |
||
| + | ".1.3.6.1.4.1.2022.2") |
||
| + | # Failed dev number |
||
| + | cat /tmp/failed_dev_number |
||
| + | ;; |
||
| + | ".1.3.6.1.4.1.2022.3") |
||
| + | # Degraded dev number |
||
| + | cat /tmp/degraded_dev_number |
||
| + | ;; |
||
| + | |||
| + | ".1.3.6.1.4.1.2022.4") |
||
| + | # Online dev number |
||
| + | cat /tmp/online_dev_number |
||
| + | ;; |
||
| + | esac |
||
| + | ;; |
||
| + | |||
| + | "-n") |
||
| + | #получить OID и значение следующего за OID обьекта |
||
| + | ;; |
||
| + | "-s") |
||
| + | #установить значение OID |
||
| + | ;; |
||
| + | *) |
||
| + | #exit 1 |
||
| + | ;; |
||
| + | esac |
||
| + | exit |
||
| + | </PRE> |
||
| + | |||
| + | |||
| + | Далее со стороны сервера мониторинга эти значения можно обрабатывать нагиосом так, как удобно. |
||
| + | Я использую snmp v3 на этих серверах, соответственно, в конфиге имею: |
||
| + | <PRE> |
||
| + | define service{ |
||
| + | use generic-service |
||
| + | host_name server_at_my_bomain |
||
| + | service_description RAID_Status_Logincal_Device_Number |
||
| + | is_volatile 0 |
||
| + | check_period 24x7 |
||
| + | max_check_attempts 3 |
||
| + | normal_check_interval 5 |
||
| + | retry_check_interval 1 |
||
| + | contact_groups main-server-admins |
||
| + | notification_interval 120 |
||
| + | notification_period 24x7 |
||
| + | notification_options w,u,c,r |
||
| + | check_command check_snmp_v3_data!my_username!my_secret!.1.3.6.1.4.1.2022.1!1:1!1:1 |
||
| + | } |
||
| + | </PRE> |
||
| + | Аналогично для 3 других значений. Т.к. на разных серверах они могут отличаться, я решил отдавать через snmp сырые данные, а проверять их значения уже на уровне нагиоса. |
||
| + | Для того чтобы конфиг был проще, сделал отдельную команду для snmp_v3. |
||
| + | <PRE> |
||
| + | define command{ |
||
| + | command_name check_snmp_v3_data |
||
| + | command_line $USER1$/check_snmp -H $HOSTADDRESS$ -L authNoPriv -U $ARG1$ a MD5 -X $ARG2$ -A $ARG2$ -P 3 -o $ARG3$ -w $ARG4$ -c $ARG5$ |
||
| + | } |
||
| + | </PRE> |
||
| + | |||
| + | ==Результат работы== |
||
| + | В результате в нагиосе вижу |
||
| + | <PRE> |
||
| + | my_server HTTP OK 05-05-2009 13:00:12 5d 20h 52m 5s 1/3 HTTP OK - HTTP/1.1 302 Moved Temporarily - 0.011 second response time |
||
| + | PING OK 05-05-2009 12:58:14 5d 20h 53m 13s 1/3 PING OK - Packet loss = 0%, RTA = 0.42 ms |
||
| + | RAID_Status_Degraded_Device_Number OK 05-05-2009 12:57:23 0d 0h 4m 9s 1/3 SNMP OK - 0 |
||
| + | RAID_Status_Failed_Device_Nnumber OK 05-05-2009 12:58:06 0d 0h 14m 49s 1/3 SNMP OK - 0 |
||
| + | RAID_Status_Logincal_Device_Number OK 05-05-2009 12:58:09 0d 0h 13m 51s 1/3 SNMP OK - 1 |
||
| + | RAID_Status_Online_Device_Number OK 05-05-2009 13:01:06 0d 0h 5m 28s 1/3 SNMP OK - 8 |
||
| + | SNMPv3 OK 05-05-2009 12:58:08 0d 0h 53m 17s 1/3 SNMP OK - Timeticks: (425606) 1:10:56.06 |
||
| + | SSH OK 05-05-2009 13:00:12 5d 20h 51m 44s 1/3 SSH OK - OpenSSH_4.7 (protocol 2.0) |
||
| + | </PRE> |
||
Текущая версия на 15:14, 5 мая 2020
Adaptec Raid Monitoring
После года эксплуатации мне пришла в голову мысль, что никак кроме как по состоянию "лампочек" я не узнаю, живые ли винты в рейде. А их у меня аж 2, в шасси от SuperMicro
04:01.0 RAID bus controller: Adaptec AAC-RAID (rev 02)
Subsystem: Adaptec ASR-4000
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr+ Stepping+ SERR+ FastB2B- DisINTx-
Status: Cap+ 66MHz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 32 (250ns min, 250ns max), Cache Line Size: 32 bytes
Interrupt: pin A routed to IRQ 16
Region 0: Memory at d8200000 (64-bit, non-prefetchable) [size=2M]
Region 2: Memory at d8000000 (32-bit, non-prefetchable) [size=2M]
Region 4: Memory at c0000000 (32-bit, prefetchable) [size=256M]
Capabilities: [c0] Power Management version 2
Flags: PMEClk- DSI- D1+ D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [d0] Message Signalled Interrupts: Mask- 64bit+ Queue=0/1 Enable-
Address: 0000000000000000 Data: 0000
Capabilities: [e0] PCI-X non-bridge device
Command: DPERE- ERO- RBC=512 OST=4
Status: Dev=04:01.0 64bit+ 133MHz+ SCD- USC- DC=bridge DMMRBC=1024 DMOST=4 DMCRS=16 RSCEM- 266MHz- 533MHz-
Kernel driver in use: aacraid
Для управления есть утилита "от производителя" - Storage Manager
Установка
На обоих серверах установлена Gentoo, ebuild http://www.gentoo.ru/node/14090
Обратить внимание на версии: возможно, придётся доработать. Кроме того, мне пришлось заменить
SRC_URI_amd64="${SRC_URI_BASE}/${PN}_linux_x64_v${PV}.rpm"
на
SRC_URI_amd64="http://download.adaptec.com/raid/storage_manager/asm_linux_x64_v5_20_17414.rpm"
# Copyright 1999-2009 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Header: $
#
# Binary-package ebuild for Adaptec Storage Manager: unpacks the vendor RPM
# and installs the arcconf/hrconf command-line tools, plus the StorMan GUI
# files when the X USE flag is enabled.
EAPI=2
inherit multilib rpm versionator
DESCRIPTION="Storage manager for Adaptec RAID controller"
HOMEPAGE="http://www.adaptec.com/en-US/downloads/"
LICENSE="Adaptec"
SLOT="0"
KEYWORDS="~amd64"
IUSE="X"
RESTRICT="mirror"
# The download URL is assembled from the package name (PN) and version (PV).
SRC_URI_BASE="http://download.adaptec.com/raid/storage_manager"
SRC_URI_amd64="${SRC_URI_BASE}/${PN}_linux_x64_v${PV}.rpm"
SRC_URI="amd64? ( ${SRC_URI_amd64} )"
RDEPEND="sys-libs/libstdc++-v3
!=sys-devel/gcc-3*
X? ( dev-java/sun-jdk:1.5[X] )
!X? ( dev-java/sun-jdk:1.5 )"
# Source directory: usr/StorMan inside the work directory.
S="${WORKDIR}/usr/StorMan"
# Unpack the downloaded RPM using the helper from the rpm eclass.
src_unpack() {
rpm_src_unpack
}
src_configure() {
# binpkg - nothing to do here
:;
}
src_compile() {
# binpkg - nothing to do here
:;
}
# Install the GUI files (only with USE=X) and, unconditionally, the
# arcconf/hrconf binaries with symlinks from /opt/bin.
src_install() {
if use X ; then
cd "${S}" || die
insinto /opt/StorMan
doins index.html *.jar *.pps *.so
# StorMan needs the help inside of /opt/StorMan
doins -r help
into /opt
# Wrapper scripts come from FILESDIR; their %LIBDIR% placeholder is
# replaced with /usr/<libdir> for this system.
dobin "${FILESDIR}"/StorMan.sh
dosed "s:%LIBDIR%:/usr/$(get_libdir):" /opt/bin/StorMan.sh
dobin "${FILESDIR}"/StorAgnt.sh
dosed "s:%LIBDIR%:/usr/$(get_libdir):" /opt/bin/StorAgnt.sh
# Insert /var/log right after every ".log=" in RaidLog.pps.
dosed 's:\(\.log=\):\1/var/log:g' /opt/StorMan/RaidLog.pps
fi
# NOTE(review): the cd into ${S} above only runs in the X branch; the
# relative paths below presumably rely on src_install starting in ${S}
# - confirm for this EAPI.
into /opt/StorMan
dobin {arc,hr}conf
# The symlink targets below imply the binaries land in /opt/StorMan/bin.
dosym ../StorMan/bin/arcconf /opt/bin/arcconf
dosym ../StorMan/bin/hrconf /opt/bin/hrconf
dodoc README.TXT
}
Установка версии x86 практически не отличается от amd64, за исключением того, что номера версий и способы их именования отличаются. Это отражено в патче ниже:
--- asm-5.01.16862.ebuild 2009-03-04 13:48:02.000000000 +0300
+++ asm-5.20.17414.ebuild 2009-03-04 14:06:57.000000000 +0300
@@ -6,20 +6,21 @@
inherit multilib rpm versionator
+CH_PV="$(replace_all_version_separators _ ${PV})"
DESCRIPTION="Storage manager for Adaptec RAID controller"
HOMEPAGE="http://www.adaptec.com/en-US/downloads/"
LICENSE="Adaptec"
SLOT="0"
-KEYWORDS="~amd64"
+KEYWORDS="~x86"
IUSE="X"
RESTRICT="mirror"
SRC_URI_BASE="http://download.adaptec.com/raid/storage_manager"
-SRC_URI_amd64="${SRC_URI_BASE}/${PN}_linux_x64_v${PV}.rpm"
+SRC_URI_x86="${SRC_URI_BASE}/${PN}_linux_x86_v${CH_PV}.rpm"
-SRC_URI="amd64? ( ${SRC_URI_amd64} )"
+SRC_URI="x86? ( ${SRC_URI_x86} )"
RDEPEND="sys-libs/libstdc++-v3
Использование
Всё более-менее очевидно; думаю, результат работы команды не требует особых пояснений.
mordred ~ # arcconf GETCONFIG 1 AL
Controllers found: 1
----------------------------------------------------------------------
Controller information
----------------------------------------------------------------------
Controller Status : Optimal
Channel description : SAS/SATA
Controller Model : Adaptec 4000SAS
Controller Serial Number : BAD0
Physical Slot : 1
Installed memory : 256 MB
Copyback : Disabled
Background consistency check : Disabled
Automatic Failover : Enabled
Defunct disk drive count : 0
Logical devices/Failed/Degraded : 1/0/0
--------------------------------------------------------
Controller Version Information
--------------------------------------------------------
BIOS : 5.1-0 (8461)
Firmware : 5.1-0 (8461)
Driver : 1.1-5 (2449)
Boot Flash : 0.0-0 (0)
--------------------------------------------------------
Controller Battery Information
--------------------------------------------------------
Status : Not Installed
----------------------------------------------------------------------
Logical device information
----------------------------------------------------------------------
Logical device number 0
Logical device name : System
RAID level : 10
Status of logical device : Optimal
Size : 279600 MB
Stripe-unit size : 256 KB
Read-cache mode : Enabled
Write-cache mode : Enabled (write-back)
Write-cache setting : Enabled (write-back)
Partitioned : Yes
Protected by Hot-Spare : No
Bootable : Yes
Failed stripes : No
--------------------------------------------------------
Logical device segment information
--------------------------------------------------------
Group 0, Segment 0 : Present (0,0) 3LQ1GXXX
Group 0, Segment 1 : Present (0,4) 3LQ1GXXX
Group 1, Segment 0 : Present (0,1) 3LQ1GXXX
Group 1, Segment 1 : Present (0,5) 3LQ1LXXX
Group 2, Segment 0 : Present (0,2) 3LQ1GXXX
Group 2, Segment 1 : Present (0,6) 3LQ1NXXX
Group 3, Segment 0 : Present (0,3) 3LQ1NXXX
Group 3, Segment 1 : Present (0,7) 3LQ1NXXX
----------------------------------------------------------------------
Physical Device information
----------------------------------------------------------------------
Device #0
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,0
Reported Location : Enclosure 0, Slot 0
Reported ESD : 2,0
Vendor : SEAGATE
Model : ST373455SS
Firmware : S515
Serial number : 3LQ1GXXX
World-wide name : 5000C50006493XXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #1
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,1
Reported Location : Enclosure 0, Slot 1
Reported ESD : 2,0
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1GZ79
World-wide name : 5000C50006499XXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #2
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,2
Reported Location : Enclosure 0, Slot 2
Reported ESD : 2,0
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1GYDP
World-wide name : 5000C50006492XXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #3
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,3
Reported Location : Enclosure 0, Slot 3
Reported ESD : 2,0
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1NE5D
World-wide name : 5000C500064AFXXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #4
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,4
Reported Location : Enclosure 1, Slot 4
Reported ESD : 2,1
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1GXCS
World-wide name : 5000C50006493XXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #5
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,5
Reported Location : Enclosure 1, Slot 5
Reported ESD : 2,1
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1LMCV
World-wide name : 5000C5000649AXXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #6
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,6
Reported Location : Enclosure 1, Slot 6
Reported ESD : 2,1
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1NNSB
World-wide name : 5000C5000649AXXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #7
Device is a Hard drive
State : Online
Supported : Yes
Transfer Speed : SAS 3.0 Gb/s
Reported Channel,Device : 0,7
Reported Location : Enclosure 1, Slot 7
Reported ESD : 2,1
Vendor : SEAGATE
Model : ST37345XXX
Firmware : S515
Serial number : 3LQ1NE9Q
World-wide name : 5000C500064B0XXX
Size : 70007 MB
Write Cache : Unknown
FRU : None
S.M.A.R.T. : No
Device #8
Device is an Enclosure services device
Reported Channel,Device : 2,0
Enclosure ID : 0
Type : SES2
Vendor : AMI
Model : MG9072
Firmware : 0005
Status of Enclosure services device
Temperature : Normal
Device #9
Device is an Enclosure services device
Reported Channel,Device : 2,1
Enclosure ID : 1
Type : SES2
Vendor : AMI
Model : MG9072
Firmware : 0005
Status of Enclosure services device
Temperature : Normal
Command completed successfully.
Интеграция с Nagios
Для того чтобы удалённо мониторить состояние массива, я решил раз в 5 минут (cron) запускать команду
mordred# arcconf GETCONFIG 1 AL
и разбирать ее результат. Значения возвращать через snmpd.
Думаю, нет смысла возвращать значения сколько винтов работает, или сколько не работает, единственная ошибка - уже повод принимать какие-то меры.
Написал небольшой скриптик, который запускается раз в 5 минут, и сохраняет результаты своей работы в файлах.
#!/bin/bash
#
# aac_mon.sh - cron job that polls an Adaptec RAID controller with arcconf
# and publishes four counters as single-number files for the SNMP pass
# script to serve:
#
#   /tmp/logical_dev_number    number of logical devices
#   /tmp/failed_dev_number     number of failed logical devices
#   /tmp/degraded_dev_number   number of degraded logical devices
#   /tmp/online_dev_number     number of output lines containing "Online"
#
# If arcconf fails, or its output cannot be parsed, every file receives
# ERROR_VALUE so that the monitoring side sees an out-of-range number.

# Path to the vendor CLI. This must be a plain assignment: wrapping it in
# backticks would execute arcconf here and store its output, not its path.
ARCCONF=/opt/bin/arcconf
# Controller to query (arcconf numbers controllers starting from 1).
CONTROLLER=1
# "Special" number written to all files when the real values are unknown.
ERROR_VALUE=101

# publish LOGICAL FAILED DEGRADED ONLINE - write the four counter files.
publish() {
    echo "$1" > /tmp/logical_dev_number
    echo "$2" > /tmp/failed_dev_number
    echo "$3" > /tmp/degraded_dev_number
    echo "$4" > /tmp/online_dev_number
}

# mktemp gives an unpredictable, exclusively created file, unlike a
# date-based name in world-writable /tmp.
TMP_FILE=$(mktemp /tmp/aaraid_status_XXXXXX) || {
    publish "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE"
    exit 1
}
# Remove the temporary file however the script ends.
trap 'rm -f "$TMP_FILE"' EXIT

# Get data from controller 1
if "$ARCCONF" GETCONFIG "$CONTROLLER" AL > "$TMP_FILE"
then
    # Keep the full report in syslog for later diagnosis.
    logger -i -t "aac_mon.sh" < "$TMP_FILE"

    # The summary line looks like
    #   "Logical devices/Failed/Degraded : 1/0/0"
    # so the 4th whitespace-separated field is "logical/failed/degraded".
    COUNTS=$(/usr/bin/awk '/Logical devices\/Failed\/Degraded/ { print $4; exit }' "$TMP_FILE")
    IFS=/ read -r LOGICAL_DEV FAILED_DEV DEGRADED_DEV <<< "$COUNTS"
    DEV_OK=$(/bin/grep -c "Online" "$TMP_FILE")

    if [ -n "$LOGICAL_DEV" ] && [ -n "$FAILED_DEV" ] && [ -n "$DEGRADED_DEV" ]
    then
        publish "$LOGICAL_DEV" "$FAILED_DEV" "$DEGRADED_DEV" "$DEV_OK"
    else
        # arcconf succeeded but the summary line was missing or malformed.
        publish "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE"
    fi
else
    # Set "special" error number
    publish "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE" "$ERROR_VALUE"
fi
Отдаю эти значения через snmpd
/etc/snmpd.conf
...
pass .1.3.6.1.4.1.2022.1 /etc/snmp/aac_raidmon_snmp.sh
pass .1.3.6.1.4.1.2022.2 /etc/snmp/aac_raidmon_snmp.sh
pass .1.3.6.1.4.1.2022.3 /etc/snmp/aac_raidmon_snmp.sh
pass .1.3.6.1.4.1.2022.4 /etc/snmp/aac_raidmon_snmp.sh
...
И простой скрипт для выдачи значений из файла. Он должен возвращать 3 строки (OID, тип и значение) и обрабатывать аргументы -g, -n и -s (get, getnext и set), например:
# ./aac_raidmon_snmp.sh -g .1.3.6.1.4.1.2022.2
.1.3.6.1.4.1.2022.2
integer
0
#!/bin/sh
#
# aac_raidmon_snmp.sh - Net-SNMP "pass" handler that serves the RAID
# counters stored in /tmp/*_dev_number files.
#
# Usage (invoked by snmpd):
#   -g OID             GET:     print three lines - OID, type, value
#   -n OID             GETNEXT: not implemented, prints nothing
#   -s OID TYPE VALUE  SET:     always refused with "not-writable"
#
# Printing nothing tells snmpd that there is no such object, so the reply
# is emitted only after the OID is recognised and its value has been read;
# a partial OID/type answer without a value is never sent.

case "$1" in
    "-g")
        # Get the value of the OID: map it to the file holding the number.
        case "$2" in
            ".1.3.6.1.4.1.2022.1")
                # Logical dev number
                value_file=/tmp/logical_dev_number
                ;;
            ".1.3.6.1.4.1.2022.2")
                # Failed dev number
                value_file=/tmp/failed_dev_number
                ;;
            ".1.3.6.1.4.1.2022.3")
                # Degraded dev number
                value_file=/tmp/degraded_dev_number
                ;;
            ".1.3.6.1.4.1.2022.4")
                # Online dev number
                value_file=/tmp/online_dev_number
                ;;
            *)
                # Unknown OID: answer with no output at all.
                exit 0
                ;;
        esac

        # The file is absent until the collector has run at least once.
        [ -r "$value_file" ] || exit 0
        value=$(cat "$value_file")
        [ -n "$value" ] || exit 0

        echo "$2"
        echo integer
        echo "$value"
        ;;

    "-n")
        # Get the OID and value of the object following the given OID.
        # Not implemented: no output.
        ;;
    "-s")
        # Set the value of the OID: all objects here are read-only.
        echo not-writable
        ;;
    *)
        #exit 1
        ;;
esac
exit 0
Далее со стороны сервера мониторинга эти значения можно обрабатывать нагиосом так, как удобно.
Я использую snmp v3 на этих серверах, соответственно, в конфиге имею:
# Service check for the number of logical RAID devices, read over SNMPv3
# from OID .1.3.6.1.4.1.2022.1 via the check_snmp_v3_data command.
# The "!"-separated check_command arguments are, in order: user name,
# password, OID, warning range, critical range.
# NOTE(review): "1:1" presumably means "alert unless the value is exactly 1"
# in Nagios plugin range syntax - confirm against the plugin documentation.
define service{
use generic-service
host_name server_at_my_bomain
service_description RAID_Status_Logincal_Device_Number
is_volatile 0
check_period 24x7
max_check_attempts 3
normal_check_interval 5
retry_check_interval 1
contact_groups main-server-admins
notification_interval 120
notification_period 24x7
notification_options w,u,c,r
check_command check_snmp_v3_data!my_username!my_secret!.1.3.6.1.4.1.2022.1!1:1!1:1
}
Аналогично для 3 других значений. Т.к. на разных серверах они могут отличаться, я решил отдавать через snmp сырые данные, а проверять их значения уже на уровне нагиоса. Для того чтобы конфиг был проще, сделал отдельную команду для snmp_v3.
# Generic SNMPv3 check built on the check_snmp plugin.
#   $ARG1$  SNMPv3 user name (-U)
#   $ARG2$  password, passed as both the auth (-A) and privacy (-X) password
#   $ARG3$  OID to query (-o)
#   $ARG4$  warning range (-w)
#   $ARG5$  critical range (-c)
# The authentication protocol must be given as "-a MD5"; without the dash,
# "a" and "MD5" are stray positional arguments and the protocol is not set.
define command{
        command_name    check_snmp_v3_data
        command_line    $USER1$/check_snmp -H $HOSTADDRESS$ -L authNoPriv -U $ARG1$ -a MD5 -X $ARG2$ -A $ARG2$ -P 3 -o $ARG3$ -w $ARG4$ -c $ARG5$
}
Результат работы
В результате в нагиосе вижу
my_server HTTP OK 05-05-2009 13:00:12 5d 20h 52m 5s 1/3 HTTP OK - HTTP/1.1 302 Moved Temporarily - 0.011 second response time
PING OK 05-05-2009 12:58:14 5d 20h 53m 13s 1/3 PING OK - Packet loss = 0%, RTA = 0.42 ms
RAID_Status_Degraded_Device_Number OK 05-05-2009 12:57:23 0d 0h 4m 9s 1/3 SNMP OK - 0
RAID_Status_Failed_Device_Nnumber OK 05-05-2009 12:58:06 0d 0h 14m 49s 1/3 SNMP OK - 0
RAID_Status_Logincal_Device_Number OK 05-05-2009 12:58:09 0d 0h 13m 51s 1/3 SNMP OK - 1
RAID_Status_Online_Device_Number OK 05-05-2009 13:01:06 0d 0h 5m 28s 1/3 SNMP OK - 8
SNMPv3 OK 05-05-2009 12:58:08 0d 0h 53m 17s 1/3 SNMP OK - Timeticks: (425606) 1:10:56.06
SSH OK 05-05-2009 13:00:12 5d 20h 51m 44s 1/3 SSH OK - OpenSSH_4.7 (protocol 2.0)