Ceph disk not online after a reboot
After a reboot of my Ceph test VMs, a few OSDs didn't come online. Running ceph osd tree showed:
[root@lib-cephosd1 ~]# ceph osd tree
ID WEIGHT  TYPE NAME              UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.29984 root default
-2 0.09995     host lib-cephosd1
 1 0.00999         osd.1               up  1.00000          1.00000
 0 0.00999         osd.0               up  1.00000          1.00000
 2 0.00999         osd.2               up  1.00000          1.00000
 3 0.00999         osd.3               up  1.00000          1.00000
 4 0.00999         osd.4               up  1.00000          1.00000
 5 0.00999         osd.5               up  1.00000          1.00000
 6 0.00999         osd.6               up  1.00000          1.00000
 7 0.00999         osd.7               up  1.00000          1.00000
 8 0.00999         osd.8               up  1.00000          1.00000
 9 0.00999         osd.9               up  1.00000          1.00000
-3 0.09995     host lib-cephosd2
10 0.00999         osd.10              up  1.00000          1.00000
12 0.00999         osd.12              up  1.00000          1.00000
14 0.00999         osd.14              up  1.00000          1.00000
16 0.00999         osd.16              up  1.00000          1.00000
18 0.00999         osd.18              up  1.00000          1.00000
20 0.00999         osd.20              up  1.00000          1.00000
22 0.00999         osd.22              up  1.00000          1.00000
24 0.00999         osd.24              up  1.00000          1.00000
26 0.00999         osd.26              up  1.00000          1.00000
28 0.00999         osd.28              up  1.00000          1.00000
-4 0.09995     host lib-cephosd3
11 0.00999         osd.11              up  1.00000          1.00000
13 0.00999         osd.13            down        0          1.00000
15 0.00999         osd.15              up  1.00000          1.00000
17 0.00999         osd.17              up  1.00000          1.00000
19 0.00999         osd.19              up  1.00000          1.00000
21 0.00999         osd.21              up  1.00000          1.00000
23 0.00999         osd.23              up  1.00000          1.00000
25 0.00999         osd.25              up  1.00000          1.00000
27 0.00999         osd.27              up  1.00000          1.00000
29 0.00999         osd.29              up  1.00000          1.00000
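On a cluster this small the down OSD is easy to spot by eye. On a bigger tree, a quicker check (not part of the original session) is to ask the monitors directly, since ceph health detail names down OSDs explicitly:

# With an OSD down, health detail lists it with its last known address
ceph health detail | grep down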
We can see osd.13 is down on the node lib-cephosd3.
On that node I ran service ceph restart osd.13, but was greeted with:
[root@lib-cephosd3 ~]# service ceph restart osd.13
/etc/init.d/ceph: osd.13 not found (/etc/ceph/ceph.conf defines osd.11 osd.15 osd.17 osd.19 osd.21 osd.23 osd.25 osd.27 osd.29 , /var/lib/ceph defines osd.11 osd.15 osd.17 osd.19 osd.21 osd.23 osd.25 osd.27 osd.29)
Hmm.
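The error message itself is the clue: the sysvinit script only acts on OSDs it can enumerate, either from /etc/ceph/ceph.conf or from the directories it finds under /var/lib/ceph, and osd.13 appears in neither list. A quick way to see what the script sees (not a command from the original session):

# The init script enumerates OSDs from here; no ceph-13 directory, no osd.13
ls /var/lib/ceph/osd/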
Running ceph-disk list, however, shows the disk is there:
[root@lib-cephosd3 ~]# ceph-disk list
WARNING:ceph-disk:Old blkid does not support ID_PART_ENTRY_* fields, trying sgdisk; may not correctly identify ceph volumes with dmcrypt
/dev/sda other, unknown
/dev/sdb other, unknown
/dev/vda :
 /dev/vda1 other, xfs, mounted on /boot
 /dev/vda2 other, LVM2_member
/dev/vdb :
 /dev/vdb1 ceph data, active, cluster ceph, osd.11, journal /dev/vdb2
 /dev/vdb2 ceph journal, for /dev/vdb1
/dev/vdc :
 /dev/vdc1 ceph data, prepared, cluster ceph, osd.13, journal /dev/vdc2
 /dev/vdc2 ceph journal, for /dev/vdc1
/dev/vdd :
 /dev/vdd1 ceph data, active, cluster ceph, osd.15, journal /dev/vdd2
 /dev/vdd2 ceph journal, for /dev/vdd1
/dev/vde :
 /dev/vde1 ceph data, active, cluster ceph, osd.17, journal /dev/vde2
 /dev/vde2 ceph journal, for /dev/vde1
/dev/vdf :
 /dev/vdf1 ceph data, active, cluster ceph, osd.19, journal /dev/vdf2
 /dev/vdf2 ceph journal, for /dev/vdf1
/dev/vdg :
 /dev/vdg1 ceph data, active, cluster ceph, osd.21, journal /dev/vdg2
 /dev/vdg2 ceph journal, for /dev/vdg1
/dev/vdh :
 /dev/vdh1 ceph data, active, cluster ceph, osd.23, journal /dev/vdh2
 /dev/vdh2 ceph journal, for /dev/vdh1
/dev/vdi :
 /dev/vdi1 ceph data, active, cluster ceph, osd.25, journal /dev/vdi2
 /dev/vdi2 ceph journal, for /dev/vdi1
/dev/vdj :
 /dev/vdj1 ceph data, active, cluster ceph, osd.27, journal /dev/vdj2
 /dev/vdj2 ceph journal, for /dev/vdj1
/dev/vdk :
 /dev/vdk1 ceph data, active, cluster ceph, osd.29, journal /dev/vdk2
 /dev/vdk2 ceph journal, for /dev/vdk1
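Note the state column: every working OSD partition reports active, while /dev/vdc1 is only prepared, meaning the partition was set up but never activated (mounted and started) after the reboot. Rather than mounting by hand as I do below, an alternative I didn't try here would be to let ceph-disk do the activation itself:

# Untested alternative: mounts the data partition and starts the OSD in one go
ceph-disk activate /dev/vdc1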
Interestingly, mount shows the disk as not being mounted:
[root@lib-cephosd3 ~]# mount
proc on /proc type proc (rw,nosuid,nodev,noexec,relatime)
sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime,seclabel)
devtmpfs on /dev type devtmpfs (rw,nosuid,seclabel,size=920412k,nr_inodes=230103,mode=755)
securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime)
tmpfs on /dev/shm type tmpfs (rw,nosuid,nodev,seclabel)
devpts on /dev/pts type devpts (rw,nosuid,noexec,relatime,seclabel,gid=5,mode=620,ptmxmode=000)
tmpfs on /run type tmpfs (rw,nosuid,nodev,seclabel,mode=755)
tmpfs on /sys/fs/cgroup type tmpfs (rw,nosuid,nodev,noexec,seclabel,mode=755)
cgroup on /sys/fs/cgroup/systemd type cgroup (rw,nosuid,nodev,noexec,relatime,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd)
pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime)
cgroup on /sys/fs/cgroup/cpuset type cgroup (rw,nosuid,nodev,noexec,relatime,cpuset)
cgroup on /sys/fs/cgroup/cpu,cpuacct type cgroup (rw,nosuid,nodev,noexec,relatime,cpuacct,cpu)
cgroup on /sys/fs/cgroup/memory type cgroup (rw,nosuid,nodev,noexec,relatime,memory)
cgroup on /sys/fs/cgroup/devices type cgroup (rw,nosuid,nodev,noexec,relatime,devices)
cgroup on /sys/fs/cgroup/freezer type cgroup (rw,nosuid,nodev,noexec,relatime,freezer)
cgroup on /sys/fs/cgroup/net_cls type cgroup (rw,nosuid,nodev,noexec,relatime,net_cls)
cgroup on /sys/fs/cgroup/blkio type cgroup (rw,nosuid,nodev,noexec,relatime,blkio)
cgroup on /sys/fs/cgroup/perf_event type cgroup (rw,nosuid,nodev,noexec,relatime,perf_event)
cgroup on /sys/fs/cgroup/hugetlb type cgroup (rw,nosuid,nodev,noexec,relatime,hugetlb)
configfs on /sys/kernel/config type configfs (rw,relatime)
/dev/mapper/centos-root on / type xfs (rw,relatime,seclabel,attr2,inode64,noquota)
selinuxfs on /sys/fs/selinux type selinuxfs (rw,relatime)
systemd-1 on /proc/sys/fs/binfmt_misc type autofs (rw,relatime,fd=33,pgrp=1,timeout=300,minproto=5,maxproto=5,direct)
debugfs on /sys/kernel/debug type debugfs (rw,relatime)
hugetlbfs on /dev/hugepages type hugetlbfs (rw,relatime,seclabel)
mqueue on /dev/mqueue type mqueue (rw,relatime,seclabel)
/dev/vda1 on /boot type xfs (rw,relatime,seclabel,attr2,inode64,noquota)
/dev/vdh1 on /var/lib/ceph/osd/ceph-23 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdj1 on /var/lib/ceph/osd/ceph-27 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdi1 on /var/lib/ceph/osd/ceph-25 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdb1 on /var/lib/ceph/osd/ceph-11 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdd1 on /var/lib/ceph/osd/ceph-15 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdk1 on /var/lib/ceph/osd/ceph-29 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdf1 on /var/lib/ceph/osd/ceph-19 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vde1 on /var/lib/ceph/osd/ceph-17 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
/dev/vdg1 on /var/lib/ceph/osd/ceph-21 type xfs (rw,noatime,seclabel,attr2,inode64,noquota)
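All the other OSD data partitions are mounted under /var/lib/ceph/osd, but there is no line for ceph-13. A quicker way to spot the missing one (not a command from the original session) is to filter the mount table:

# Show only the OSD data mounts; ceph-13 is absent from the list
mount | grep /var/lib/ceph/osd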
What if I manually mount it?
[root@lib-cephosd3 ~]# mount /dev/vdc1 /var/lib/ceph/osd/ceph-13
[root@lib-cephosd3 ~]# cd /var/lib/ceph/osd/ceph-13/
[root@lib-cephosd3 ceph-13]# ll
total 44
-rw-r--r--.  1 root root  502 Feb  9 11:30 activate.monmap
-rw-r--r--.  1 root root    3 Feb  9 11:30 active
-rw-r--r--.  1 root root   37 Feb  9 11:30 ceph_fsid
drwxr-xr-x. 77 root root 1216 Feb  9 12:30 current
-rw-r--r--.  1 root root   37 Feb  9 11:30 fsid
lrwxrwxrwx.  1 root root   58 Feb  9 11:30 journal -> /dev/disk/by-partuuid/644f4d32-d440-4a6c-9fee-0a2187ac2eaf
-rw-r--r--.  1 root root   37 Feb  9 11:30 journal_uuid
-rw-------.  1 root root   57 Feb  9 11:30 keyring
-rw-r--r--.  1 root root   21 Feb  9 11:30 magic
-rw-r--r--.  1 root root    6 Feb  9 11:30 ready
-rw-r--r--.  1 root root    4 Feb  9 11:30 store_version
-rw-r--r--.  1 root root   53 Feb  9 11:30 superblock
-rw-r--r--.  1 root root    0 Feb  9 11:34 sysvinit
-rw-r--r--.  1 root root    3 Feb  9 11:30 whoami
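Before starting it, a quick sanity check I didn't run at the time: the whoami file stores the OSD id for this data directory, so mounting the wrong partition in the wrong place would show up immediately.

# whoami holds the OSD id; it should print 13 here
cat /var/lib/ceph/osd/ceph-13/whoami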
Looks OK. Can I start that OSD now?
[root@lib-cephosd3 osd]# service ceph restart osd.13
=== osd.13 ===
=== osd.13 ===
Stopping Ceph osd.13 on lib-cephosd3...done
=== osd.13 ===
create-or-move updated item name 'osd.13' weight 0.01 at location {host=lib-cephosd3,root=default} to crush map
Starting Ceph osd.13 on lib-cephosd3...
Running as unit run-12509.service.
[root@lib-cephosd3 osd]# ceph osd tree
ID WEIGHT  TYPE NAME              UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.29984 root default
-2 0.09995     host lib-cephosd1
 1 0.00999         osd.1               up  1.00000          1.00000
 0 0.00999         osd.0               up  1.00000          1.00000
 2 0.00999         osd.2               up  1.00000          1.00000
 3 0.00999         osd.3               up  1.00000          1.00000
 4 0.00999         osd.4               up  1.00000          1.00000
 5 0.00999         osd.5               up  1.00000          1.00000
 6 0.00999         osd.6               up  1.00000          1.00000
 7 0.00999         osd.7               up  1.00000          1.00000
 8 0.00999         osd.8               up  1.00000          1.00000
 9 0.00999         osd.9               up  1.00000          1.00000
-3 0.09995     host lib-cephosd2
10 0.00999         osd.10              up  1.00000          1.00000
12 0.00999         osd.12              up  1.00000          1.00000
14 0.00999         osd.14              up  1.00000          1.00000
16 0.00999         osd.16              up  1.00000          1.00000
18 0.00999         osd.18              up  1.00000          1.00000
20 0.00999         osd.20              up  1.00000          1.00000
22 0.00999         osd.22              up  1.00000          1.00000
24 0.00999         osd.24              up  1.00000          1.00000
26 0.00999         osd.26              up  1.00000          1.00000
28 0.00999         osd.28              up  1.00000          1.00000
-4 0.09995     host lib-cephosd3
11 0.00999         osd.11              up  1.00000          1.00000
13 0.00999         osd.13              up  1.00000          1.00000
15 0.00999         osd.15              up  1.00000          1.00000
17 0.00999         osd.17              up  1.00000          1.00000
19 0.00999         osd.19              up  1.00000          1.00000
21 0.00999         osd.21              up  1.00000          1.00000
23 0.00999         osd.23              up  1.00000          1.00000
25 0.00999         osd.25              up  1.00000          1.00000
27 0.00999         osd.27              up  1.00000          1.00000
29 0.00999         osd.29              up  1.00000          1.00000
[root@lib-cephosd3 osd]#
[root@lib-cephosd1 ~]# ceph -w
    cluster 46ded320-ec09-40bc-a6c4-0a8ad3341035
     health HEALTH_OK
     monmap e2: 3 mons at {lib-cephmon1=172.18.0.51:6789/0,lib-cephmon2=172.18.0.52:6789/0,lib-cephmon3=172.18.0.53:6789/0}
            election epoch 26, quorum 0,1,2 lib-cephmon1,lib-cephmon2,lib-cephmon3
     osdmap e289: 30 osds: 30 up, 30 in
      pgmap v763: 576 pgs, 5 pools, 0 bytes data, 0 objects
            1235 MB used, 448 GB / 449 GB avail
                 576 active+clean

2016-02-11 13:53:39.539715 mon.0 [INF] pgmap v763: 576 pgs: 576 active+clean; 0 bytes data, 1235 MB used, 448 GB / 449 GB avail
That worked :)
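One caveat: a mount made by hand like this won't survive the next reboot. Boot-time activation is normally driven by udev matching the Ceph GPT partition type, so if an OSD is ever left in the prepared state again, re-running activation for everything (untested in this session) should bring it back without the manual mount:

# Untested: activate every prepared-but-inactive Ceph data partition
ceph-disk activate-all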