Linux
From Cheatsheet
Common checks
Monitoring
# See CPU + RAM usage, system stats and open processes top # Only list processes making active use of the CPU top -i # Only list processes making active use of the CPU, and include the entire command being run instead of just the tool-name top -ci # Prettier version of top that can be customized htop # Reimagined version of top, includes network and disk usage by default btop # List all running processes ps aux
Systemd
# Show logs from the current boot journalctl -b # Jump to the end of the journal journalctl -e # Include explanatory help texts with the log entries journalctl -x # Show journalctl logs for the sshd service, starting from the end journalctl -u sshd -e # Output contents directly to the shell journalctl --no-pager
OS & Distribution
# Print OS and host information hostnamectl # Show OS and distribution information cat /proc/version # Show OS and distribution information cat /etc/os-release # Print distribution-specific information lsb_release -a
Hardware & kernel
# List installed kernel modules lsmod # Print Kernel messages dmesg # Print Kernel messages with humanized timestamps dmesg -T # SCSI hardware information cat /proc/scsi/scsi # Print hardware/BIOS information dmidecode # Print hardware/BIOS information of a specific type dmidecode -t 1 # List all connected hardware lshw # List physical network hardware lshw -short -class network # List physical memory hardware lshw -class memory # Show PCI information lspci # Show verbose PCI information lspci -v # List all block/filesystem devices lsblk # List block devices and partition tables fdisk -l
Pacemaker
# Show status of the pacemaker cluster pcs cluster status # Show status of the pacemaker service pcs status # Show configured pacemaker resources pcs resource config # Show a specific configured resource pcs resource show ResourceNameHere
Services
Filesystems
# List NFS clients that have mounted exports from the local NFS server showmount
SMB/CIFS checks
# Samba checks smbstatus smbstatus -S smbstatus -b # Samba set debug mode smbcontrol smbd debug 1
NFS
Checks
# NFS nfsstat # Detailed RPC and packet information nfsstat -o all # Every RPC "program" is bound to a specific NFS version. Use NFS/CTDB logs in combination with the program ID to identify the failing component rpcinfo -p
Common
Exports
Use the file /etc/exports to define exports to clients.
# Create the folders before exporting them mkdir -p /data/exports/customer1000/finance mkdir -p /data/exports/customer1001/backup
NFSv3 example:
#//////////////////////////////////////////////////////////////////////////////////////////// # Customer1000 /data/exports/customer1000/finance 192.168.20.1(rw,no_root_squash,sync) 192.168.20.2(rw,sync) #//////////////////////////////////////////////////////////////////////////////////////////// # Customer1001 /data/exports/customer1001/backup 192.168.30.1(rw,no_root_squash,sync) 192.168.30.2(rw,no_root_squash,sync)
# Reload the NFS server to apply changes within /etc/exports systemctl reload nfs-server
Mount
# Install NFS client (Ubuntu) apt install nfs-common # Install NFS client (RHEL) yum install nfs-utils # Mount NFS share located on server 192.168.20.1 on path /data/exports/customer1000/finance, to local server /mnt/nfs/ mount -v -t nfs 192.168.20.1:/data/exports/customer1000/finance /mnt/nfs/
Optimizations
Change these values depending on your usage and the available resources on your server.
# /etc/sysctl.d/nfs-tuning.conf net.core.rmem_max=1048576 net.core.rmem_default=1048576 net.core.wmem_max=1048576 net.core.wmem_default=1048576 net.ipv4.tcp_rmem=4096 1048576 134217728 net.ipv4.tcp_wmem=4096 1048576 134217728 vm.min_free_kbytes=8388608
# Reload above optimization sysctl -p /etc/sysctl.d/nfs-tuning.conf
Raise the number of NFS threads
# /etc/sysconfig/nfs # Number of nfs server processes to be started. # The default is 8. #RPCNFSDCOUNT=16 RPCNFSDCOUNT=128
Activate NFSD count on the fly
rpc.nfsd 64 # Check the number of threads cat /proc/fs/nfsd/threads
Ceph
Checks
# Display the running Ceph version ceph -v # Check the clusters' health and status ceph -s # Watch the clusters' health and status in real time ceph -w # Show detailed logs relating to cluster health ceph health detail # List all Ceph 'containers' and OSDs ceph orch ls # List available storage devices ceph orch device ls # Show logs for a specific service ceph orch ls --service_name osd.all-available-devices --format yaml # Re-check the status of a host ceph cephadm check-host schijf-3
# List all pools ceph osd lspools # See the status of all OSDs ceph osd stat # List all OSDs ceph osd tree
# List all Placement Groups ceph pg dump # Check the status of Ceph PGs ceph pg stat
Commands
# Enter the Ceph shell (single cluster) cephadm shell
Installation
Using Cephadm: https://docs.ceph.com/en/quincy/cephadm/install/
Cephadm
# Create a folder for the cephadm tool mkdir cephadm cd cephadm/ # Download cephadm (Quincy) curl --silent --remote-name --location https://github.com/ceph/ceph/raw/quincy/src/cephadm/cephadm chmod +x cephadm # Output help ./cephadm -h # Install cephadm (Quincy) release ./cephadm add-repo --release quincy ./cephadm install # Check if cephadm is properly installed which cephadm
Bootstrap
# Bootstrap node and install Ceph cephadm bootstrap --mon-ip 192.168.100.11 # Check the status of the cluster cephadm shell -- ceph -s docker ps ## Optional # Enter the Ceph shell (single cluster) cephadm shell # Exit the Ceph shell exit # Install common Ceph packages/tools cephadm install ceph-common # Display the Ceph version ceph -v
Add additional hosts
# On your bootstrapped node create a key for SSH-access to the other hosts. ssh-keygen cat .ssh/id_rsa.pub # Add the newly generated key to the authorized_keys file for the relevant user, on the other hosts. # Copy the Ceph clusters' public key to the other nodes ssh-copy-id -f -i /etc/ceph/ceph.pub root@storage-2 ssh-copy-id -f -i /etc/ceph/ceph.pub root@storage-3 # Add the admin role to the other nodes ceph orch host add storage-2 10.4.20.2 _admin ceph orch host add storage-3 10.4.20.3 _admin
OSD creation
If you've installed ceph-osd on your host, this step will fail horribly with errors such as:
-1 bluestore(/var/lib/ceph/osd/ceph-1//block) _read_bdev_label failed to open /var/lib/ceph/osd/ceph-1//block: (13) Permission denied -1 bdev(0x5571d5f69400 /var/lib/ceph/osd/ceph-1//block) open open got: (13) Permission denied -1 OSD::mkfs: ObjectStore::mkfs failed with error (13) Permission denied -1 ESC[0;31m ** ERROR: error creating empty object store in /var/lib/ceph/osd/ceph-0/: (13) Permission deniedESC[0m OSD, will rollback changes
# Configure all available storage to be used as OSD storage ceph orch apply osd --all-available-devices # Check for OSD problems watch ceph -s watch ceph osd tree
Commands
# Enter the Ceph shell for a specific cluster sudo /usr/sbin/cephadm shell --fsid asdjwqe-asjd324-asdki321-821asd-asd241-asdn1234- -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin2.keyring # Give node storage-4, which is already a cluster member, the admin tag ceph orch host label add storage-4 _admin # Mount a Ceph filesystem with 3 mon hosts, using a secretfile # Contents in the secretfile is ONLY the secret / key mount -t ceph 192.168.0.11,192.168.0.12,192.168.0.13:/shares/mycustomer/asd8asd8-as8d83-df4mjvjdf /mnt/ceph-storage -o name=customershare-28,secretfile=/etc/ceph/customer28-secretfile
Upgrade
Make sure your cluster status is healthy first!
# Upgrade Ceph to a specific version ceph orch upgrade start --ceph-version 17.2.0 # Check the status of the Ceph upgrade ceph orch upgrade status # Stop the Ceph upgrade ceph orch upgrade stop
RBD-NBD
# List available volumes within the openstackvolumes pool rbd ls openstackhdd # List all available snapshots for object volume-asd9p12o3-90b2-1238-1209-as980d7213hs, which resides in pool ghgvolumes rbd snap ls openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # Map the volume-object to the local filesystem rbd-nbd map openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # Map the volume-object as read-only to the local filesystem rbd-nbd map --read-only openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # List currently mapped objects rbd-nbd list-mapped # Check what filesystem and partition the device contains fdisk -l /dev/nbd1 # Mount the device to a local folder mount /dev/nbd1p1 /mnt/storage # Unmount the device from the local folder umount /mnt/storage # 2 methods to unmap # Unmap the mapped object rbd-nbd unmap /dev/nbd2 # Unmap the mapped object rbd-nbd unmap volume-asd9p12o3-90b2-1238-1209-as980d7213hs
Remove node
# Remove running daemons ceph orch host drain storage-3 # Remove host from the cluster ceph orch host rm storage-3 # On storage-3, reboot the node shutdown -r now
Destroy node
Scorched earth
Only execute if you want to annihilate your node and/or cluster.
# Kill and destroy OSD 0 ceph osd down 0 && ceph osd destroy 0 --force # Stop Ceph services systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@crash.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mgr.host-1.xmatqa.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@node-exporter.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn.target # Disable Ceph services systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@crash.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mgr.host-1.xmatqa.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@node-exporter.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn.target # Destroy everything (packages, containers, configuration) ceph-deploy uninstall host-1 ceph-deploy purge host-1 rm -rf /var/lib/ceph # Check for failed services systemctl | grep ceph # Reset them so they disable properly systemctl reset-failed ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service # reboot shutdown -r now
BTRFS
Using LVM
# Install LVM creation tools depending on your OS yum install lvm2 apt install lvm2 # Check and note the disk you need fdisk -l # Format /dev/vdb as BTRFS echo -e "n\np\n1\n\n\nt\n8E\np\nw" | fdisk /dev/vdb # Create LVM pvcreate /dev/vdb1 vgcreate vdb_vg /dev/vdb1 lvcreate -l 100%FREE -n btrfs vdb_vg # Check pvs vgs # Create the BTRFS filesystem mkfs.btrfs /dev/vdb_vg/btrfs # Create a folder for the BTRFS mount mkdir -p /mnt/btrfs1 # Mount the BTRFS filesystem mount -t btrfs /dev/vdb_vg/btrfs /mnt/btrfs1/ # Modify fstab so the filesystem gets mounted automatically on boot cat << 'EOF' >> /etc/fstab /dev/mapper/vdb_vg-btrfs /mnt/btrfs1 btrfs defaults 0 0 EOF
User management
# Create the books group groupadd books # Make myrthe part of the "philosophy" and "books" groups usermod myrthe -aG philosophy,books # See the groups myrthe is part of groups myrthe # The owner gains full control; group and everyone else may only read and execute chmod 755 /home/ring/gollum.txt # Make ballrog the owner of the /data/sf4/cup folder chown ballrog:ballrog /data/sf4/cup # Make all files located anywhere within the .ssh, owned by the stalin user and soviet group chown -R stalin:soviet /home/stalin/.ssh # Delete the simba user and include his home folder and mail spool userdel -r simba
Create user (RHEL)
# Create user with a home-folder and add him to the wheel group (supplementary, via -G) useradd -m -G wheel john # Set a password for the john user passwd john # Create the SSH folder for john mkdir -p /home/john/.ssh # Add a public key to john's account echo "ssh-rsa 123980idfas89132hadsckjh871234" >> /home/john/.ssh/authorized_keys # Set proper permissions for the .ssh folder and authorized_keys chown -R john:john /home/john/.ssh chmod 700 /home/john/.ssh chmod 600 /home/john/.ssh/authorized_keys
Sudoers
Concerns /etc/sudoers
# Allow user jabami to execute any command, without specifying a password jabami ALL=(ALL) NOPASSWD: ALL # Allow user "drake" to perform the 2 given commands with sudo, no password. ## Define user and associate the command group variable "UPDATE_CMDS" drake ALL=(ALL) NOPASSWD: UPDATE_CMDS ## Define commands for the "UPDATE_CMDS" variable Cmnd_Alias UPDATE_CMDS = /usr/bin/apt-get update, /usr/bin/apt-get upgrade # Allow members of the group "researchers" to perform the 2 given commands with sudo, no password. ## User alias specification %researchers ALL=(ALL) NOPASSWD: UPDATE_CMDS2 ## Define commands for the "UPDATE_CMDS2" variable Cmnd_Alias UPDATE_CMDS2 = /usr/bin/apt-get update, /usr/bin/apt-get upgrade
Other
Throughput test
# Test bandwidth throughput with iperf # Listen on server-A on port 5101 iperf3 -s -p 5101 # Connect to server-A from server-B iperf3 -c 192.168.0.1 -p 5101
# Testing disk/share throughput # Create "testfile" of size 1710x1M in current folder time dd if=/dev/zero of=testfile bs=1M count=1710 # Create "testfile2" of size 5x1G in current folder time dd if=/dev/zero of=testfile2 bs=1G count=5 # Show copy-time of "testfile" to disk or share time cp testfile /mnt/btrfs/data/<LOCATION>/ # Methods of testing disk or share throughput # Show read-time from the mount to null time cat /mnt/btrfs/data/<FILE> > /dev/null # Show copy-time from the mount to null time dd if=/mnt/btrfs/data/<FILE> of=/dev/null bs=1M # Show copy-time from the mount to the current folder time cp /mnt/btrfs/data/<FILE> . # Copy one folder to another with rsync while showing progress rsync -avhW --no-compress --progress <source>/ <destination>/
Create different temp folder
# Create a temporary TMP folder mkdir -p /scratch/tmp/ # Activate temporary TMP folder export TMPDIR=/scratch/tmp