Linux
From Cheatsheet
Common checks
Monitoring
# See CPU + RAM usage, system stats and open processes top # Only list processes making active use of the CPU top -i # Only list processes making active use of the CPU, and include the entire command being run instead of just the tool-name top -ci # Prettier version of top that can be customized htop # Reimagined version of top, includes network and disk usage by default btop # List all running processes ps aux
Systemd
# Show logs from the current boot journalctl -b # Jump to the end of the journal journalctl -e # Include explanatory help texts with the log entries journalctl -x # Show journalctl logs for the sshd service, starting from the end journalctl -u sshd -e # Output contents directly to the shell journalctl --no-pager
OS & Distribution
# Print OS and host information hostnamectl # Show OS and distribution information cat /proc/version # Show OS and distribution information cat /etc/os-release # Print distribution-specific information lsb_release -a
Hardware & kernel
# List installed kernel modules lsmod # Print Kernel messages dmesg # Print Kernel messages with humanized timestamps dmesg -T # SCSI hardware information cat /proc/scsi/scsi # Print hardware/BIOS information dmidecode # Print hardware/BIOS information of a specific type dmidecode -t 1 # List all connected hardware lshw # List physical network hardware lshw -short -class network # List physical memory hardware lshw -class memory # Show PCI information lspci # Show verbose PCI information lspci -v # List all block/filesystem devices lsblk # List block devices and partition tables fdisk -l
Pacemaker
# Show status of the pacemaker cluster pcs cluster status # Show status of the pacemaker service pcs status # Show configured pacemaker resources pcs resource config # Show a specific configured resource pcs resource show ResourceNameHere
Services
Filesystems
# List NFS clients that have mounted exports from the local NFS server showmount
SMB/CIFS checks
# Samba checks smbstatus smbstatus -S smbstatus -b # Samba set debug mode smbcontrol smbd debug 1
NFS
Checks
# NFS nfsstat # Detailed RPC and packet information nfsstat -o all # Every RPC "program" is bound to a specific NFS version. Use NFS/CTDB logs in combination with the program ID to identify the failing component rpcinfo -p
Common
Exports
Use the file /etc/exports to define exports to clients.
# Create the folders before exporting them mkdir -p /data/exports/customer1000/finance mkdir -p /data/exports/customer1001/backup
NFSv3 example:
#//////////////////////////////////////////////////////////////////////////////////////////// # Customer1000 /data/exports/customer1000/finance 192.168.20.1(rw,no_root_squash,sync) 192.168.20.2(rw,sync) #//////////////////////////////////////////////////////////////////////////////////////////// # Customer1001 /data/exports/customer1001/backup 192.168.30.1(rw,no_root_squash,sync) 192.168.30.2(rw,no_root_squash,sync)
# Reload the NFS server to apply changes within /etc/exports systemctl reload nfs-server
Mount
# Install NFS client (Ubuntu) apt install nfs-common # Install NFS client (RHEL) yum install nfs-utils # Mount NFS share located on server 192.168.20.1 on path /data/exports/customer1000/finance, to local server /mnt/nfs/ mount -v -t nfs 192.168.20.1:/data/exports/customer1000/finance /mnt/nfs/
Optimizations
Change these values depending on your usage and the available resources on your server.
# /etc/sysctl.d/nfs-tuning.conf net.core.rmem_max=1048576 net.core.rmem_default=1048576 net.core.wmem_max=1048576 net.core.wmem_default=1048576 net.ipv4.tcp_rmem=4096 1048576 134217728 net.ipv4.tcp_wmem=4096 1048576 134217728 vm.min_free_kbytes=8388608
# Reload above optimization sysctl -p /etc/sysctl.d/nfs-tuning.conf
Raise the number of NFS threads
# /etc/sysconfig/nfs # Number of nfs server processes to be started. # The default is 8. #RPCNFSDCOUNT=16 RPCNFSDCOUNT=128
Activate NFSD count on the fly
rpc.nfsd 64 # Check the number of threads cat /proc/fs/nfsd/threads
Ceph
Checks
# Display the running Ceph version ceph -v # Check the clusters' health and status ceph -s # Watch the clusters' health and status in real time ceph -w # Show detailed logs relating to cluster health ceph health detail # List all Ceph 'containers' and OSDs ceph orch ls # List available storage devices ceph orch device ls # Show logs for a specific service ceph orch ls --service_name osd.all-available-devices --format yaml # Re-check the status of a host ceph cephadm check-host schijf-3
# List all pools ceph osd lspools # See the status of all OSDs ceph osd stat # List all OSDs ceph osd tree
# List all Placement Groups ceph pg dump # Check the status of Ceph PGs ceph pg stat
Commands
# Enter the Ceph shell (single cluster) cephadm shell
Installation
Using Cephadm: https://docs.ceph.com/en/quincy/cephadm/install/
Cephadm
# Create a folder for the cephadm tool mkdir cephadm cd cephadm/ # Download cephadm (Quincy) curl --silent --remote-name --location https://github.com/ceph/ceph/raw/quincy/src/cephadm/cephadm chmod +x cephadm # Output help ./cephadm -h # Install cephadm (Quincy) release ./cephadm add-repo --release quincy ./cephadm install # Check if cephadm is properly installed which cephadm
Bootstrap
# Bootstrap node and install Ceph cephadm bootstrap --mon-ip 192.168.100.11 # Check the status of the cluster cephadm shell -- ceph -s docker ps ## Optional # Enter the Ceph shell (single cluster) cephadm shell # Exit the Ceph shell exit # Install common Ceph packages/tools cephadm install ceph-common # Display the Ceph version ceph -v
Add additional hosts
# On your bootstrapped node create a key for SSH-access to the other hosts. ssh-keygen cat .ssh/id_rsa.pub # Add the newly generated key to the authorized_keys file for the relevant user, on the other hosts. # Copy the Ceph clusters' public key to the other nodes ssh-copy-id -f -i /etc/ceph/ceph.pub root@storage-2 ssh-copy-id -f -i /etc/ceph/ceph.pub root@storage-3 # Add the admin role to the other nodes ceph orch host add storage-2 10.4.20.2 _admin ceph orch host add storage-3 10.4.20.3 _admin
OSD creation
If you've installed ceph-osd on your host, this step will fail horribly with errors such as:
-1 bluestore(/var/lib/ceph/osd/ceph-1//block) _read_bdev_label failed to open /var/lib/ceph/osd/ceph-1//block: (13) Permission denied -1 bdev(0x5571d5f69400 /var/lib/ceph/osd/ceph-1//block) open open got: (13) Permission denied -1 OSD::mkfs: ObjectStore::mkfs failed with error (13) Permission denied -1 ESC[0;31m ** ERROR: error creating empty object store in /var/lib/ceph/osd/ceph-0/: (13) Permission deniedESC[0m OSD, will rollback changes
# Configure all available storage to be used as OSD storage ceph orch apply osd --all-available-devices # Check for OSD problems watch ceph -s watch ceph osd tree
Commands
# Enter the Ceph shell for a specific cluster sudo /usr/sbin/cephadm shell --fsid asdjwqe-asjd324-asdki321-821asd-asd241-asdn1234- -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin2.keyring # Give node storage-4, which is already a cluster member, the admin tag ceph orch host label add storage-4 _admin # Mount a Ceph filesystem with 3 mon hosts, using a secretfile # Contents in the secretfile is ONLY the secret / key mount -t ceph 192.168.0.11,192.168.0.12,192.168.0.13:/shares/mycustomer/asd8asd8-as8d83-df4mjvjdf /mnt/ceph-storage -o name=customershare-28,secretfile=/etc/ceph/customer28-secretfile
Upgrade
Make sure your cluster status is healthy first!
# Upgrade Ceph to a specific version ceph orch upgrade start --ceph-version 17.2.0 # Check the status of the Ceph upgrade ceph orch upgrade status # Stop the Ceph upgrade ceph orch upgrade stop
RBD-NBD
# List available volumes within the openstackvolumes pool rbd ls openstackhdd # List all available snapshots for object volume-asd9p12o3-90b2-1238-1209-as980d7213hs, which resides in pool ghgvolumes rbd snap ls openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # Map the volume-object to the local filesystem rbd-nbd map openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # Map the volume-object as read-only to the local filesystem rbd-nbd map --read-only openstackhdd/volume-asd9p12o3-90b2-1238-1209-as980d7213hs # List currently mapped objects rbd-nbd list-mapped # Check what filesystem and partition the device contains fdisk -l /dev/nbd1 # Mount the device to a local folder mount /dev/nbd1p1 /mnt/storage # Unmount the device from the local folder umount /mnt/storage # 2 methods to unmap # Unmap the mapped object rbd-nbd unmap /dev/nbd2 # Unmap the mapped object rbd-nbd unmap volume-asd9p12o3-90b2-1238-1209-as980d7213hs
Remove node
# Remove running daemons ceph orch host drain storage-3 # Remove host from the cluster ceph orch host rm storage-3 # On storage-3, reboot the node shutdown -r now
Destroy node
Scorched earth
Only execute if you want to annihilate your node and/or cluster.
# Kill and destroy OSD 0 ceph osd down 0 && ceph osd destroy 0 --force # Stop Ceph services systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@crash.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mgr.host-1.xmatqa.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@node-exporter.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service systemctl stop ceph-asd82asd-asd8-as92-a889-po89xc732cmn.target # Disable Ceph services systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@crash.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mgr.host-1.xmatqa.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@mon.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@node-exporter.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service systemctl disable ceph-asd82asd-asd8-as92-a889-po89xc732cmn.target # Destroy everything (packages, containers, configuration) ceph-deploy uninstall host-1 ceph-deploy purge host-1 rm -rf /var/lib/ceph # Check for failed services systemctl | grep ceph # Reset them so they disable properly systemctl reset-failed ceph-asd82asd-asd8-as92-a889-po89xc732cmn@prometheus.host-1.service # reboot shutdown -r now
BTRFS
Using LVM
# Install LVM creation tools depending on your OS yum install lvm2 apt install lvm2 # Check and note the disk you need fdisk -l # Format /dev/vdb as BTRFS echo -e "n\np\n1\n\n\nt\n8E\np\nw" | fdisk /dev/vdb # Create LVM pvcreate /dev/vdb1 vgcreate vdb_vg /dev/vdb1 lvcreate -l 100%FREE -n btrfs vdb_vg # Check pvs vgs # Create the BTRFS filesystem mkfs.btrfs /dev/vdb_vg/btrfs # Create a folder for the BTRFS mount mkdir -p /mnt/btrfs1 # Mount the BTRFS filesystem mount -t btrfs /dev/vdb_vg/btrfs /mnt/btrfs1/ # Modify fstab so the filesystem gets mounted automatically on boot cat << 'EOF' >> /etc/fstab /dev/mapper/vdb_vg-btrfs /mnt/btrfs1 btrfs defaults 0 0 EOF
User management
# Create the books group groupadd books # Make myrthe part of the "philosophy" and "books" groups usermod myrthe -aG philosophy,books # See the groups myrthe is part of groups myrthe # The owner gains full control; group and everyone else may only read and execute chmod 755 /home/ring/gollum.txt # Make ballrog the owner of the /data/sf4/cup folder chown ballrog:ballrog /data/sf4/cup # Make all files located anywhere within the .ssh, owned by the stalin user and soviet group chown -R stalin:soviet /home/stalin/.ssh # Delete the simba user and include his home folder and mail spool userdel -r simba
Create user (RHEL)
# Create user with a home-folder and add him to the wheel group (supplementary, via -G) useradd -m -G wheel john # Set a password for the john user passwd john # Create the SSH folder for john mkdir -p /home/john/.ssh # Add a public key to john's account echo "ssh-rsa 123980idfas89132hadsckjh871234" >> /home/john/.ssh/authorized_keys # Set proper permissions for the .ssh folder and authorized_keys chown -R john:john /home/john/.ssh chmod 700 /home/john/.ssh chmod 600 /home/john/.ssh/authorized_keys
Sudoers
Concerns /etc/sudoers
# Allow user jabami to execute any command, without specifying a password jabami ALL=(ALL) NOPASSWD: ALL # Allow user "drake" to perform the 2 given commands with sudo, no password. ## Define user and associate the command group variable "UPDATE_CMDS" drake ALL=(ALL) NOPASSWD: UPDATE_CMDS ## Define commands for the "UPDATE_CMDS" variable Cmnd_Alias UPDATE_CMDS = /usr/bin/apt-get update, /usr/bin/apt-get upgrade # Allow members of the group "researchers" to perform the 2 given commands with sudo, no password. ## User alias specification %researchers ALL=(ALL) NOPASSWD: UPDATE_CMDS2 ## Define commands for the "UPDATE_CMDS2" variable Cmnd_Alias UPDATE_CMDS2 = /usr/bin/apt-get update, /usr/bin/apt-get upgrade
Other
Throughput test
# Test bandwidth throughput with iperf # Listen on server-A on port 5101 iperf3 -s -p 5101 # Connect to server-A from server-B iperf3 -c 192.168.0.1 -p 5101
# Testing disk/share throughput # Create "testfile" of size 1710x1M in current folder time dd if=/dev/zero of=testfile bs=1M count=1710 # Create "testfile2" of size 5x1G in current folder time dd if=/dev/zero of=testfile2 bs=1G count=5 # Show copy-time of "testfile" to disk or share time cp testfile /mnt/btrfs/data/<LOCATION>/ # Methods of testing disk or share throughput # Show read-time from the mount to null time cat /mnt/btrfs/data/<FILE> > /dev/null # Show copy-time from the mount to null time dd if=/mnt/btrfs/data/<FILE> of=/dev/null bs=1M # Show copy-time from the mount to the current folder time cp /mnt/btrfs/data/<FILE> . # Copy one folder to another with rsync while showing progress rsync -avhW --no-compress --progress <source>/ <destination>/
Create different temp folder
# Create a temporary TMP folder mkdir -p /scratch/tmp/ # Activate temporary TMP folder export TMPDIR=/scratch/tmp