Skip to content

Quick Command Reference

Common commands for managing the tower-fleet k3s cluster.


Cluster Health

# View all nodes
kubectl get nodes
kubectl get nodes -o wide

# Check all system pods
kubectl get pods -A

# Cluster info
kubectl cluster-info

# Component status (deprecated since Kubernetes v1.19; output may be incomplete or misleading)
kubectl get componentstatuses

# Events (useful for debugging)
kubectl get events --sort-by='.lastTimestamp'

SSH Access

# Master node
ssh root@10.89.97.201

# Worker nodes
ssh root@10.89.97.202
ssh root@10.89.97.203

k3s Service Management

# On master (201)
ssh root@10.89.97.201 'systemctl status k3s'
ssh root@10.89.97.201 'systemctl restart k3s'
ssh root@10.89.97.201 'journalctl -u k3s -f'

# On workers (202, 203)
ssh root@10.89.97.202 'systemctl status k3s-agent'
ssh root@10.89.97.202 'systemctl restart k3s-agent'
ssh root@10.89.97.202 'journalctl -u k3s-agent -f'

Proxmox VM Management

# List VMs
qm list

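# Start/stop/reboot (qm accepts one VMID per call, so loop over several)
for id in 201 202 203; do qm start "$id"; done
for id in 201 202 203; do qm stop "$id"; done
qm reboot 201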

# VM status
qm status 201

# Destroy VMs (careful! irreversible; qm accepts one VMID per call)
for id in 201 202 203; do qm destroy "$id"; done

Pods

# List all pods
kubectl get pods -A

# Pods in specific namespace
kubectl get pods -n kube-system

# Watch pods
kubectl get pods -A --watch

# Describe pod
kubectl describe pod/coredns-xxx -n kube-system

# Logs
kubectl logs pod-name
kubectl logs -f pod-name  # Follow
kubectl logs pod-name -n namespace-name

# Execute command in pod
kubectl exec -it pod-name -- bash
kubectl exec pod-name -- ls /app

Deployments

# List deployments
kubectl get deployments -A

# Scale deployment
kubectl scale deployment/my-app --replicas=3

# Update image
kubectl set image deployment/my-app container-name=new-image:v2

# Rollout status
kubectl rollout status deployment/my-app

# Rollback
kubectl rollout undo deployment/my-app

# Restart deployment (rolling restart)
kubectl rollout restart deployment/my-app

Services

# List services
kubectl get svc -A

# Get LoadBalancer IPs
kubectl get svc -o wide

# Describe service
kubectl describe svc/my-service

Namespaces

# List namespaces
kubectl get namespaces
kubectl get ns  # Short form

# Create namespace
kubectl create namespace my-app

# Delete namespace (deletes all resources in it!)
kubectl delete namespace my-app

# Set default namespace for context
kubectl config set-context --current --namespace=my-app

Apply/Delete Resources

# Apply YAML file
kubectl apply -f deployment.yaml

# Apply directory
kubectl apply -f ./manifests/

# Apply from URL
kubectl apply -f https://example.com/manifest.yaml

# Delete resources
kubectl delete -f deployment.yaml
kubectl delete deployment/my-app
kubectl delete pod/my-app-xxx

# Delete all workload resources in namespace ("all" does not include ConfigMaps, Secrets, PVCs, or Ingresses)
kubectl delete all --all -n namespace-name

Troubleshooting

# View events
kubectl get events --sort-by='.lastTimestamp'
kubectl get events -n kube-system

# Describe (shows events at bottom)
kubectl describe node/k3s-master
kubectl describe pod/my-pod

# Resource usage
kubectl top nodes
kubectl top pods -A

# Port forward (access pod locally)
kubectl port-forward pod/my-app 8080:80
kubectl port-forward svc/my-service 8080:80

# Debug pod
kubectl run debug --rm -it --image=busybox -- sh

Kubeconfig Management

# View current context
kubectl config current-context

# List all contexts
kubectl config get-contexts

# Switch context
kubectl config use-context other-cluster

# View merged config
kubectl config view

# Set namespace for current context
kubectl config set-context --current --namespace=my-app

Storage (Longhorn)

# List PVCs
kubectl get pvc -A

# List PVs
kubectl get pv

# List storage classes
kubectl get storageclass
kubectl get sc  # Short form

# Resize PVC (Longhorn supports online expansion)
kubectl patch pvc <pvc-name> -n <namespace> \
  -p '{"spec":{"resources":{"requests":{"storage":"30Gi"}}}}'

Docker Registry

# Registry URL (NodePort)
# http://10.89.97.201:30500

# List all images
curl -s http://10.89.97.201:30500/v2/_catalog | jq

# List tags for an image
curl -s http://10.89.97.201:30500/v2/<image>/tags/list | jq

# Check registry disk usage
kubectl exec -n docker-registry deploy/docker-registry -- df -h /var/lib/registry

# Cleanup old tags (dry run)
/root/tower-fleet/scripts/registry-cleanup.sh

# Cleanup old tags (execute + garbage collection)
/root/tower-fleet/scripts/registry-cleanup.sh --execute --gc

# Cleanup with custom retention (keep 3 versions)
/root/tower-fleet/scripts/registry-cleanup.sh --execute --gc --keep 3

# Resize registry PVC
kubectl patch pvc docker-registry-pvc -n docker-registry \
  -p '{"spec":{"resources":{"requests":{"storage":"30Gi"}}}}'

GitOps (Flux - after Phase 4)

# Check Flux status
flux check

# Reconcile (force sync from git)
flux reconcile kustomization apps --with-source

# View Flux resources
flux get all

# Suspend/resume
flux suspend kustomization apps
flux resume kustomization apps

Observability (after Phase 5)

# Access Grafana (after deployment)
kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80
# Then: http://localhost:3000

# Check Prometheus
kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090
# Then: http://localhost:9090

Disaster Recovery

# Backup kubeconfig
cp ~/.kube/config ~/.kube/config.backup.$(date +%Y%m%d)

# Export workload resources in namespace ("all" omits ConfigMaps, Secrets, PVCs, and Ingresses; export those separately)
kubectl get all -n my-app -o yaml > backup-my-app.yaml

# Get join token (to add new nodes)
ssh root@10.89.97.201 'cat /var/lib/rancher/k3s/server/node-token'

Quick Deployment Example

# Create deployment
kubectl create deployment nginx --image=nginx

# Expose as LoadBalancer
kubectl expose deployment nginx --port=80 --type=LoadBalancer

# Get service IP
kubectl get svc nginx

# Test
curl http://LOADBALANCER_IP

# Clean up
kubectl delete deployment nginx
kubectl delete svc nginx

Useful Aliases

Add to ~/.bashrc:

# kubectl shorthand
alias k='kubectl'

# Common kubectl commands
alias kgp='kubectl get pods'
alias kgs='kubectl get svc'
alias kgn='kubectl get nodes'
alias kga='kubectl get all'
alias kdp='kubectl describe pod'
alias kl='kubectl logs'
alias kx='kubectl exec -it'

# Watch
alias wkgp='watch kubectl get pods'

Monitoring & Alerting

Prometheus (Metrics)

# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090
# Then: http://localhost:9090

# Quick query from CLI
kubectl exec -n monitoring prometheus-kube-prometheus-stack-prometheus-0 -- \
  wget -qO- 'http://localhost:9090/api/v1/query?query=up' | jq

# List all PrometheusRules
kubectl get prometheusrules -A

# View specific rule
kubectl get prometheusrule <name> -n <namespace> -o yaml

# Edit alert rules
kubectl edit prometheusrule <name> -n <namespace>

# Apply alert rules from manifest
kubectl apply -f /root/tower-fleet/manifests/apps/<app>/prometheus-rules.yaml

AlertManager (Notifications)

# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9093
# Then: http://localhost:9093

# View active alerts
kubectl exec -n monitoring alertmanager-kube-prometheus-stack-alertmanager-0 -- \
  wget -qO- 'http://localhost:9093/api/v2/alerts' | jq

# Silence alerts via UI (easier than CLI)
# Go to http://localhost:9093 → find alert → click "Silence"

# Check AlertManager logs
kubectl logs -n monitoring alertmanager-kube-prometheus-stack-alertmanager-0

Grafana (Dashboards)

# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80
# Then: http://localhost:3000

# Get admin password
kubectl get secret -n monitoring kube-prometheus-stack-grafana \
  -o jsonpath='{.data.admin-password}' | base64 -d; echo

# Default user: admin

Loki (Logs)

# Port-forward to access
kubectl port-forward -n monitoring svc/loki-gateway 3100:80

# Query logs via CLI
kubectl exec -n monitoring deploy/loki-gateway -- \
  wget -qO- 'http://loki:3100/loki/api/v1/labels' | jq

# View via Grafana "Explore" tab (recommended)

Common Alert Queries

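# Check which alerts are firing
# (the port-forward runs in the background; stop it with `kill %1` when finished, or check `jobs` for the job number)
kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090 &
sleep 2  # give the port-forward time to start listening before querying
curl -s 'http://localhost:9090/api/v1/alerts' | jq '.data.alerts[] | {alertname: .labels.alertname, state: .state}'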

# Test an alert expression
curl -s 'http://localhost:9090/api/v1/query?query=up==0' | jq

# Check PVC usage (the immich alert query)
curl -s 'http://localhost:9090/api/v1/query?query=kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes*100' | jq '.data.result[] | {pvc: .metric.persistentvolumeclaim, namespace: .metric.namespace, percent: .value[1]}'

Web UIs

| Service | Access Method | URL |
|---------|---------------|-----|
| Grafana | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80` | http://localhost:3000 |
| Prometheus | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090` | http://localhost:9090 |
| AlertManager | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9093` | http://localhost:9093 |
| Longhorn | `kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80` | http://localhost:8080 |
| Supabase Studio | Ingress | http://supabase.internal/project/default |
| Authentik | Ingress | https://auth.bogocat.com |
| Immich | Ingress | https://photos.bogocat.com |
| Jellyfin | Ingress | https://jellyfin.bogocat.com |

Longhorn UI

# Access storage management UI
kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80
# Then: http://localhost:8080

# Useful for:
# - Viewing volume health
# - Managing snapshots/backups
# - Resizing volumes
# - Checking replica status

Discovery & Introspection

Finding Resources

# What resources exist in this namespace?
kubectl get all -n <namespace>

# What resource types are available?
kubectl api-resources

# What fields does a resource have?
kubectl explain deployment
kubectl explain deployment.spec.template

# What's the full YAML of a resource?
kubectl get deployment <name> -o yaml

# What labels does a resource have?
kubectl get pods --show-labels
kubectl get pods -l app=immich  # Filter by label

Finding Services & Endpoints

# What services are available?
kubectl get svc -A

# What endpoints back a service?
kubectl get endpoints <service-name>

# What ingresses exist?
kubectl get ingress -A

# What's the external URL for an ingress?
kubectl get ingress -n <namespace> -o jsonpath='{.items[*].spec.rules[*].host}'

Finding Logs & Events

# Recent events (great for debugging)
kubectl get events --sort-by='.lastTimestamp' | tail -20
kubectl get events -n <namespace> --sort-by='.lastTimestamp'

# Logs from a deployment (all pods; with -l only the last 10 lines per pod are shown by default, add --tail=-1 for everything)
kubectl logs -l app=<app-name> --all-containers

# Logs from previous crashed container
kubectl logs <pod> --previous

# Follow logs
kubectl logs -f <pod>

Finding Secrets & Configs

# What secrets exist?
kubectl get secrets -n <namespace>

# Decode a secret value
kubectl get secret <name> -n <namespace> -o jsonpath='{.data.<key>}' | base64 -d

# What configmaps exist?
kubectl get configmap -n <namespace>

# View configmap contents
kubectl get configmap <name> -o yaml

Finding Resource Usage

# Node resource usage
kubectl top nodes

# Pod resource usage (all namespaces)
kubectl top pods -A

# Pods using most CPU
kubectl top pods -A --sort-by=cpu | head -10

# Pods using most memory
kubectl top pods -A --sort-by=memory | head -10

# Requested storage per PVC (shows the requested size, not actual disk usage)
kubectl get pvc -A -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name,CAPACITY:.spec.resources.requests.storage,STORAGECLASS:.spec.storageClassName'

Debugging Patterns

Why is my pod not starting?

# 1. Check pod status
kubectl get pod <pod> -n <namespace>

# 2. Check pod events (look at Events section at bottom)
kubectl describe pod <pod> -n <namespace>

# 3. Check logs
kubectl logs <pod> -n <namespace>

# 4. If container won't start, check previous logs
kubectl logs <pod> -n <namespace> --previous

Why is my service not reachable?

# 1. Check if pods are running
kubectl get pods -l app=<app> -n <namespace>

# 2. Check service selectors match pod labels
kubectl get svc <service> -n <namespace> -o yaml | grep selector -A5
kubectl get pods -n <namespace> --show-labels | grep <selector>

# 3. Check endpoints exist
kubectl get endpoints <service> -n <namespace>

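# 4. Test from inside cluster (--restart=Never makes this a one-shot pod instead of one that restarts after wget exits)
kubectl run debug --rm -it --restart=Never --image=busybox -- wget -qO- http://<service>.<namespace>.svc:<port>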

Why is my PVC stuck in Pending?

# 1. Check PVC events
kubectl describe pvc <pvc> -n <namespace>

# 2. Check storage class exists
kubectl get storageclass

# 3. Check Longhorn status
kubectl get volumes -n longhorn-system

# 4. Check Longhorn pods
kubectl get pods -n longhorn-system

Why is my deployment not updating?

# 1. Check rollout status
kubectl rollout status deployment/<name> -n <namespace>

# 2. Check rollout history
kubectl rollout history deployment/<name> -n <namespace>

# 3. Check deployment events
kubectl describe deployment <name> -n <namespace>

# 4. Force rollout restart
kubectl rollout restart deployment/<name> -n <namespace>

Helm Operations

# List installed releases
helm list -A

# Get values for a release
helm get values <release> -n <namespace>

# Get all release info
helm get all <release> -n <namespace>

# Upgrade a release
helm upgrade <release> <chart> -n <namespace> -f values.yaml

# Rollback to previous version
helm rollback <release> -n <namespace>

# Uninstall a release
helm uninstall <release> -n <namespace>

# Search for charts
helm search hub <keyword>
helm search repo <keyword>

Sealed Secrets

# Create a sealed secret
kubectl create secret generic <name> \
  --from-literal=KEY=VALUE \
  --dry-run=client -o yaml | \
  kubeseal --controller-name=sealed-secrets --controller-namespace=kube-system -o yaml > sealed-secret.yaml

# Apply sealed secret
kubectl apply -f sealed-secret.yaml

# View original secret (after unsealing)
kubectl get secret <name> -o yaml

Quick Reference: Common Combos

# "What's wrong with my app?"
kubectl get pods -n <ns> && kubectl get events -n <ns> --sort-by='.lastTimestamp' | tail -10

# "Show me everything in this namespace"
kubectl get all,ingress,pvc,configmap,secret -n <namespace>

# "Restart my app"
kubectl rollout restart deployment/<name> -n <namespace>

# "What's eating resources?"
kubectl top pods -A --sort-by=memory | head -10

# "Access this service locally"
kubectl port-forward svc/<service> <local-port>:<service-port> -n <namespace>

# "Get shell in a pod"
kubectl exec -it <pod> -n <namespace> -- sh

# "Watch pods starting up"
kubectl get pods -n <namespace> -w

# "Tail all logs from an app"
kubectl logs -f -l app=<app> --all-containers -n <namespace>

Next Steps