Quick Command Reference¶
Common commands for managing the tower-fleet k3s cluster.
Cluster Health¶
# View all nodes
kubectl get nodes
kubectl get nodes -o wide
# Check all system pods
kubectl get pods -A
# Cluster info
kubectl cluster-info
# Component status (NOTE: componentstatuses is deprecated since Kubernetes v1.19;
# prefer checking control-plane pods with `kubectl get pods -n kube-system`)
kubectl get componentstatuses
# Events (useful for debugging)
kubectl get events --sort-by='.lastTimestamp'
SSH Access¶
k3s Service Management¶
# On master (201)
ssh root@10.89.97.201 'systemctl status k3s'
ssh root@10.89.97.201 'systemctl restart k3s'
ssh root@10.89.97.201 'journalctl -u k3s -f'
# On workers (202, 203)
ssh root@10.89.97.202 'systemctl status k3s-agent'
ssh root@10.89.97.202 'systemctl restart k3s-agent'
ssh root@10.89.97.202 'journalctl -u k3s-agent -f'
Proxmox VM Management¶
# List VMs
qm list
# Start/stop (qm accepts one VM ID per invocation)
for id in 201 202 203; do qm start "$id"; done
for id in 201 202 203; do qm stop "$id"; done
# Reboot (the qm subcommand is 'reboot' — there is no 'qm restart')
qm reboot 201
# VM status
qm status 201
# Destroy VMs (careful! qm accepts one VM ID per invocation)
for id in 201 202 203; do qm destroy "$id"; done
Pods¶
# List all pods
kubectl get pods -A
# Pods in specific namespace
kubectl get pods -n kube-system
# Watch pods
kubectl get pods -A --watch
# Describe pod
kubectl describe pod/coredns-xxx -n kube-system
# Logs
kubectl logs pod-name
kubectl logs -f pod-name # Follow
kubectl logs pod-name -n namespace-name
# Execute command in pod
kubectl exec -it pod-name -- bash
kubectl exec pod-name -- ls /app
Deployments¶
# List deployments
kubectl get deployments -A
# Scale deployment
kubectl scale deployment/my-app --replicas=3
# Update image
kubectl set image deployment/my-app container-name=new-image:v2
# Rollout status
kubectl rollout status deployment/my-app
# Rollback
kubectl rollout undo deployment/my-app
# Restart deployment (rolling restart)
kubectl rollout restart deployment/my-app
Services¶
# List services
kubectl get svc -A
# Get LoadBalancer IPs
kubectl get svc -o wide
# Describe service
kubectl describe svc/my-service
Namespaces¶
# List namespaces
kubectl get namespaces
kubectl get ns # Short form
# Create namespace
kubectl create namespace my-app
# Delete namespace (deletes all resources in it!)
kubectl delete namespace my-app
# Set default namespace for context
kubectl config set-context --current --namespace=my-app
Apply/Delete Resources¶
# Apply YAML file
kubectl apply -f deployment.yaml
# Apply directory
kubectl apply -f ./manifests/
# Apply from URL
kubectl apply -f https://example.com/manifest.yaml
# Delete resources
kubectl delete -f deployment.yaml
kubectl delete deployment/my-app
kubectl delete pod/my-app-xxx
# Delete all in namespace
kubectl delete all --all -n namespace-name
Troubleshooting¶
# View events
kubectl get events --sort-by='.lastTimestamp'
kubectl get events -n kube-system
# Describe (shows events at bottom)
kubectl describe node/k3s-master
kubectl describe pod/my-pod
# Resource usage
kubectl top nodes
kubectl top pods -A
# Port forward (access pod locally)
kubectl port-forward pod/my-app 8080:80
kubectl port-forward svc/my-service 8080:80
# Debug pod
kubectl run debug --rm -it --image=busybox -- sh
Kubeconfig Management¶
# View current context
kubectl config current-context
# List all contexts
kubectl config get-contexts
# Switch context
kubectl config use-context other-cluster
# View merged config
kubectl config view
# Set namespace for current context
kubectl config set-context --current --namespace=my-app
Storage (Longhorn)¶
# List PVCs
kubectl get pvc -A
# List PVs
kubectl get pv
# List storage classes
kubectl get storageclass
kubectl get sc # Short form
# Resize PVC (Longhorn supports online expansion)
kubectl patch pvc <pvc-name> -n <namespace> \
-p '{"spec":{"resources":{"requests":{"storage":"30Gi"}}}}'
Docker Registry¶
# Registry URL (NodePort)
# http://10.89.97.201:30500
# List all images
curl -s http://10.89.97.201:30500/v2/_catalog | jq
# List tags for an image
curl -s http://10.89.97.201:30500/v2/<image>/tags/list | jq
# Check registry disk usage
kubectl exec -n docker-registry deploy/docker-registry -- df -h /var/lib/registry
# Cleanup old tags (dry run)
/root/tower-fleet/scripts/registry-cleanup.sh
# Cleanup old tags (execute + garbage collection)
/root/tower-fleet/scripts/registry-cleanup.sh --execute --gc
# Cleanup with custom retention (keep 3 versions)
/root/tower-fleet/scripts/registry-cleanup.sh --execute --gc --keep 3
# Resize registry PVC
kubectl patch pvc docker-registry-pvc -n docker-registry \
-p '{"spec":{"resources":{"requests":{"storage":"30Gi"}}}}'
GitOps (Flux - after Phase 4)¶
# Check Flux status
flux check
# Reconcile (force sync from git)
flux reconcile kustomization apps --with-source
# View Flux resources
flux get all
# Suspend/resume
flux suspend kustomization apps
flux resume kustomization apps
Observability (after Phase 5)¶
# Access Grafana (after deployment)
kubectl port-forward -n monitoring svc/grafana 3000:80
# Then: http://localhost:3000
# Check Prometheus
kubectl port-forward -n monitoring svc/prometheus 9090:9090
# Then: http://localhost:9090
Disaster Recovery¶
# Backup kubeconfig
cp ~/.kube/config ~/.kube/config.backup.$(date +%Y%m%d)
# Export all resources in namespace
kubectl get all -n my-app -o yaml > backup-my-app.yaml
# Get join token (to add new nodes)
ssh root@10.89.97.201 'cat /var/lib/rancher/k3s/server/node-token'
Quick Deployment Example¶
# Create deployment
kubectl create deployment nginx --image=nginx
# Expose as LoadBalancer
kubectl expose deployment nginx --port=80 --type=LoadBalancer
# Get service IP
kubectl get svc nginx
# Test
curl http://LOADBALANCER_IP
# Clean up
kubectl delete deployment nginx
kubectl delete svc nginx
Useful Aliases¶
Add to ~/.bashrc:
# kubectl shorthand
alias k='kubectl'
# Common kubectl commands
alias kgp='kubectl get pods'
alias kgs='kubectl get svc'
alias kgn='kubectl get nodes'
alias kga='kubectl get all'
alias kdp='kubectl describe pod'
alias kl='kubectl logs'
alias kx='kubectl exec -it'
# Watch
alias wkgp='watch kubectl get pods'
Monitoring & Alerting¶
Prometheus (Metrics)¶
# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090
# Then: http://localhost:9090
# Quick query from CLI
kubectl exec -n monitoring prometheus-kube-prometheus-stack-prometheus-0 -- \
wget -qO- 'http://localhost:9090/api/v1/query?query=up' | jq
# List all PrometheusRules
kubectl get prometheusrules -A
# View specific rule
kubectl get prometheusrule <name> -n <namespace> -o yaml
# Edit alert rules
kubectl edit prometheusrule <name> -n <namespace>
# Apply alert rules from manifest
kubectl apply -f /root/tower-fleet/manifests/apps/<app>/prometheus-rules.yaml
AlertManager (Notifications)¶
# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9093
# Then: http://localhost:9093
# View active alerts
kubectl exec -n monitoring alertmanager-kube-prometheus-stack-alertmanager-0 -- \
wget -qO- 'http://localhost:9093/api/v2/alerts' | jq
# Silence alerts via UI (easier than CLI)
# Go to http://localhost:9093 → find alert → click "Silence"
# Check AlertManager logs
kubectl logs -n monitoring alertmanager-kube-prometheus-stack-alertmanager-0
Grafana (Dashboards)¶
# Port-forward to access UI
kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80
# Then: http://localhost:3000
# Get admin password
kubectl get secret -n monitoring kube-prometheus-stack-grafana \
-o jsonpath='{.data.admin-password}' | base64 -d; echo
# Default user: admin
Loki (Logs)¶
# Port-forward to access
kubectl port-forward -n monitoring svc/loki-gateway 3100:80
# Query logs via CLI
kubectl exec -n monitoring deploy/loki-gateway -- \
wget -qO- 'http://loki:3100/loki/api/v1/labels' | jq
# View via Grafana "Explore" tab (recommended)
Common Alert Queries¶
# Check which alerts are firing
kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090 &
curl -s 'http://localhost:9090/api/v1/alerts' | jq '.data.alerts[] | {alertname: .labels.alertname, state: .state}'
# Test an alert expression
curl -s 'http://localhost:9090/api/v1/query?query=up==0' | jq
# Check PVC usage (the immich alert query)
curl -s 'http://localhost:9090/api/v1/query?query=kubelet_volume_stats_used_bytes/kubelet_volume_stats_capacity_bytes*100' | jq '.data.result[] | {pvc: .metric.persistentvolumeclaim, namespace: .metric.namespace, percent: .value[1]}'
Web UIs¶
| Service | Access Method | URL |
|---|---|---|
| Grafana | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-grafana 3000:80` | http://localhost:3000 |
| Prometheus | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-prometheus 9090:9090` | http://localhost:9090 |
| AlertManager | `kubectl port-forward -n monitoring svc/kube-prometheus-stack-alertmanager 9093:9093` | http://localhost:9093 |
| Longhorn | `kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80` | http://localhost:8080 |
| Supabase Studio | Ingress | http://supabase.internal/project/default |
| Authentik | Ingress | https://auth.bogocat.com |
| Immich | Ingress | https://photos.bogocat.com |
| Jellyfin | Ingress | https://jellyfin.bogocat.com |
Longhorn UI¶
# Access storage management UI
kubectl port-forward -n longhorn-system svc/longhorn-frontend 8080:80
# Then: http://localhost:8080
# Useful for:
# - Viewing volume health
# - Managing snapshots/backups
# - Resizing volumes
# - Checking replica status
Discovery & Introspection¶
Finding Resources¶
# What resources exist in this namespace?
kubectl get all -n <namespace>
# What resource types are available?
kubectl api-resources
# What fields does a resource have?
kubectl explain deployment
kubectl explain deployment.spec.template
# What's the full YAML of a resource?
kubectl get deployment <name> -o yaml
# What labels does a resource have?
kubectl get pods --show-labels
kubectl get pods -l app=immich # Filter by label
Finding Services & Endpoints¶
# What services are available?
kubectl get svc -A
# What endpoints back a service?
kubectl get endpoints <service-name>
# What ingresses exist?
kubectl get ingress -A
# What's the external URL for an ingress?
kubectl get ingress -n <namespace> -o jsonpath='{.items[*].spec.rules[*].host}'
Finding Logs & Events¶
# Recent events (great for debugging)
kubectl get events --sort-by='.lastTimestamp' | tail -20
kubectl get events -n <namespace> --sort-by='.lastTimestamp'
# Logs from a deployment (all pods)
kubectl logs -l app=<app-name> --all-containers
# Logs from previous crashed container
kubectl logs <pod> --previous
# Follow logs
kubectl logs -f <pod>
Finding Secrets & Configs¶
# What secrets exist?
kubectl get secrets -n <namespace>
# Decode a secret value
kubectl get secret <name> -n <namespace> -o jsonpath='{.data.<key>}' | base64 -d
# What configmaps exist?
kubectl get configmap -n <namespace>
# View configmap contents
kubectl get configmap <name> -o yaml
Finding Resource Usage¶
# Node resource usage
kubectl top nodes
# Pod resource usage (all namespaces)
kubectl top pods -A
# Pods using most CPU
kubectl top pods -A --sort-by=cpu | head -10
# Pods using most memory
kubectl top pods -A --sort-by=memory | head -10
# Storage usage per PVC
kubectl get pvc -A -o custom-columns='NAMESPACE:.metadata.namespace,NAME:.metadata.name,CAPACITY:.spec.resources.requests.storage,STORAGECLASS:.spec.storageClassName'
Debugging Patterns¶
Why is my pod not starting?¶
# 1. Check pod status
kubectl get pod <pod> -n <namespace>
# 2. Check pod events (look at Events section at bottom)
kubectl describe pod <pod> -n <namespace>
# 3. Check logs
kubectl logs <pod> -n <namespace>
# 4. If container won't start, check previous logs
kubectl logs <pod> -n <namespace> --previous
Why is my service not reachable?¶
# 1. Check if pods are running
kubectl get pods -l app=<app> -n <namespace>
# 2. Check service selectors match pod labels
kubectl get svc <service> -o yaml | grep selector -A5
kubectl get pods --show-labels | grep <selector>
# 3. Check endpoints exist
kubectl get endpoints <service>
# 4. Test from inside cluster
kubectl run debug --rm -it --image=busybox -- wget -qO- http://<service>.<namespace>.svc:PORT
Why is my PVC stuck in Pending?¶
# 1. Check PVC events
kubectl describe pvc <pvc> -n <namespace>
# 2. Check storage class exists
kubectl get storageclass
# 3. Check Longhorn status
kubectl get volumes -n longhorn-system
# 4. Check Longhorn pods
kubectl get pods -n longhorn-system
Why is my deployment not updating?¶
# 1. Check rollout status
kubectl rollout status deployment/<name> -n <namespace>
# 2. Check rollout history
kubectl rollout history deployment/<name> -n <namespace>
# 3. Check deployment events
kubectl describe deployment <name> -n <namespace>
# 4. Force rollout restart
kubectl rollout restart deployment/<name> -n <namespace>
Helm Operations¶
# List installed releases
helm list -A
# Get values for a release
helm get values <release> -n <namespace>
# Get all release info
helm get all <release> -n <namespace>
# Upgrade a release
helm upgrade <release> <chart> -n <namespace> -f values.yaml
# Rollback to previous version
helm rollback <release> -n <namespace>
# Uninstall a release
helm uninstall <release> -n <namespace>
# Search for charts
helm search hub <keyword>
helm search repo <keyword>
Sealed Secrets¶
# Create a sealed secret
kubectl create secret generic <name> \
--from-literal=KEY=VALUE \
--dry-run=client -o yaml | \
kubeseal --controller-name=sealed-secrets --controller-namespace=kube-system -o yaml > sealed-secret.yaml
# Apply sealed secret
kubectl apply -f sealed-secret.yaml
# View original secret (after unsealing)
kubectl get secret <name> -o yaml
Quick Reference: Common Combos¶
# "What's wrong with my app?"
kubectl get pods -n <ns> && kubectl get events -n <ns> --sort-by='.lastTimestamp' | tail -10
# "Show me everything in this namespace"
kubectl get all,ingress,pvc,configmap,secret -n <namespace>
# "Restart my app"
kubectl rollout restart deployment/<name> -n <namespace>
# "What's eating resources?"
kubectl top pods -A --sort-by=memory | head -10
# "Access this service locally"
kubectl port-forward svc/<service> <local-port>:<service-port> -n <namespace>
# "Get shell in a pod"
kubectl exec -it <pod> -n <namespace> -- sh
# "Watch pods starting up"
kubectl get pods -n <namespace> -w
# "Tail all logs from an app"
kubectl logs -f -l app=<app> --all-containers -n <namespace>
Next Steps¶
- Troubleshooting - Common issues and solutions
- Alerting Guide - Alert configuration and management
- Kubernetes Cluster Setup - Deep dive into kubectl and kubeconfig