@csghone
Created April 26, 2026 16:37
Ollama references
# Manual installation
tar --use-compress-program=zstd -xvf ollama-linux-amd64.tar.zst
# Pull model (you might want to soft-link /usr/share/ollama to somewhere else so as not to fill up the root partition)
# Alternatively, override the default path with the OLLAMA_MODELS environment variable before running.
# Also edit /etc/systemd/system/ollama.service if needed
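# A minimal sketch of the OLLAMA_MODELS override for the systemd service via a
# drop-in (the path /data/ollama-models is an assumption, pick any large disk):
#   sudo systemctl edit ollama.service
#   # then add in the drop-in editor:
#   #   [Service]
#   #   Environment="OLLAMA_MODELS=/data/ollama-models"
#   sudo systemctl daemon-reload && sudo systemctl restart ollama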
ollama pull gemma4:26b
# Manual runs only - prevent the systemd daemon from auto-starting
sudo systemctl disable ollama.service
# Launch in CPU mode
CUDA_VISIBLE_DEVICES=-1 OLLAMA_NUM_PARALLEL=4 OLLAMA_HOST=0.0.0.0 OLLAMA_CONTEXT_LENGTH=$((128*1024)) ./bin/ollama serve
# Launch in dual GPU mode (2nd and 3rd GPU on a three-GPU machine). Leave CUDA_VISIBLE_DEVICES unset to use all available GPUs.
CUDA_VISIBLE_DEVICES="1,2" OLLAMA_NUM_PARALLEL=4 OLLAMA_HOST=0.0.0.0 OLLAMA_CONTEXT_LENGTH=$((128*1024)) ./bin/ollama serve
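# Note: OLLAMA_CONTEXT_LENGTH is in tokens; the shell arithmetic above expands before launch:
echo $((128*1024))  # prints 131072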
# Check ollama status
./bin/ollama ps
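# The running server also answers over HTTP (default port 11434); a sketch of
# listing local models via the Ollama REST API:
#   curl http://localhost:11434/api/tags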