{"providers":[{"provider":"ollama","label":"Ollama (local)","available":true,"default":"qwen2.5:7b-instruct","models":[{"id":"llama3.2:3b","label":"Llama 3.2 3B","publisher":"Meta","ram_gb":4,"notes":"Smallest Llama, fastest on CPU. Llama 4 is MoE-only and needs more RAM.","pulled":true},{"id":"qwen3:8b","label":"Qwen 3 8B","publisher":"Alibaba","ram_gb":8,"notes":"Latest Qwen 3 generation (replaces Qwen 2.5). Strong multilingual + coding.","pulled":true},{"id":"phi4:14b","label":"Phi-4 14B","publisher":"Microsoft","ram_gb":12,"notes":"Reasoning-focused. Strong on STEM. Tight on 8GB; comfortable on 16GB.","pulled":true},{"id":"gemma3:12b","label":"Gemma 3 12B","publisher":"Google","ram_gb":10,"notes":"Latest Gemma 3 with strong tool calling. Tight on 8GB; comfortable on 16GB.","pulled":true},{"id":"deepseek-r1:14b","label":"DeepSeek R1 14B","publisher":"DeepSeek","ram_gb":12,"notes":"Reasoning chain-of-thought. Slow first-token (thinks before answering).","pulled":true},{"id":"nemotron-mini:4b","label":"Nemotron Mini 4B","publisher":"NVIDIA","ram_gb":5,"notes":"NVIDIA-tuned small model. CPU-friendly.","pulled":true},{"id":"mistral-small3.1:latest","label":"Mistral Small 3.1 (24B)","publisher":"Mistral","ram_gb":16,"notes":"Latest Mistral Small (March 2026). Near-70B quality at 14GB. Needs 16GB+.","pulled":true},{"id":"qwen2.5:7b-instruct","label":"Qwen 2.5 7B (legacy)","publisher":"Alibaba","ram_gb":6,"notes":"Previous gen. Already pulled. Will be retired after Qwen 3 8B is verified.","pulled":true}],"notes":"Local models on this VM. Slower (~10-60s on CPU) but free and private. \nLatest from Meta, Alibaba, Microsoft, Google, DeepSeek, NVIDIA, Mistral."},{"provider":"anthropic","label":"Anthropic Claude","available":true,"default":"claude-opus-4-7","models":[{"id":"claude-opus-4-7","label":"Claude Opus 4.7 (smartest)","publisher":"Anthropic","pulled":true},{"id":"claude-sonnet-4-6","label":"Claude Sonnet 4.6 (balanced)","publisher":"Anthropic","pulled":true},{"id":"claude-haiku-4-5","label":"Claude Haiku 4.5 (fastest)","publisher":"Anthropic","pulled":true}],"notes":"Cloud API. Fast (~2-7s) and high quality. Costs apply per token."},{"provider":"openai","label":"OpenAI","available":false,"default":"gpt-4o","models":[{"id":"gpt-4o","label":"GPT-4o","publisher":"OpenAI","pulled":true},{"id":"gpt-4o-mini","label":"GPT-4o mini (cheaper)","publisher":"OpenAI","pulled":true}],"notes":"Cloud API. Costs apply per token."}]}