feat: initial Ollama MCP server

TypeScript MCP server wrapping the Ollama REST API. Provides tools for:
- Text generation and multi-turn chat
- Model management (list, show, pull, delete)
- Health check and running model status
- Embeddings generation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 16:46:19 +08:00
commit 3996e2f199
8 changed files with 1599 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
node_modules/
dist/
.env
+21
View File
@@ -0,0 +1,21 @@
{
"name": "@kollect/ollama-mcp",
"version": "0.1.0",
"description": "MCP server for Ollama local LLM integration",
"type": "module",
"main": "dist/index.js",
"scripts": {
"build": "tsc",
"start": "node dist/index.js",
"dev": "tsx src/index.ts"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.12.1",
"zod": "^4.3.6"
},
"devDependencies": {
"@types/node": "^22.0.0",
"tsx": "^4.19.0",
"typescript": "^5.7.0"
}
}
+1106
View File
File diff suppressed because it is too large Load Diff
+174
View File
@@ -0,0 +1,174 @@
/**
* Ollama HTTP API client.
*
* Wraps the Ollama REST API (default http://127.0.0.1:11434).
* Docs: https://github.com/ollama/ollama/blob/main/docs/api.md
*/
/** Connection settings for the Ollama client. */
export interface OllamaConfig {
/** Base URL of the Ollama server; the client strips trailing slashes before use. */
host: string;
}
/** Arguments for a single-prompt, non-streaming completion. */
export interface GenerateRequest {
/** Name of the model to run. */
model: string;
/** Prompt text sent to the model. */
prompt: string;
/** Optional system prompt; the client only forwards it when it is non-empty. */
system?: string;
/** Optional sampling temperature; forwarded only when defined. */
temperature?: number;
/** Optional generation cap; the client sends it to Ollama as `options.num_predict`. */
max_tokens?: number;
/** When set, forwarded as the request's `format` field; this type only allows "json". */
format?: "json";
}
/** One turn of a chat conversation. */
export interface ChatMessage {
/** Who authored the message. */
role: "system" | "user" | "assistant";
/** Message text. */
content: string;
}
/** Arguments for a multi-turn, non-streaming chat completion. */
export interface ChatRequest {
/** Name of the model to run. */
model: string;
/** Conversation history, forwarded to Ollama unchanged. */
messages: ChatMessage[];
/** Optional sampling temperature; forwarded only when defined. */
temperature?: number;
/** Optional generation cap; the client sends it to Ollama as `options.num_predict`. */
max_tokens?: number;
/** When set, forwarded as the request's `format` field; this type only allows "json". */
format?: "json";
}
/**
* Shape the client assumes for each entry of the `models` array returned by `/api/tags`.
* NOTE(review): the response is cast, not validated, so fields may be absent at runtime.
*/
export interface ModelInfo {
name: string;
model: string;
/** NOTE(review): presumably the on-disk size in bytes — confirm against the Ollama API docs. */
size: number;
details: {
parameter_size: string;
quantization_level: string;
family: string;
};
}
/**
* Progress record for a model pull.
* NOTE(review): not referenced by the client code visible here (pulls are requested with
* `stream: false`); presumably intended for streaming pull responses — confirm.
*/
export interface PullProgress {
status: string;
digest?: string;
total?: number;
completed?: number;
}
/**
 * Minimal HTTP client for the Ollama REST API.
 *
 * Every call is non-streaming. Any non-2xx response is converted into an
 * `Error` whose message contains the HTTP status and the response body
 * (or the status text when the body is empty).
 */
export class OllamaClient {
  private readonly host: string;

  /**
   * @param config - Connection settings. Trailing slashes on `host` are
   *   stripped so that paths can be appended with a single leading slash.
   */
  constructor(config: OllamaConfig) {
    this.host = config.host.replace(/\/+$/, "");
  }

  /**
   * Performs a fetch against the configured host.
   *
   * @throws Error when the response status is not OK.
   */
  private async request(path: string, options?: RequestInit): Promise<Response> {
    const url = `${this.host}${path}`;
    const res = await fetch(url, options);
    if (!res.ok) {
      // Reading the body is best-effort; fall back to the status text.
      const body = await res.text().catch(() => "");
      throw new Error(`Ollama API ${res.status}: ${body || res.statusText}`);
    }
    return res;
  }

  /** Sends `payload` as a JSON body using the given HTTP method (POST by default). */
  private sendJson(path: string, payload: unknown, method = "POST"): Promise<Response> {
    return this.request(path, {
      method,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
  }

  /**
   * Builds the Ollama `options` object from the caller-facing sampling knobs.
   *
   * Ollama reads sampling parameters (including `temperature`) from `options`;
   * a top-level `temperature` field is not part of the request schema, so both
   * values must be merged into one object here.
   *
   * @returns The options object, or `undefined` when no knob was supplied.
   */
  private static samplingOptions(
    temperature?: number,
    maxTokens?: number,
  ): Record<string, number> | undefined {
    const options: Record<string, number> = {};
    if (temperature !== undefined) options.temperature = temperature;
    if (maxTokens !== undefined) options.num_predict = maxTokens;
    return Object.keys(options).length > 0 ? options : undefined;
  }

  /**
   * Generate a completion (non-streaming).
   *
   * @returns The `response` text from Ollama.
   * @throws Error when the API responds with a non-OK status.
   */
  async generate(req: GenerateRequest): Promise<string> {
    const body: Record<string, unknown> = {
      model: req.model,
      prompt: req.prompt,
      stream: false,
    };
    if (req.system) body.system = req.system;
    const options = OllamaClient.samplingOptions(req.temperature, req.max_tokens);
    if (options) body.options = options;
    if (req.format) body.format = req.format;

    const res = await this.sendJson("/api/generate", body);
    const data = (await res.json()) as { response: string };
    return data.response;
  }

  /**
   * Multi-turn chat completion (non-streaming).
   *
   * @returns The assistant message content from Ollama.
   * @throws Error when the API responds with a non-OK status.
   */
  async chat(req: ChatRequest): Promise<string> {
    const body: Record<string, unknown> = {
      model: req.model,
      messages: req.messages,
      stream: false,
    };
    const options = OllamaClient.samplingOptions(req.temperature, req.max_tokens);
    if (options) body.options = options;
    if (req.format) body.format = req.format;

    const res = await this.sendJson("/api/chat", body);
    const data = (await res.json()) as { message: { content: string } };
    return data.message.content;
  }

  /**
   * List locally available models.
   *
   * @returns The `models` array from `/api/tags`, or an empty array when the
   *   response carries none.
   */
  async listModels(): Promise<ModelInfo[]> {
    const res = await this.request("/api/tags");
    const data = (await res.json()) as { models?: ModelInfo[] | null };
    return data.models ?? [];
  }

  /** Get detailed info about a model; the raw JSON response is returned as-is. */
  async showModel(name: string): Promise<Record<string, unknown>> {
    const res = await this.sendJson("/api/show", { name });
    return (await res.json()) as Record<string, unknown>;
  }

  /**
   * Pull a model (blocking — waits for completion).
   *
   * @returns The final `status` string reported by Ollama.
   */
  async pullModel(name: string): Promise<string> {
    const res = await this.sendJson("/api/pull", { name, stream: false });
    const data = (await res.json()) as { status: string };
    return data.status;
  }

  /** Delete a model. Rejects when the API responds with a non-OK status. */
  async deleteModel(name: string): Promise<void> {
    await this.sendJson("/api/delete", { name }, "DELETE");
  }

  /**
   * Check if Ollama is reachable.
   *
   * Deliberately swallows every failure (network error or non-OK status) and
   * reports it as `false`.
   */
  async health(): Promise<boolean> {
    try {
      await this.request("/");
      return true;
    } catch {
      return false;
    }
  }

  /** List running models; returns an empty array when the response has none. */
  async listRunning(): Promise<unknown[]> {
    const res = await this.request("/api/ps");
    const data = (await res.json()) as { models?: unknown[] | null };
    return data.models ?? [];
  }

  /**
   * Generate embeddings.
   *
   * @param model - Embedding model name.
   * @param input - A single text or an array of texts.
   * @returns The `embeddings` array from the response.
   */
  async embed(model: string, input: string | string[]): Promise<number[][]> {
    const res = await this.sendJson("/api/embed", { model, input });
    const data = (await res.json()) as { embeddings: number[][] };
    return data.embeddings;
  }
}
+97
View File
@@ -0,0 +1,97 @@
/**
* Tool handler implementations for Ollama MCP server.
*/
import type { OllamaClient, ChatMessage } from "./client.js";
/**
 * Renders a byte count as a short human-readable size.
 *
 * Values below 1 MiB are shown in whole KB, values below 1 GiB in MB with one
 * decimal, and everything else in GB with two decimals.
 */
function formatBytes(bytes: number): string {
  const perKb = 1024;
  const perMb = 1024 * 1024;
  const perGb = 1024 * 1024 * 1024;

  if (bytes < perMb) {
    const kb = bytes / perKb;
    return `${kb.toFixed(0)} KB`;
  }
  if (bytes < perGb) {
    const mb = bytes / perMb;
    return `${mb.toFixed(1)} MB`;
  }
  const gb = bytes / perGb;
  return `${gb.toFixed(2)} GB`;
}
/**
 * Builds the MCP tool handlers, keyed by tool name.
 *
 * Each handler takes the validated tool arguments and resolves to the text
 * that is returned to the MCP caller. Errors thrown by the client propagate
 * to the caller of the handler.
 *
 * @param client - Ollama client used to perform the underlying API calls.
 */
export function createHandlers(client: OllamaClient) {
  return {
    /** Single-prompt completion; returns the generated text. */
    ollama_generate: async (args: {
      model: string;
      prompt: string;
      system?: string;
      temperature?: number;
      max_tokens?: number;
      format?: "json";
    }) => {
      return await client.generate(args);
    },

    /** Multi-turn chat; returns the assistant reply text. */
    ollama_chat: async (args: {
      model: string;
      messages: ChatMessage[];
      temperature?: number;
      max_tokens?: number;
      format?: "json";
    }) => {
      return await client.chat(args);
    },

    /** One bullet line per installed model, or a hint when none are installed. */
    ollama_list_models: async () => {
      const models = await client.listModels();
      if (models.length === 0) return "No models installed. Use ollama_pull_model to download one.";
      return models
        .map((m) => {
          // The API response is not validated, so `details` may be missing at runtime.
          const parameterSize = m.details?.parameter_size ?? "unknown";
          const quantization = m.details?.quantization_level ?? "unknown";
          const family = m.details?.family ?? "unknown";
          return `- ${m.name} (${formatBytes(m.size)}, ${parameterSize}, ${quantization}, family: ${family})`;
        })
        .join("\n");
    },

    /** Readable summary of a model; template and license are reported by length only. */
    ollama_show_model: async (args: { name: string }) => {
      const info = await client.showModel(args.name);
      // Return a readable subset — full response can be huge (includes template, license)
      const details = info.details as Record<string, unknown> | undefined;
      const params = info.model_info as Record<string, unknown> | undefined;
      const lines = [`Model: ${args.name}`];
      if (details) {
        lines.push(`Family: ${details.family ?? "unknown"}`);
        lines.push(`Parameters: ${details.parameter_size ?? "unknown"}`);
        lines.push(`Quantization: ${details.quantization_level ?? "unknown"}`);
        lines.push(`Format: ${details.format ?? "unknown"}`);
      }
      // Narrow at runtime instead of asserting: the response is untyped JSON.
      if (typeof info.template === "string" && info.template) {
        lines.push(`Template: (${info.template.length} chars)`);
      }
      if (typeof info.license === "string" && info.license) {
        lines.push(`License: (${info.license.length} chars)`);
      }
      if (params) {
        const allKeys = Object.keys(params);
        const paramKeys = allKeys.slice(0, 10);
        lines.push(`Model info keys: ${paramKeys.join(", ")}${allKeys.length > 10 ? "..." : ""}`);
      }
      return lines.join("\n");
    },

    /** Pulls a model and reports the final status returned by Ollama. */
    ollama_pull_model: async (args: { name: string }) => {
      const status = await client.pullModel(args.name);
      // Separate the name from the status; they were previously glued together.
      return `Pull complete: ${args.name} (status: ${status})`;
    },

    /** Deletes a model and confirms the deletion. */
    ollama_delete_model: async (args: { name: string }) => {
      await client.deleteModel(args.name);
      return `Deleted: ${args.name}`;
    },

    /** Reports whether the Ollama server answered the health probe. */
    ollama_health: async () => {
      const ok = await client.health();
      return ok ? "Ollama is running and reachable." : "Ollama is not reachable.";
    },

    /** Pretty-printed JSON of the running models, or a notice when none are loaded. */
    ollama_list_running: async () => {
      const models = await client.listRunning();
      if (models.length === 0) return "No models currently loaded in memory.";
      return JSON.stringify(models, null, 2);
    },

    /** Summary line (count and dimension) followed by the embeddings as JSON. */
    ollama_embed: async (args: { model: string; input: string | string[] }) => {
      const embeddings = await client.embed(args.model, args.input);
      const count = embeddings.length;
      const dim = embeddings[0]?.length ?? 0;
      return `Generated ${count} embedding(s), dimension: ${dim}\n\n${JSON.stringify(embeddings)}`;
    },
  };
}
+63
View File
@@ -0,0 +1,63 @@
#!/usr/bin/env node
/**
* Ollama MCP Server
*
* Connects AI assistants to a local Ollama instance for LLM inference,
* model management, and embeddings.
*
* Environment variables:
* OLLAMA_HOST — Base URL of the Ollama server (default: http://127.0.0.1:11434)
*/
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { OllamaClient } from "./client.js";
import { toolDefs } from "./tools.js";
import { createHandlers } from "./handlers.js";
/**
 * Resolves the Ollama base URL from the raw `OLLAMA_HOST` value.
 *
 * - Unset, empty, or whitespace-only values fall back to the default URL
 *   (an empty string would otherwise produce invalid request URLs).
 * - Values without a URL scheme (e.g. `127.0.0.1:11434`) get `http://`
 *   prepended, since `fetch` requires an absolute URL.
 */
function resolveHost(raw: string | undefined): string {
  const value = raw?.trim();
  if (!value) return "http://127.0.0.1:11434";
  return /^[a-z][a-z0-9+.-]*:\/\//i.test(value) ? value : `http://${value}`;
}

const host = resolveHost(process.env.OLLAMA_HOST);
const client = new OllamaClient({ host });
const handlers = createHandlers(client);

// MCP server identity advertised to connecting clients.
const server = new McpServer({
  name: "ollama",
  version: "0.1.0",
});
/**
 * Common call signature for every tool handler. A `never` parameter lets
 * handlers with different argument shapes be assigned without a cast.
 */
type ToolHandler = (args: never) => Promise<unknown>;

// Register each tool from toolDefs with its corresponding handler.
for (const [name, def] of Object.entries(toolDefs)) {
  const handler: ToolHandler | undefined = handlers[name as keyof typeof handlers];
  if (!handler) {
    // A definition without a handler is skipped rather than aborting startup.
    console.error(`No handler for tool: ${name}`);
    continue;
  }
  server.tool(
    name,
    def.description,
    def.inputSchema,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- args are validated by the SDK against def.inputSchema
    async (args: any) => {
      try {
        const result = await handler(args as never);
        return { content: [{ type: "text" as const, text: String(result) }] };
      } catch (err: unknown) {
        // Non-Error throwables have no `.message`; stringify them instead of printing "undefined".
        const message = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text" as const, text: `Error: ${message}` }],
          isError: true,
        };
      }
    },
  );
}
/**
 * Connects the MCP server to a stdio transport and logs a startup line.
 * The log goes to stderr via `console.error`.
 */
async function main(): Promise<void> {
  await server.connect(new StdioServerTransport());
  console.error(`Ollama MCP server running on stdio (host: ${host})`);
}

// Any startup failure is logged and terminates the process with exit code 1.
main().catch((fatal) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
+121
View File
@@ -0,0 +1,121 @@
/**
* MCP tool definitions for Ollama.
*
* Each tool has a description and a Zod input schema.
*/
import { z } from "zod";
// Field factories: each call returns a fresh schema so no instance is shared between tools.

/** Required string field carrying the given description. */
const describedString = (text: string) => z.string().describe(text);

/** Optional sampling temperature restricted to the range 0–2. */
const temperatureField = (text: string) =>
  z.number().min(0).max(2).optional().describe(text);

/** Optional positive-integer cap on generated tokens. */
const maxTokensField = () =>
  z.number().int().positive().optional().describe("Maximum tokens to generate");

/** Optional output format switch; only "json" is accepted. */
const jsonFormatField = () =>
  z.enum(["json"]).optional().describe("Set to 'json' to force JSON output");

/** Schema for one chat turn: a role plus its text content. */
const chatMessageSchema = () =>
  z.object({
    role: z.enum(["system", "user", "assistant"]),
    content: z.string(),
  });

/**
 * Tool catalogue keyed by tool name. Every entry pairs a human-readable
 * description with the Zod shape describing the tool's arguments.
 */
export const toolDefs = {
  // ── Generation ──
  ollama_generate: {
    description:
      "Generate a text completion using a local Ollama model. Use for code generation, review, explanation, or any text task.",
    inputSchema: {
      model: describedString("Model name (e.g. qwen2.5-coder:7b)"),
      prompt: describedString("The prompt to send to the model"),
      system: z.string().optional().describe("System prompt to set context/persona"),
      temperature: temperatureField("Sampling temperature (0=deterministic, default ~0.7)"),
      max_tokens: maxTokensField(),
      format: jsonFormatField(),
    },
  },
  ollama_chat: {
    description:
      "Multi-turn chat with a local Ollama model. Send a conversation history for context-aware responses.",
    inputSchema: {
      model: describedString("Model name (e.g. qwen2.5-coder:7b)"),
      messages: z.array(chatMessageSchema()).describe("Conversation messages array"),
      temperature: temperatureField("Sampling temperature"),
      max_tokens: maxTokensField(),
      format: jsonFormatField(),
    },
  },

  // ── Model Management ──
  ollama_list_models: {
    description: "List all locally available Ollama models with size and quantization details.",
    inputSchema: {},
  },
  ollama_show_model: {
    description: "Get detailed information about a specific model (parameters, template, license).",
    inputSchema: {
      name: describedString("Model name (e.g. qwen2.5-coder:7b)"),
    },
  },
  ollama_pull_model: {
    description: "Download a model from the Ollama registry. Blocks until complete.",
    inputSchema: {
      name: describedString("Model name to pull (e.g. qwen2.5-coder:7b)"),
    },
  },
  ollama_delete_model: {
    description: "Delete a locally downloaded model to free disk space.",
    inputSchema: {
      name: describedString("Model name to delete"),
    },
  },

  // ── Status ──
  ollama_health: {
    description: "Check if the Ollama server is running and reachable.",
    inputSchema: {},
  },
  ollama_list_running: {
    description: "List currently loaded/running models in memory.",
    inputSchema: {},
  },

  // ── Embeddings ──
  ollama_embed: {
    description:
      "Generate embeddings for text using a local model. Useful for semantic search and similarity.",
    inputSchema: {
      model: describedString("Model name for embeddings"),
      input: z
        .union([z.string(), z.array(z.string())])
        .describe("Text or array of texts to embed"),
    },
  },
} as const;
+14
View File
@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "Node16",
"moduleResolution": "Node16",
"outDir": "dist",
"rootDir": "src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"declaration": true
},
"include": ["src/**/*"]
}