feat: initial Ollama MCP server

TypeScript MCP server wrapping the Ollama REST API. Provides tools for:

- Text generation and multi-turn chat
- Model management (list, show, pull, delete)
- Health check and running model status
- Embeddings generation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 16:46:19 +08:00
commit 3996e2f199
8 changed files with 1599 additions and 0 deletions
@@ -0,0 +1,3 @@
+node_modules/
+dist/
+.env
@@ -0,0 +1,21 @@
+{
+  "name": "@kollect/ollama-mcp",
+  "version": "0.1.0",
+  "description": "MCP server for Ollama local LLM integration",
+  "type": "module",
+  "main": "dist/index.js",
+  "scripts": {
+    "build": "tsc",
+    "start": "node dist/index.js",
+    "dev": "tsx src/index.ts"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.12.1",
+    "zod": "^4.3.6"
+  },
+  "devDependencies": {
+    "@types/node": "^22.0.0",
+    "tsx": "^4.19.0",
+    "typescript": "^5.7.0"
+  }
+}
@@ -0,0 +1,174 @@
+/**
+ * Ollama HTTP API client.
+ *
+ * Wraps the Ollama REST API (default http://127.0.0.1:11434).
+ * Docs: https://github.com/ollama/ollama/blob/main/docs/api.md
+ */
+
+/** Connection settings accepted by the `OllamaClient` constructor. */
+export interface OllamaConfig {
+  /** Base URL of the Ollama server; the client strips trailing slashes before use. */
+  host: string;
+}
+
+/** Arguments for a single-prompt completion via `OllamaClient.generate`. */
+export interface GenerateRequest {
+  /** Name of the model to run. */
+  model: string;
+  /** Prompt text sent to the model. */
+  prompt: string;
+  /** Optional system prompt; left out of the request when empty or undefined. */
+  system?: string;
+  /** Optional sampling temperature; left out of the request when undefined. */
+  temperature?: number;
+  /** Optional generation cap; forwarded to the API as `options.num_predict`. */
+  max_tokens?: number;
+  /** Optional output format; `"json"` is the only value this type allows. */
+  format?: "json";
+}
+
+/** One entry of the conversation history passed to `OllamaClient.chat`. */
+export interface ChatMessage {
+  /** Author of the message. */
+  role: "system" | "user" | "assistant";
+  /** Message text. */
+  content: string;
+}
+
+/** Arguments for a multi-turn completion via `OllamaClient.chat`. */
+export interface ChatRequest {
+  /** Name of the model to run. */
+  model: string;
+  /** Conversation history, forwarded to the API as given. */
+  messages: ChatMessage[];
+  /** Optional sampling temperature; left out of the request when undefined. */
+  temperature?: number;
+  /** Optional generation cap; forwarded to the API as `options.num_predict`. */
+  max_tokens?: number;
+  /** Optional output format; `"json"` is the only value this type allows. */
+  format?: "json";
+}
+
+/** Shape assumed for each entry returned by `OllamaClient.listModels` (`GET /api/tags`). */
+export interface ModelInfo {
+  /** Model name shown in listings. */
+  name: string;
+  /** Model identifier as reported by the server. */
+  model: string;
+  /** Model size; the list handler renders it with `formatBytes`, i.e. as a byte count. */
+  size: number;
+  /** Descriptive metadata included in the listing output. */
+  details: {
+    parameter_size: string;
+    quantization_level: string;
+    family: string;
+  };
+}
+
+/**
+ * Status record for a model pull.
+ *
+ * NOTE(review): nothing in the visible code references this type; presumably it
+ * mirrors one line of the streaming `/api/pull` response — confirm before use.
+ */
+export interface PullProgress {
+  /** Status text for the pull. */
+  status: string;
+  /** Optional digest; presumably the layer being transferred — TODO confirm. */
+  digest?: string;
+  /** Optional total amount; presumably bytes — TODO confirm. */
+  total?: number;
+  /** Optional completed amount; presumably bytes — TODO confirm. */
+  completed?: number;
+}
+
+/**
+ * Minimal client for the Ollama REST API.
+ *
+ * Every method issues one HTTP request through the global `fetch` and rejects
+ * with an `Error` when the server answers with a non-OK status.
+ */
+export class OllamaClient {
+  private readonly host: string;
+
+  /**
+   * @param config - Connection settings. Trailing slashes on `host` are removed
+   *   so API paths can be appended directly.
+   */
+  constructor(config: OllamaConfig) {
+    this.host = config.host.replace(/\/+$/, "");
+  }
+
+  /**
+   * Perform a request against the configured host.
+   *
+   * @param path - API path beginning with `/`.
+   * @param options - Passed through to `fetch` unchanged.
+   * @returns The response, which is guaranteed to have an OK status.
+   * @throws Error carrying the status code and the response body, or the
+   *   status text when the body is empty or cannot be read.
+   */
+  private async request(path: string, options?: RequestInit): Promise<Response> {
+    const url = `${this.host}${path}`;
+    const res = await fetch(url, options);
+    if (!res.ok) {
+      const body = await res.text().catch(() => "");
+      throw new Error(`Ollama API ${res.status}: ${body || res.statusText}`);
+    }
+    return res;
+  }
+
+  /** Send `payload` as a JSON body using the given HTTP method. */
+  private sendJson(method: "POST" | "DELETE", path: string, payload: unknown): Promise<Response> {
+    return this.request(path, {
+      method,
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify(payload),
+    });
+  }
+
+  /**
+   * Collect the sampling parameters of a request into the `options` object.
+   *
+   * The Ollama API reads model parameters such as `temperature` and
+   * `num_predict` from `options`; a top-level `temperature` field is not part
+   * of the request schema and would be ignored.
+   *
+   * @returns The options object, or `undefined` when no parameter was set.
+   */
+  private static samplingOptions(
+    req: { temperature?: number; max_tokens?: number },
+  ): Record<string, number> | undefined {
+    const options: Record<string, number> = {};
+    if (req.temperature !== undefined) options.temperature = req.temperature;
+    if (req.max_tokens !== undefined) options.num_predict = req.max_tokens;
+    return Object.keys(options).length > 0 ? options : undefined;
+  }
+
+  /**
+   * Generate a completion (non-streaming).
+   *
+   * @returns The generated text, or an empty string when the reply has no
+   *   `response` field.
+   * @throws Error when the server answers with a non-OK status.
+   */
+  async generate(req: GenerateRequest): Promise<string> {
+    const body: Record<string, unknown> = {
+      model: req.model,
+      prompt: req.prompt,
+      stream: false,
+    };
+    if (req.system) body.system = req.system;
+    const options = OllamaClient.samplingOptions(req);
+    if (options) body.options = options;
+    if (req.format) body.format = req.format;
+
+    const res = await this.sendJson("POST", "/api/generate", body);
+    const data = (await res.json()) as { response?: string };
+    return data.response ?? "";
+  }
+
+  /**
+   * Multi-turn chat completion (non-streaming).
+   *
+   * @returns The assistant message text, or an empty string when the reply has
+   *   no message content.
+   * @throws Error when the server answers with a non-OK status.
+   */
+  async chat(req: ChatRequest): Promise<string> {
+    const body: Record<string, unknown> = {
+      model: req.model,
+      messages: req.messages,
+      stream: false,
+    };
+    const options = OllamaClient.samplingOptions(req);
+    if (options) body.options = options;
+    if (req.format) body.format = req.format;
+
+    const res = await this.sendJson("POST", "/api/chat", body);
+    const data = (await res.json()) as { message?: { content?: string } };
+    return data.message?.content ?? "";
+  }
+
+  /**
+   * List locally available models.
+   *
+   * @returns The `models` array of the reply, or an empty array when absent
+   *   (same fallback as `listRunning`).
+   */
+  async listModels(): Promise<ModelInfo[]> {
+    const res = await this.request("/api/tags");
+    const data = (await res.json()) as { models?: ModelInfo[] };
+    return data.models ?? [];
+  }
+
+  /**
+   * Get detailed info about a model.
+   *
+   * @returns The parsed JSON reply of `/api/show`, unfiltered.
+   */
+  async showModel(name: string): Promise<Record<string, unknown>> {
+    const res = await this.sendJson("POST", "/api/show", { name });
+    return (await res.json()) as Record<string, unknown>;
+  }
+
+  /**
+   * Pull a model (blocking — waits for completion).
+   *
+   * @returns The `status` field of the single non-streaming reply.
+   */
+  async pullModel(name: string): Promise<string> {
+    const res = await this.sendJson("POST", "/api/pull", { name, stream: false });
+    const data = (await res.json()) as { status: string };
+    return data.status;
+  }
+
+  /** Delete a model. Resolves once the server has accepted the request. */
+  async deleteModel(name: string): Promise<void> {
+    await this.sendJson("DELETE", "/api/delete", { name });
+  }
+
+  /**
+   * Check if Ollama is reachable.
+   *
+   * @returns `true` when `GET /` succeeds; `false` on any failure. Errors are
+   *   deliberately swallowed because the answer itself is the result.
+   */
+  async health(): Promise<boolean> {
+    try {
+      await this.request("/");
+      return true;
+    } catch {
+      return false;
+    }
+  }
+
+  /**
+   * List running models.
+   *
+   * @returns The `models` array of `/api/ps`, or an empty array when absent.
+   */
+  async listRunning(): Promise<unknown[]> {
+    const res = await this.request("/api/ps");
+    const data = (await res.json()) as { models?: unknown[] };
+    return data.models ?? [];
+  }
+
+  /**
+   * Generate embeddings.
+   *
+   * @param model - Embedding model name.
+   * @param input - One text or a batch of texts.
+   * @returns The `embeddings` array of the reply, or an empty array when absent.
+   */
+  async embed(model: string, input: string | string[]): Promise<number[][]> {
+    const res = await this.sendJson("POST", "/api/embed", { model, input });
+    const data = (await res.json()) as { embeddings?: number[][] };
+    return data.embeddings ?? [];
+  }
+}
@@ -0,0 +1,97 @@
+/**
+ * Tool handler implementations for Ollama MCP server.
+ */
+
+import type { OllamaClient, ChatMessage } from "./client.js";
+
+/**
+ * Render a byte count as a short human-readable size.
+ *
+ * Values below 1024² are shown in whole KB, values below 1024³ in MB with one
+ * decimal, and everything else in GB with two decimals.
+ */
+function formatBytes(bytes: number): string {
+  const kib = 1024;
+  const mib = kib * 1024;
+  const gib = mib * 1024;
+
+  // [exclusive upper bound, divisor, decimals, suffix], checked in order.
+  const tiers: ReadonlyArray<readonly [number, number, number, string]> = [
+    [mib, kib, 0, "KB"],
+    [gib, mib, 1, "MB"],
+  ];
+  for (const [limit, divisor, decimals, suffix] of tiers) {
+    if (bytes < limit) return `${(bytes / divisor).toFixed(decimals)} ${suffix}`;
+  }
+  return `${(bytes / gib).toFixed(2)} GB`;
+}
+
+/**
+ * Build the tool handlers, keyed by tool name, on top of one client.
+ *
+ * Each handler resolves to the text returned to the caller. Failures from the
+ * client are not caught here and propagate as rejections.
+ */
+export function createHandlers(client: OllamaClient) {
+  return {
+    /** Forward the arguments to `client.generate` and return the completion. */
+    ollama_generate: async (args: {
+      model: string;
+      prompt: string;
+      system?: string;
+      temperature?: number;
+      max_tokens?: number;
+      format?: "json";
+    }) => {
+      const completion = await client.generate(args);
+      return completion;
+    },
+
+    /** Forward the arguments to `client.chat` and return the reply text. */
+    ollama_chat: async (args: {
+      model: string;
+      messages: ChatMessage[];
+      temperature?: number;
+      max_tokens?: number;
+      format?: "json";
+    }) => {
+      const reply = await client.chat(args);
+      return reply;
+    },
+
+    /** One bullet line per installed model, or a hint when none exist. */
+    ollama_list_models: async () => {
+      const installed = await client.listModels();
+      if (installed.length === 0) {
+        return "No models installed. Use ollama_pull_model to download one.";
+      }
+
+      const bullets: string[] = [];
+      for (const entry of installed) {
+        const size = formatBytes(entry.size);
+        bullets.push(
+          `- ${entry.name} (${size}, ${entry.details.parameter_size}, ${entry.details.quantization_level}, family: ${entry.details.family})`,
+        );
+      }
+      return bullets.join("\n");
+    },
+
+    /**
+     * Summarize a model. Template and license are reported only by length
+     * because the full reply can be huge.
+     */
+    ollama_show_model: async (args: { name: string }) => {
+      const info = await client.showModel(args.name);
+      const detailFields = info.details as Record<string, unknown> | undefined;
+      const modelInfo = info.model_info as Record<string, unknown> | undefined;
+
+      const summary = [`Model: ${args.name}`];
+
+      if (detailFields) {
+        const labelled = [
+          ["Family", "family"],
+          ["Parameters", "parameter_size"],
+          ["Quantization", "quantization_level"],
+          ["Format", "format"],
+        ] as const;
+        for (const [label, key] of labelled) {
+          summary.push(`${label}: ${detailFields[key] ?? "unknown"}`);
+        }
+      }
+
+      if (info.template) {
+        summary.push(`Template: (${(info.template as string).length} chars)`);
+      }
+      if (info.license) {
+        summary.push(`License: (${(info.license as string).length} chars)`);
+      }
+
+      if (modelInfo) {
+        // Show at most ten keys and mark the list as truncated beyond that.
+        const allKeys = Object.keys(modelInfo);
+        const shown = allKeys.slice(0, 10).join(", ");
+        const ellipsis = allKeys.length > 10 ? "..." : "";
+        summary.push(`Model info keys: ${shown}${ellipsis}`);
+      }
+
+      return summary.join("\n");
+    },
+
+    /** Pull a model and report the final status string. */
+    ollama_pull_model: async (args: { name: string }) => {
+      const finalStatus = await client.pullModel(args.name);
+      return `Pull complete: ${args.name} — ${finalStatus}`;
+    },
+
+    /** Delete a model and confirm by name. */
+    ollama_delete_model: async (args: { name: string }) => {
+      await client.deleteModel(args.name);
+      return `Deleted: ${args.name}`;
+    },
+
+    /** Report whether the server answered the health probe. */
+    ollama_health: async () => {
+      const reachable = await client.health();
+      if (reachable) return "Ollama is running and reachable.";
+      return "Ollama is not reachable.";
+    },
+
+    /** Pretty-printed JSON of the loaded models, or a notice when none are loaded. */
+    ollama_list_running: async () => {
+      const loaded = await client.listRunning();
+      if (loaded.length === 0) return "No models currently loaded in memory.";
+      return JSON.stringify(loaded, null, 2);
+    },
+
+    /** Embed the input and return a count/dimension header followed by the raw vectors. */
+    ollama_embed: async (args: { model: string; input: string | string[] }) => {
+      const vectors = await client.embed(args.model, args.input);
+      const header = `Generated ${vectors.length} embedding(s), dimension: ${vectors[0]?.length ?? 0}`;
+      return `${header}\n\n${JSON.stringify(vectors)}`;
+    },
+  };
+}
@@ -0,0 +1,63 @@
+#!/usr/bin/env node
+/**
+ * Ollama MCP Server
+ *
+ * Connects AI assistants to a local Ollama instance for LLM inference,
+ * model management, and embeddings.
+ *
+ * Environment variables:
+ *   OLLAMA_HOST — Base URL of the Ollama server (default: http://127.0.0.1:11434)
+ */
+
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { OllamaClient } from "./client.js";
+import { toolDefs } from "./tools.js";
+import { createHandlers } from "./handlers.js";
+
+// An unset or blank OLLAMA_HOST falls back to the local default. `||` is used
+// on purpose: an empty string must not become the base URL.
+const host = process.env.OLLAMA_HOST?.trim() || "http://127.0.0.1:11434";
+
+const client = new OllamaClient({ host });
+const handlers = createHandlers(client);
+
+const server = new McpServer({
+  name: "ollama",
+  version: "0.1.0",
+});
+
+/** Call signature every handler is invoked through during registration. */
+type ToolHandler = (args: unknown) => Promise<string>;
+
+/** Turn anything that can be thrown into a printable message. */
+function describeError(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+// Register each tool from toolDefs with its corresponding handler.
+for (const [name, def] of Object.entries(toolDefs)) {
+  // The lookup is keyed by a runtime string, so the result is widened to one
+  // shared signature; argument shapes are described by the tool's inputSchema.
+  const handler = handlers[name as keyof typeof handlers] as ToolHandler | undefined;
+  if (!handler) {
+    console.error(`No handler for tool: ${name}`);
+    continue;
+  }
+
+  server.tool(
+    name,
+    def.description,
+    def.inputSchema,
+    async (args: unknown) => {
+      try {
+        const result = await handler(args);
+        return { content: [{ type: "text" as const, text: String(result) }] };
+      } catch (err: unknown) {
+        // Report failures as an error result instead of rejecting, and cope
+        // with thrown values that are not Error instances.
+        return {
+          content: [{ type: "text" as const, text: `Error: ${describeError(err)}` }],
+          isError: true,
+        };
+      }
+    },
+  );
+}
+
+/**
+ * Attach the server to a stdio transport and announce readiness.
+ *
+ * The status line is written with `console.error`.
+ */
+async function main() {
+  await server.connect(new StdioServerTransport());
+  console.error(`Ollama MCP server running on stdio (host: ${host})`);
+}
+
+// Any startup failure is logged and ends the process with exit code 1.
+main().catch((fatal) => {
+  console.error("Fatal error:", fatal);
+  process.exit(1);
+});
@@ -0,0 +1,121 @@
+/**
+ * MCP tool definitions for Ollama.
+ *
+ * Each tool has a description and a Zod input schema.
+ */
+
+import { z } from "zod";
+
+// Builders for the fields that repeat across tools. Each call creates a fresh
+// schema, so no schema instance is shared between tool definitions.
+const modelField = (hint: string) => z.string().describe(hint);
+const temperatureField = (hint: string) =>
+  z.number().min(0).max(2).optional().describe(hint);
+const maxTokensField = () =>
+  z.number().int().positive().optional().describe("Maximum tokens to generate");
+const jsonFormatField = () =>
+  z.enum(["json"]).optional().describe("Set to 'json' to force JSON output");
+
+/**
+ * Tool catalogue keyed by tool name. Every entry pairs a description with an
+ * `inputSchema` object whose values are Zod schemas (empty for tools that take
+ * no arguments).
+ */
+export const toolDefs = {
+  // ── Generation ──
+  ollama_generate: {
+    description:
+      "Generate a text completion using a local Ollama model. Use for code generation, review, explanation, or any text task.",
+    inputSchema: {
+      model: modelField("Model name (e.g. qwen2.5-coder:7b)"),
+      prompt: z.string().describe("The prompt to send to the model"),
+      system: z.string().optional().describe("System prompt to set context/persona"),
+      temperature: temperatureField("Sampling temperature (0=deterministic, default ~0.7)"),
+      max_tokens: maxTokensField(),
+      format: jsonFormatField(),
+    },
+  },
+
+  ollama_chat: {
+    description:
+      "Multi-turn chat with a local Ollama model. Send a conversation history for context-aware responses.",
+    inputSchema: {
+      model: modelField("Model name (e.g. qwen2.5-coder:7b)"),
+      messages: z
+        .array(
+          z.object({
+            role: z.enum(["system", "user", "assistant"]),
+            content: z.string(),
+          }),
+        )
+        .describe("Conversation messages array"),
+      temperature: temperatureField("Sampling temperature"),
+      max_tokens: maxTokensField(),
+      format: jsonFormatField(),
+    },
+  },
+
+  // ── Model Management ──
+  ollama_list_models: {
+    description: "List all locally available Ollama models with size and quantization details.",
+    inputSchema: {},
+  },
+
+  ollama_show_model: {
+    description: "Get detailed information about a specific model (parameters, template, license).",
+    inputSchema: {
+      name: modelField("Model name (e.g. qwen2.5-coder:7b)"),
+    },
+  },
+
+  ollama_pull_model: {
+    description: "Download a model from the Ollama registry. Blocks until complete.",
+    inputSchema: {
+      name: modelField("Model name to pull (e.g. qwen2.5-coder:7b)"),
+    },
+  },
+
+  ollama_delete_model: {
+    description: "Delete a locally downloaded model to free disk space.",
+    inputSchema: {
+      name: modelField("Model name to delete"),
+    },
+  },
+
+  // ── Status ──
+  ollama_health: {
+    description: "Check if the Ollama server is running and reachable.",
+    inputSchema: {},
+  },
+
+  ollama_list_running: {
+    description: "List currently loaded/running models in memory.",
+    inputSchema: {},
+  },
+
+  // ── Embeddings ──
+  ollama_embed: {
+    description:
+      "Generate embeddings for text using a local model. Useful for semantic search and similarity.",
+    inputSchema: {
+      model: modelField("Model name for embeddings"),
+      input: z
+        .union([z.string(), z.array(z.string())])
+        .describe("Text or array of texts to embed"),
+    },
+  },
+} as const;
@@ -0,0 +1,14 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "Node16",
+    "moduleResolution": "Node16",
+    "outDir": "dist",
+    "rootDir": "src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "declaration": true
+  },
+  "include": ["src/**/*"]
+}