feat: initial Ollama MCP server
TypeScript MCP server wrapping the Ollama REST API. Provides tools for:
- Text generation and multi-turn chat
- Model management (list, show, pull, delete)
- Health check and running model status
- Embeddings generation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
node_modules/
|
||||
dist/
|
||||
.env
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "@kollect/ollama-mcp",
|
||||
"version": "0.1.0",
|
||||
"description": "MCP server for Ollama local LLM integration",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"start": "node dist/index.js",
|
||||
"dev": "tsx src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.12.1",
|
||||
"zod": "^4.3.6"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^22.0.0",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.7.0"
|
||||
}
|
||||
}
|
||||
Generated
+1106
File diff suppressed because it is too large
Load Diff
+174
@@ -0,0 +1,174 @@
|
||||
/**
|
||||
* Ollama HTTP API client.
|
||||
*
|
||||
* Wraps the Ollama REST API (default http://127.0.0.1:11434).
|
||||
* Docs: https://github.com/ollama/ollama/blob/main/docs/api.md
|
||||
*/
|
||||
|
||||
/** Connection settings accepted by the `OllamaClient` constructor. */
export interface OllamaConfig {
  /**
   * Base URL of the Ollama server (for example `http://127.0.0.1:11434`).
   * The client strips any trailing slashes before building request URLs.
   */
  host: string;
}
|
||||
|
||||
/** Arguments for a single-prompt completion via `OllamaClient.generate`. */
export interface GenerateRequest {
  /** Name of the model to run. */
  model: string;
  /** Prompt text sent to the model. */
  prompt: string;
  /** Optional system prompt; omitted from the request when empty. */
  system?: string;
  /** Optional sampling temperature. */
  temperature?: number;
  /** Optional cap on generated tokens; sent to Ollama as `num_predict`. */
  max_tokens?: number;
  /** Set to `"json"` to request JSON output. */
  format?: "json";
}
|
||||
|
||||
/** One turn of a conversation sent to the chat endpoint. */
export interface ChatMessage {
  /** Who authored this turn. */
  role: "system" | "user" | "assistant";
  /** Text of the turn. */
  content: string;
}
|
||||
|
||||
/** Arguments for a multi-turn completion via `OllamaClient.chat`. */
export interface ChatRequest {
  /** Name of the model to run. */
  model: string;
  /** Conversation history, forwarded to Ollama unchanged. */
  messages: ChatMessage[];
  /** Optional sampling temperature. */
  temperature?: number;
  /** Optional cap on generated tokens; sent to Ollama as `num_predict`. */
  max_tokens?: number;
  /** Set to `"json"` to request JSON output. */
  format?: "json";
}
|
||||
|
||||
/** One entry of the `models` array returned by `GET /api/tags`. */
export interface ModelInfo {
  /** Model name as listed by Ollama. */
  name: string;
  /** Model identifier field from the same response entry. */
  model: string;
  /** Model size; the handlers format this value as a byte count. */
  size: number;
  /** Descriptive metadata shown in the model listing. */
  details: {
    parameter_size: string;
    quantization_level: string;
    family: string;
  };
}
|
||||
|
||||
/**
 * Shape of a pull status object.
 *
 * NOTE(review): nothing in this file references this type; presumably it
 * mirrors one progress object from `POST /api/pull` — confirm before relying
 * on the optional fields.
 */
export interface PullProgress {
  /** Status text reported by the server. */
  status: string;
  digest?: string;
  total?: number;
  completed?: number;
}
|
||||
|
||||
/**
 * Minimal client for the Ollama HTTP API.
 *
 * Every method issues a single non-streaming request. Any non-2xx response is
 * turned into an `Error` whose message is `Ollama API <status>: <body>`.
 */
export class OllamaClient {
  private host: string;

  /**
   * @param config - Connection settings. Trailing slashes on `host` are
   *   removed so paths can be appended directly.
   */
  constructor(config: OllamaConfig) {
    this.host = config.host.replace(/\/+$/, "");
  }

  /**
   * Perform a fetch against the configured host.
   *
   * @param path - Path beginning with `/`, appended to the host.
   * @param options - Passed through to `fetch`.
   * @returns The response, guaranteed to have an OK status.
   * @throws Error when the response status is not OK; the response body (or
   *   the status text if the body is empty or unreadable) is included.
   */
  private async request(path: string, options?: RequestInit): Promise<Response> {
    const res = await fetch(`${this.host}${path}`, options);
    if (!res.ok) {
      const body = await res.text().catch(() => "");
      throw new Error(`Ollama API ${res.status}: ${body || res.statusText}`);
    }
    return res;
  }

  /** Send `payload` as a JSON body using `method` (POST unless stated otherwise). */
  private sendJson(path: string, payload: unknown, method = "POST"): Promise<Response> {
    return this.request(path, {
      method,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
  }

  /**
   * Build the nested `options` object for generate/chat requests.
   *
   * Ollama reads sampling parameters from `options`; a top-level
   * `temperature` field is not part of the request schema, so it must be
   * placed here alongside `num_predict`.
   *
   * @returns The options object, or `undefined` when nothing was requested.
   */
  private static samplingOptions(req: {
    temperature?: number;
    max_tokens?: number;
  }): Record<string, number> | undefined {
    const options: Record<string, number> = {};
    if (req.temperature !== undefined) options.temperature = req.temperature;
    if (req.max_tokens !== undefined) options.num_predict = req.max_tokens;
    return Object.keys(options).length > 0 ? options : undefined;
  }

  /**
   * Generate a completion (non-streaming).
   *
   * @returns The `response` text from `POST /api/generate`.
   */
  async generate(req: GenerateRequest): Promise<string> {
    const body: Record<string, unknown> = {
      model: req.model,
      prompt: req.prompt,
      stream: false,
    };
    if (req.system) body.system = req.system;
    const options = OllamaClient.samplingOptions(req);
    if (options) body.options = options;
    if (req.format) body.format = req.format;

    const res = await this.sendJson("/api/generate", body);
    const data = (await res.json()) as { response: string };
    return data.response;
  }

  /**
   * Multi-turn chat completion (non-streaming).
   *
   * @returns The assistant message content from `POST /api/chat`.
   */
  async chat(req: ChatRequest): Promise<string> {
    const body: Record<string, unknown> = {
      model: req.model,
      messages: req.messages,
      stream: false,
    };
    const options = OllamaClient.samplingOptions(req);
    if (options) body.options = options;
    if (req.format) body.format = req.format;

    const res = await this.sendJson("/api/chat", body);
    const data = (await res.json()) as { message: { content: string } };
    return data.message.content;
  }

  /** List locally available models; an absent `models` field yields `[]`. */
  async listModels(): Promise<ModelInfo[]> {
    const res = await this.request("/api/tags");
    const data = (await res.json()) as { models?: ModelInfo[] };
    return data.models ?? [];
  }

  /** Get detailed info about a model (raw `POST /api/show` response). */
  async showModel(name: string): Promise<Record<string, unknown>> {
    const res = await this.sendJson("/api/show", { name });
    return (await res.json()) as Record<string, unknown>;
  }

  /**
   * Pull a model (blocking — waits for completion).
   *
   * @returns The final `status` string reported by `POST /api/pull`.
   */
  async pullModel(name: string): Promise<string> {
    const res = await this.sendJson("/api/pull", { name, stream: false });
    const data = (await res.json()) as { status: string };
    return data.status;
  }

  /** Delete a model. */
  async deleteModel(name: string): Promise<void> {
    await this.sendJson("/api/delete", { name }, "DELETE");
  }

  /**
   * Check if Ollama is reachable.
   *
   * @returns `true` when `GET /` succeeds, `false` on any failure
   *   (network error or non-OK status).
   */
  async health(): Promise<boolean> {
    try {
      await this.request("/");
      return true;
    } catch {
      return false;
    }
  }

  /** List running models; an absent `models` field yields `[]`. */
  async listRunning(): Promise<unknown[]> {
    const res = await this.request("/api/ps");
    const data = (await res.json()) as { models: unknown[] };
    return data.models ?? [];
  }

  /**
   * Generate embeddings.
   *
   * @param model - Embedding model name.
   * @param input - One text or a list of texts.
   * @returns The `embeddings` array from `POST /api/embed`.
   */
  async embed(model: string, input: string | string[]): Promise<number[][]> {
    const res = await this.sendJson("/api/embed", { model, input });
    const data = (await res.json()) as { embeddings: number[][] };
    return data.embeddings;
  }
}
|
||||
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Tool handler implementations for Ollama MCP server.
|
||||
*/
|
||||
|
||||
import type { OllamaClient, ChatMessage } from "./client.js";
|
||||
|
||||
/**
 * Render a byte count as a short human-readable size.
 *
 * Uses binary multiples (1024) with the labels B / KB / MB / GB. Values below
 * 1024 are shown in bytes so that small sizes are not rounded into the KB unit.
 *
 * @param bytes - Size in bytes.
 * @returns The size with 0 (B, KB), 1 (MB) or 2 (GB) decimal places.
 */
function formatBytes(bytes: number): string {
  const KIB = 1024;
  const MIB = KIB * 1024;
  const GIB = MIB * 1024;

  if (bytes < KIB) return `${bytes} B`;
  if (bytes < MIB) return `${(bytes / KIB).toFixed(0)} KB`;
  if (bytes < GIB) return `${(bytes / MIB).toFixed(1)} MB`;
  return `${(bytes / GIB).toFixed(2)} GB`;
}
|
||||
|
||||
/**
 * Build the tool handlers, keyed by tool name.
 *
 * Each handler receives the tool's arguments, calls the matching
 * `OllamaClient` method, and resolves to the text to show to the caller.
 * Errors from the client are not caught here and propagate to the caller.
 *
 * @param client - Client used for every Ollama call.
 * @returns An object mapping tool names to async handler functions.
 */
export function createHandlers(client: OllamaClient) {
  return {
    /** Single-prompt completion; returns the generated text. */
    ollama_generate: async (args: {
      model: string;
      prompt: string;
      system?: string;
      temperature?: number;
      max_tokens?: number;
      format?: "json";
    }) => client.generate(args),

    /** Multi-turn completion; returns the assistant's reply text. */
    ollama_chat: async (args: {
      model: string;
      messages: ChatMessage[];
      temperature?: number;
      max_tokens?: number;
      format?: "json";
    }) => client.chat(args),

    /** One bullet line per installed model, or a hint when none exist. */
    ollama_list_models: async () => {
      const installed = await client.listModels();
      if (installed.length === 0) {
        return "No models installed. Use ollama_pull_model to download one.";
      }

      const rows: string[] = [];
      for (const entry of installed) {
        const { parameter_size, quantization_level, family } = entry.details;
        rows.push(
          `- ${entry.name} (${formatBytes(entry.size)}, ${parameter_size}, ${quantization_level}, family: ${family})`,
        );
      }
      return rows.join("\n");
    },

    /**
     * Summarise a model. Only a readable subset is returned because the full
     * response can be huge (it includes the template and license text).
     */
    ollama_show_model: async (args: { name: string }) => {
      const info = await client.showModel(args.name);
      const details = info.details as Record<string, unknown> | undefined;
      const modelInfo = info.model_info as Record<string, unknown> | undefined;

      const summary: string[] = [`Model: ${args.name}`];

      if (details) {
        const detailRows: ReadonlyArray<readonly [string, string]> = [
          ["Family", "family"],
          ["Parameters", "parameter_size"],
          ["Quantization", "quantization_level"],
          ["Format", "format"],
        ];
        for (const [label, key] of detailRows) {
          summary.push(`${label}: ${details[key] ?? "unknown"}`);
        }
      }

      // Long text fields are reported by length only.
      const longFields: ReadonlyArray<readonly [string, string]> = [
        ["Template", "template"],
        ["License", "license"],
      ];
      for (const [label, key] of longFields) {
        const text = info[key];
        if (text) summary.push(`${label}: (${(text as string).length} chars)`);
      }

      if (modelInfo) {
        const allKeys = Object.keys(modelInfo);
        const shown = allKeys.slice(0, 10).join(", ");
        const ellipsis = allKeys.length > 10 ? "..." : "";
        summary.push(`Model info keys: ${shown}${ellipsis}`);
      }

      return summary.join("\n");
    },

    /** Download a model and report the final status. */
    ollama_pull_model: async (args: { name: string }) => {
      const finalStatus = await client.pullModel(args.name);
      return `Pull complete: ${args.name} — ${finalStatus}`;
    },

    /** Delete a model and confirm by name. */
    ollama_delete_model: async (args: { name: string }) => {
      await client.deleteModel(args.name);
      return `Deleted: ${args.name}`;
    },

    /** Report whether the server answered the health probe. */
    ollama_health: async () => {
      const reachable = await client.health();
      if (reachable) return "Ollama is running and reachable.";
      return "Ollama is not reachable.";
    },

    /** Pretty-printed JSON of loaded models, or a note when none are loaded. */
    ollama_list_running: async () => {
      const loaded = await client.listRunning();
      return loaded.length === 0
        ? "No models currently loaded in memory."
        : JSON.stringify(loaded, null, 2);
    },

    /** Embed text; returns a count/dimension header followed by the raw JSON. */
    ollama_embed: async (args: { model: string; input: string | string[] }) => {
      const vectors = await client.embed(args.model, args.input);
      const dimension = vectors[0]?.length ?? 0;
      return `Generated ${vectors.length} embedding(s), dimension: ${dimension}\n\n${JSON.stringify(vectors)}`;
    },
  };
}
|
||||
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Ollama MCP Server
|
||||
*
|
||||
* Connects AI assistants to a local Ollama instance for LLM inference,
|
||||
* model management, and embeddings.
|
||||
*
|
||||
* Environment variables:
|
||||
* OLLAMA_HOST — Base URL of the Ollama server (default: http://127.0.0.1:11434)
|
||||
*/
|
||||
|
||||
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
||||
import { OllamaClient } from "./client.js";
|
||||
import { toolDefs } from "./tools.js";
|
||||
import { createHandlers } from "./handlers.js";
|
||||
|
||||
const DEFAULT_OLLAMA_PORT = "11434";
const DEFAULT_OLLAMA_HOST = `http://127.0.0.1:${DEFAULT_OLLAMA_PORT}`;

/**
 * Turn the raw `OLLAMA_HOST` value into a base URL usable with `fetch`.
 *
 * - Unset, empty or whitespace-only values fall back to the default host.
 * - Values that already carry a scheme (`http://…`, `https://…`) are used
 *   as given (trimmed).
 * - Scheme-less values such as `192.168.1.5:11434` or `localhost` get
 *   `http://` prepended, and the default port is added when none is present.
 *   Ollama's own tooling accepts this scheme-less form for the same variable,
 *   so users are likely to have it set that way.
 *
 * @param raw - Value of the environment variable, if any.
 * @returns A base URL including scheme.
 */
function resolveOllamaHost(raw: string | undefined): string {
  const value = raw?.trim();
  if (!value) return DEFAULT_OLLAMA_HOST;
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(value)) return value;

  // Split off any path so the port check only looks at the host[:port] part.
  const slash = value.indexOf("/");
  const authority = slash === -1 ? value : value.slice(0, slash);
  const path = slash === -1 ? "" : value.slice(slash);
  const withPort = /:\d+$/.test(authority) ? authority : `${authority}:${DEFAULT_OLLAMA_PORT}`;
  return `http://${withPort}${path}`;
}

const host = resolveOllamaHost(process.env.OLLAMA_HOST);
|
||||
|
||||
// Wire the pieces together: HTTP client -> tool handlers -> MCP server.
const client = new OllamaClient({ host });
const handlers = createHandlers(client);

// Server identity reported to MCP clients.
const server = new McpServer({ name: "ollama", version: "0.1.0" });
|
||||
|
||||
// Register each tool from toolDefs with its corresponding handler.
// A definition without a handler is logged to stderr and skipped.
for (const [name, def] of Object.entries(toolDefs)) {
  const handler = handlers[name as keyof typeof handlers];
  if (!handler) {
    console.error(`No handler for tool: ${name}`);
    continue;
  }

  server.tool(
    name,
    def.description,
    def.inputSchema,
    // The argument shape differs per tool, so the callback stays loosely
    // typed here; the input schema registered alongside it describes the
    // expected arguments.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    async (args: any) => {
      try {
        const result = await (handler as Function)(args);
        return { content: [{ type: "text" as const, text: String(result) }] };
      } catch (err: unknown) {
        // Anything can be thrown, not just Error instances; narrow first so
        // the caller never sees "Error: undefined".
        const reason = err instanceof Error ? err.message : String(err);
        return {
          content: [{ type: "text" as const, text: `Error: ${reason}` }],
          isError: true,
        };
      }
    },
  );
}
|
||||
|
||||
/**
 * Connect the MCP server to a stdio transport and announce readiness.
 *
 * The status line goes to stderr via `console.error`, not to stdout.
 */
async function main(): Promise<void> {
  await server.connect(new StdioServerTransport());
  console.error(`Ollama MCP server running on stdio (host: ${host})`);
}

/** Log a startup failure and exit with a non-zero status. */
function exitOnFatal(err: unknown): void {
  console.error("Fatal error:", err);
  process.exit(1);
}

main().catch(exitOnFatal);
|
||||
+121
@@ -0,0 +1,121 @@
|
||||
/**
|
||||
* MCP tool definitions for Ollama.
|
||||
*
|
||||
* Each tool has a description and a Zod input schema.
|
||||
*/
|
||||
|
||||
import { z } from "zod";
|
||||
|
||||
// Field factories for schemas that recur across tools. Each call builds a
// fresh zod schema, so no schema instance is shared between tools.

/** Required string field carrying a model name. */
const modelNameField = (description: string) => z.string().describe(description);

/** Optional sampling temperature, constrained to the range 0–2. */
const temperatureField = (description: string) =>
  z.number().min(0).max(2).optional().describe(description);

/** Optional positive-integer cap on generated tokens. */
const maxTokensField = () =>
  z.number().int().positive().optional().describe("Maximum tokens to generate");

/** Optional output-format switch; `"json"` is the only accepted value. */
const jsonFormatField = () =>
  z.enum(["json"]).optional().describe("Set to 'json' to force JSON output");

/**
 * Tool catalogue: tool name -> description and zod input shape.
 * The keys are expected to match the handler names used at registration.
 */
export const toolDefs = {
  // ── Generation ──
  ollama_generate: {
    description:
      "Generate a text completion using a local Ollama model. Use for code generation, review, explanation, or any text task.",
    inputSchema: {
      model: modelNameField("Model name (e.g. qwen2.5-coder:7b)"),
      prompt: z.string().describe("The prompt to send to the model"),
      system: z.string().optional().describe("System prompt to set context/persona"),
      temperature: temperatureField("Sampling temperature (0=deterministic, default ~0.7)"),
      max_tokens: maxTokensField(),
      format: jsonFormatField(),
    },
  },

  ollama_chat: {
    description:
      "Multi-turn chat with a local Ollama model. Send a conversation history for context-aware responses.",
    inputSchema: {
      model: modelNameField("Model name (e.g. qwen2.5-coder:7b)"),
      messages: z
        .array(
          z.object({
            role: z.enum(["system", "user", "assistant"]),
            content: z.string(),
          }),
        )
        .describe("Conversation messages array"),
      temperature: temperatureField("Sampling temperature"),
      max_tokens: maxTokensField(),
      format: jsonFormatField(),
    },
  },

  // ── Model Management ──
  ollama_list_models: {
    description: "List all locally available Ollama models with size and quantization details.",
    inputSchema: {},
  },

  ollama_show_model: {
    description: "Get detailed information about a specific model (parameters, template, license).",
    inputSchema: {
      name: modelNameField("Model name (e.g. qwen2.5-coder:7b)"),
    },
  },

  ollama_pull_model: {
    description: "Download a model from the Ollama registry. Blocks until complete.",
    inputSchema: {
      name: modelNameField("Model name to pull (e.g. qwen2.5-coder:7b)"),
    },
  },

  ollama_delete_model: {
    description: "Delete a locally downloaded model to free disk space.",
    inputSchema: {
      name: modelNameField("Model name to delete"),
    },
  },

  // ── Status ──
  ollama_health: {
    description: "Check if the Ollama server is running and reachable.",
    inputSchema: {},
  },

  ollama_list_running: {
    description: "List currently loaded/running models in memory.",
    inputSchema: {},
  },

  // ── Embeddings ──
  ollama_embed: {
    description:
      "Generate embeddings for text using a local model. Useful for semantic search and similarity.",
    inputSchema: {
      model: modelNameField("Model name for embeddings"),
      input: z
        .union([z.string(), z.array(z.string())])
        .describe("Text or array of texts to embed"),
    },
  },
} as const;
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "Node16",
|
||||
"moduleResolution": "Node16",
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"declaration": true
|
||||
},
|
||||
"include": ["src/**/*"]
|
||||
}
|
||||
Reference in New Issue
Block a user