read_file: pass binary documents as EmbeddedResource

PDF, docx, xlsx, pptx etc. are returned as EmbeddedResource with
BlobResourceContents (base64 + mimeType). claude.ai processes these
client-side, same as Google Drive/Dropbox MCP connectors.

Fallback chain: image → text → embedded resource (binary).
MIME type guessed from extension when server returns octet-stream.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Stefan Lohmaier
2026-06-12 08:02:32 +02:00
parent a9359beead
commit b88adc4c50
+36 -8
View File
@@ -7,7 +7,7 @@ from typing import Annotated
import httpx
from pydantic import Field
from mcp.server.fastmcp import FastMCP
from mcp.types import TextContent, ImageContent
from mcp.types import TextContent, ImageContent, EmbeddedResource, BlobResourceContents
from starlette.applications import Starlette
from starlette.routing import Mount
@@ -71,25 +71,53 @@ def list_files(
# MIME types that are matched exactly to decide whether a file is an image.
IMAGE_TYPES = {
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
    "image/svg+xml",
}

# Fragments that are searched for *inside* a MIME type to spot textual content.
TEXT_HINTS = [
    "text/",
    "json",
    "xml",
    "csv",
    "yaml",
    "javascript",
    "markdown",
]

# MIME types of binary document formats (PDF, MS Office, OOXML, RTF, EPUB).
DOC_TYPES = {
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    "application/rtf",
    "application/epub+zip",
}

# Upper bound, in bytes, on the payload size (25 MB, decimal).
MAX_BIN_SIZE = 25 * 1_000_000
def _guess_mime(path, ct):
if ct and ct != "application/octet-stream":
return ct
ext = path.rsplit(".", 1)[-1].lower() if "." in path else ""
return {
"pdf": "application/pdf", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"doc": "application/msword", "xls": "application/vnd.ms-excel", "ppt": "application/vnd.ms-powerpoint",
"rtf": "application/rtf", "jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
"gif": "image/gif", "webp": "image/webp", "svg": "image/svg+xml",
}.get(ext, ct or "application/octet-stream")
@mcp.tool()
def read_file(
    path: Annotated[str, Field(description="Full file path, e.g. '/Documents/notes.txt', '/report.pdf', '/photo.jpg'")],
) -> list[TextContent | ImageContent | EmbeddedResource]:
    """Read a file. Text files return content directly. Images are displayed inline. Documents (PDF, docx, xlsx, pptx) are passed as binary for the client to process. Max 25 MB."""
    # NOTE: the docstring above is kept client-facing on purpose; FastMCP
    # publishes a tool's docstring as its description.
    #
    # Result, in order of precedence:
    #   1. error text  - not authenticated, HTTP status >= 400, or too large
    #   2. ImageContent     - content type is in IMAGE_TYPES
    #   3. EmbeddedResource - content type is in DOC_TYPES
    #   4. TextContent      - content type looks textual, or body is valid UTF-8
    #   5. EmbeddedResource - any other binary payload
    user = get_current_user()
    if not user:
        return [TextContent(type="text", text="Error: not authenticated")]

    # The whole body is downloaded here; the size limit below is applied afterwards.
    r = httpx.get(_dav(user, path), auth=_auth(user), timeout=60)
    if r.status_code >= 400:
        return [TextContent(type="text", text=f"Fehler: HTTP {r.status_code}")]

    # Drop any "; charset=..." parameter, then fall back to the file
    # extension when the server only reports a generic type.
    ct = _guess_mime(path, r.headers.get("content-type", "").split(";")[0].strip())
    size = len(r.content)
    if size > MAX_BIN_SIZE:
        return [TextContent(type="text", text=f"Datei zu gross: {size:,} bytes (max {MAX_BIN_SIZE:,}). Typ: {ct}")]

    if ct in IMAGE_TYPES:
        return [ImageContent(type="image", data=base64.b64encode(r.content).decode(), mimeType=ct)]

    # Known document types must bypass the text heuristics: the OOXML types
    # ("application/vnd.openxmlformats-...") contain the substring "xml" and
    # would otherwise match TEXT_HINTS and come back as garbled text.
    if ct not in DOC_TYPES:
        if any(t in ct for t in TEXT_HINTS):
            return [TextContent(type="text", text=r.text[:100000])]
        try:
            # No textual content type: still treat the body as text if it
            # decodes cleanly as UTF-8.
            return [TextContent(type="text", text=r.content.decode("utf-8")[:100000])]
        except UnicodeDecodeError:
            pass  # not UTF-8 text; hand it over as a binary resource below

    b64 = base64.b64encode(r.content).decode()
    return [EmbeddedResource(type="resource", resource=BlobResourceContents(uri=f"file://{path}", blob=b64, mimeType=ct))]
@mcp.tool()