read_file: pass binary documents as EmbeddedResource

PDF, docx, xlsx, pptx etc. are returned as EmbeddedResource with BlobResourceContents (base64 + mimeType). claude.ai processes these client-side, same as Google Drive/Dropbox MCP connectors. Fallback chain: image → text → embedded resource (binary). The MIME type is guessed from the file extension when the server returns application/octet-stream or no content type. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-12 08:02:32 +02:00
parent a9359beead
commit b88adc4c50
1 changed files with 36 additions and 8 deletions
@@ -7,7 +7,7 @@ from typing import Annotated
 import httpx
 from pydantic import Field
 from mcp.server.fastmcp import FastMCP
-from mcp.types import TextContent, ImageContent
+from mcp.types import TextContent, ImageContent, EmbeddedResource, BlobResourceContents
 from starlette.applications import Starlette
 from starlette.routing import Mount
@@ -71,25 +71,53 @@ def list_files(
 # MIME types that read_file returns inline as ImageContent.
 IMAGE_TYPES = {"image/jpeg", "image/png", "image/gif", "image/webp", "image/svg+xml"}
 # Substrings that mark a content type as text; read_file matches them with `in`, not equality.
 TEXT_HINTS = ["text/", "json", "xml", "csv", "yaml", "javascript", "markdown"]
 # MIME types of binary office/e-book documents.
 # NOTE(review): nothing visible in this hunk references DOC_TYPES - confirm whether
 # read_file is meant to consult it before attempting the UTF-8 decode.
 DOC_TYPES = {
    "application/pdf", "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.ms-excel", "application/vnd.ms-powerpoint",
    "application/rtf", "application/epub+zip",
 }
 # Largest response body, in bytes, that read_file will return; bigger files yield an error text.
 MAX_BIN_SIZE = 25_000_000
 def _guess_mime(path, ct):
    if ct and ct != "application/octet-stream":
        return ct
    ext = path.rsplit(".", 1)[-1].lower() if "." in path else ""
    return {
        "pdf": "application/pdf", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "doc": "application/msword", "xls": "application/vnd.ms-excel", "ppt": "application/vnd.ms-powerpoint",
        "rtf": "application/rtf", "jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
        "gif": "image/gif", "webp": "image/webp", "svg": "image/svg+xml",
    }.get(ext, ct or "application/octet-stream")
@mcp.tool()
 def read_file(
-    path: Annotated[str, Field(description="Full file path, e.g. '/Documents/notes.txt', '/Photos/pic.jpg'")],
+    path: Annotated[str, Field(description="Full file path, e.g. '/Documents/notes.txt', '/report.pdf', '/photo.jpg'")],
-) -> list[TextContent | ImageContent]:
+) -> list[TextContent | ImageContent | EmbeddedResource]:
-    """Read a file. Text files return content directly. Images (jpg/png/gif/webp) are displayed inline. Other binary files return only metadata."""
+    """Read a file. Text files return content directly. Images are displayed inline. Documents (PDF, docx, xlsx, pptx) are passed as binary for the client to process. Max 25 MB."""
    # Without a current user the tool answers with an error text instead of raising.
    user = get_current_user()
    if not user: return [TextContent(type="text", text="Error: not authenticated")]
    # Single GET with a 60 s timeout; _dav and _auth are helpers defined outside this hunk.
    r = httpx.get(_dav(user, path), auth=_auth(user), timeout=60)
    # Any HTTP status >= 400 is reported back to the caller as a text message.
    if r.status_code >= 400: return [TextContent(type="text", text=f"Fehler: HTTP {r.status_code}")]
    # Strip parameters such as "; charset=..." from the Content-Type header, then let
    # _guess_mime substitute an extension-based type when the header is empty or octet-stream.
-    ct = r.headers.get("content-type", "").split(";")[0].strip()
+    ct = _guess_mime(path, r.headers.get("content-type", "").split(";")[0].strip())
    # NOTE(review): the size cap is checked on r.content, i.e. after the response body
    # has already been read - confirm this is acceptable for very large files.
-    if ct in IMAGE_TYPES and len(r.content) < 10_000_000:
+    size = len(r.content)
    if size > MAX_BIN_SIZE:
        return [TextContent(type="text", text=f"Datei zu gross: {size:,} bytes (max {MAX_BIN_SIZE:,}). Typ: {ct}")]
    # Images go out base64-encoded as ImageContent.
    if ct in IMAGE_TYPES:
        return [ImageContent(type="image", data=base64.b64encode(r.content).decode(), mimeType=ct)]
    # Content types containing any TEXT_HINTS substring are returned as text, cut to 100000 characters.
    if any(t in ct for t in TEXT_HINTS):
        return [TextContent(type="text", text=r.text[:100000])]
    # Any other type is still tried as UTF-8 text first; only bytes that fail to decode
    # fall through to the binary path below.
    # NOTE(review): DOC_TYPES is not consulted here, so a document whose bytes happen to be
    # valid UTF-8 is returned as truncated text rather than as an EmbeddedResource.
    try:
-        return [TextContent(type="text", text=r.content.decode("utf-8")[:100000])]
+        text = r.content.decode("utf-8")
        return [TextContent(type="text", text=text[:100000])]
    # NOTE(review): bytes.decode signals invalid input with UnicodeDecodeError; catching
    # Exception is broader than this needs.
    except Exception:
-        return [TextContent(type="text", text=f"Binaerdatei ({len(r.content):,} bytes, Typ: {ct}). Nutze read_file nur fuer Text und Bilder.")]
+        pass
    # Binary fallback: the raw bytes are base64-encoded and wrapped in an EmbeddedResource.
    # NOTE(review): path is interpolated into the file:// URI without percent-encoding -
    # confirm that paths with spaces or special characters are accepted.
    b64 = base64.b64encode(r.content).decode()
    return [EmbeddedResource(type="resource", resource=BlobResourceContents(uri=f"file://{path}", blob=b64, mimeType=ct))]
@mcp.tool()