read_file: pass binary documents as EmbeddedResource
PDF, docx, xlsx, pptx etc. are returned as EmbeddedResource with BlobResourceContents (base64 + mimeType). claude.ai processes these client-side, same as Google Drive/Dropbox MCP connectors. Fallback chain: image → text → embedded resource (binary). The MIME type is guessed from the file extension when the server returns application/octet-stream. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+36
-8
@@ -7,7 +7,7 @@ from typing import Annotated
|
|||||||
import httpx
|
import httpx
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from mcp.server.fastmcp import FastMCP
|
from mcp.server.fastmcp import FastMCP
|
||||||
from mcp.types import TextContent, ImageContent
|
from mcp.types import TextContent, ImageContent, EmbeddedResource, BlobResourceContents
|
||||||
from starlette.applications import Starlette
|
from starlette.applications import Starlette
|
||||||
from starlette.routing import Mount
|
from starlette.routing import Mount
|
||||||
|
|
||||||
@@ -71,25 +71,53 @@ def list_files(
|
|||||||
|
|
||||||
# MIME types that read_file returns inline as ImageContent.
IMAGE_TYPES = {
    "image/jpeg",
    "image/png",
    "image/gif",
    "image/webp",
    "image/svg+xml",
}

# Substrings that mark a content type as textual (matched with `in`).
# NOTE: "xml" is also a substring of the
# "application/vnd.openxmlformats-officedocument.*" types listed below.
TEXT_HINTS = [
    "text/",
    "json",
    "xml",
    "csv",
    "yaml",
    "javascript",
    "markdown",
]

# MIME types of binary office/e-book documents.
DOC_TYPES = {
    # PDF and legacy Word
    "application/pdf",
    "application/msword",
    # Office Open XML (docx / xlsx / pptx)
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    # Legacy Excel / PowerPoint
    "application/vnd.ms-excel",
    "application/vnd.ms-powerpoint",
    # Other document formats
    "application/rtf",
    "application/epub+zip",
}

# Size limit in bytes for a file body (25 MB, decimal).
MAX_BIN_SIZE = 25_000_000
|
||||||
|
|
||||||
|
def _guess_mime(path, ct):
|
||||||
|
if ct and ct != "application/octet-stream":
|
||||||
|
return ct
|
||||||
|
ext = path.rsplit(".", 1)[-1].lower() if "." in path else ""
|
||||||
|
return {
|
||||||
|
"pdf": "application/pdf", "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
|
"doc": "application/msword", "xls": "application/vnd.ms-excel", "ppt": "application/vnd.ms-powerpoint",
|
||||||
|
"rtf": "application/rtf", "jpg": "image/jpeg", "jpeg": "image/jpeg", "png": "image/png",
|
||||||
|
"gif": "image/gif", "webp": "image/webp", "svg": "image/svg+xml",
|
||||||
|
}.get(ext, ct or "application/octet-stream")
|
||||||
|
|
||||||
@mcp.tool()
def read_file(
    path: Annotated[str, Field(description="Full file path, e.g. '/Documents/notes.txt', '/report.pdf', '/photo.jpg'")],
) -> list[TextContent | ImageContent | EmbeddedResource]:
    """Read a file. Text files return content directly. Images are displayed inline. Documents (PDF, docx, xlsx, pptx) are passed as binary for the client to process. Max 25 MB."""
    # NOTE: the docstring above is kept verbatim on purpose; FastMCP is
    # understood to publish it as the tool description shown to the client.
    #
    # Result by case (first match wins):
    #   not authenticated / HTTP >= 400 / body > MAX_BIN_SIZE -> TextContent message
    #   content type in IMAGE_TYPES                           -> ImageContent (base64)
    #   content type in DOC_TYPES                             -> EmbeddedResource (base64 blob)
    #   textual content type, or body decodes as UTF-8        -> TextContent (first 100000 chars)
    #   anything else                                         -> EmbeddedResource (base64 blob)
    from urllib.parse import quote

    user = get_current_user()
    if not user:
        return [TextContent(type="text", text="Error: not authenticated")]

    r = httpx.get(_dav(user, path), auth=_auth(user), timeout=60)
    if r.status_code >= 400:
        return [TextContent(type="text", text=f"Fehler: HTTP {r.status_code}")]

    # Drop parameters such as "; charset=utf-8" before resolving the type.
    ct = _guess_mime(path, r.headers.get("content-type", "").split(";")[0].strip())

    # The body is already fully in memory here; the limit only bounds what is
    # sent back to the client.
    size = len(r.content)
    if size > MAX_BIN_SIZE:
        return [TextContent(type="text", text=f"Datei zu gross: {size:,} bytes (max {MAX_BIN_SIZE:,}). Typ: {ct}")]

    if ct in IMAGE_TYPES:
        return [ImageContent(type="image", data=base64.b64encode(r.content).decode(), mimeType=ct)]

    # Known document types must bypass the text heuristics: the "xml" entry in
    # TEXT_HINTS is a substring of every
    # "application/vnd.openxmlformats-officedocument.*" type, so docx/xlsx/pptx
    # would otherwise be returned as mis-decoded text.
    if ct not in DOC_TYPES:
        if any(t in ct for t in TEXT_HINTS):
            return [TextContent(type="text", text=r.text[:100000])]
        try:
            text = r.content.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: treat as binary and fall through.
            text = None
        if text is not None:
            return [TextContent(type="text", text=text[:100000])]

    b64 = base64.b64encode(r.content).decode()
    # Percent-encode the path and force an empty authority ("file:///...") so
    # spaces, '#', '?' or a missing leading slash cannot yield a malformed URI
    # or turn the first path segment into a host name.
    uri = "file:///" + quote(path.lstrip("/"))
    return [EmbeddedResource(type="resource", resource=BlobResourceContents(uri=uri, blob=b64, mimeType=ct))]
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
|
|||||||
Reference in New Issue
Block a user