AutoForgeAI · leonvanzyl · Mar 25, 2026 · Mar 25, 2026
diff --git a/requirements-prod.txt b/requirements-prod.txt
@@ -12,3 +12,7 @@ aiofiles>=24.0.0
 apscheduler>=3.10.0,<4.0.0
 pywinpty>=2.0.0; sys_platform == "win32"
 pyyaml>=6.0.0
+python-docx>=1.1.0
+openpyxl>=3.1.0
+PyPDF2>=3.0.0
+python-pptx>=1.0.0
diff --git a/requirements.txt b/requirements.txt
@@ -10,6 +10,10 @@ aiofiles>=24.0.0
 apscheduler>=3.10.0,<4.0.0
 pywinpty>=2.0.0; sys_platform == "win32"
 pyyaml>=6.0.0
+python-docx>=1.1.0
+openpyxl>=3.1.0
+PyPDF2>=3.0.0
+python-pptx>=1.0.0
 
 # Dev dependencies
 ruff>=0.8.0

diff --git a/server/routers/expand_project.py b/server/routers/expand_project.py
@@ -13,7 +13,7 @@
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from pydantic import BaseModel, ValidationError
 
-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
 from ..services.expand_chat_session import (
     ExpandChatSession,
     create_expand_session,
@@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
                     user_content = message.get("content", "").strip()
 
                     # Parse attachments if present
-                    attachments: list[ImageAttachment] = []
+                    attachments: list[FileAttachment] = []
                     raw_attachments = message.get("attachments", [])
                     if raw_attachments:
                         try:
                             for raw_att in raw_attachments:
-                                attachments.append(ImageAttachment(**raw_att))
+                                attachments.append(FileAttachment(**raw_att))
                         except (ValidationError, Exception) as e:
                             logger.warning(f"Invalid attachment data: {e}")
                             await websocket.send_json({

diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py
@@ -12,7 +12,7 @@
 from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
 from pydantic import BaseModel, ValidationError
 
-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
 from ..services.spec_chat_session import (
     SpecChatSession,
     create_session,
@@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
                     user_content = message.get("content", "").strip()
 
                     # Parse attachments if present
-                    attachments: list[ImageAttachment] = []
+                    attachments: list[FileAttachment] = []
                     raw_attachments = message.get("attachments", [])
                     if raw_attachments:
                         try:
                             for raw_att in raw_attachments:
-                                attachments.append(ImageAttachment(**raw_att))
+                                attachments.append(FileAttachment(**raw_att))
                         except (ValidationError, Exception) as e:
                             logger.warning(f"Invalid attachment data: {e}")
                             await websocket.send_json({

diff --git a/server/schemas.py b/server/schemas.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 from typing import Literal
 
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, field_validator, model_validator
 
 # Import model constants from registry (single source of truth)
 _root = Path(__file__).parent.parent
@@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):
 
 
 # ============================================================================
-# Spec Chat Schemas
+# Chat Attachment Schemas
 # ============================================================================
 
-# Maximum image file size: 5 MB
-MAX_IMAGE_SIZE = 5 * 1024 * 1024
+# Size limits
+MAX_IMAGE_SIZE = 5 * 1024 * 1024      # 5 MB for images
+MAX_DOCUMENT_SIZE = 20 * 1024 * 1024   # 20 MB for documents
 
+_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}
 
-class ImageAttachment(BaseModel):
-    """Image attachment from client for spec creation chat."""
+
+class FileAttachment(BaseModel):
+    """Image or document attachment sent by the client in spec creation / expand project chat."""
     filename: str = Field(..., min_length=1, max_length=255)
-    mimeType: Literal['image/jpeg', 'image/png']
+    mimeType: Literal[
+        'image/jpeg', 'image/png',
+        'text/plain', 'text/markdown', 'text/csv',
+        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+        'application/pdf',
+        'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    ]
     base64Data: str
 
     @field_validator('base64Data')
     @classmethod
-    def validate_base64_and_size(cls, v: str) -> str:
-        """Validate that base64 data is valid and within size limit."""
+    def validate_base64(cls, v: str) -> str:
+        """Raise ValueError if the data cannot be base64-decoded; size is checked in validate_size."""
         try:
-            decoded = base64.b64decode(v)
-            if len(decoded) > MAX_IMAGE_SIZE:
-                raise ValueError(
-                    f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
-                    f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
-                )
+            base64.b64decode(v)
             return v
         except Exception as e:
-            if 'Image size' in str(e):
-                raise
-            raise ValueError(f'Invalid base64 data: {e}')
+            raise ValueError(f'Invalid base64 data: {e}') from e
 
+    @model_validator(mode='after')
+    def validate_size(self) -> 'FileAttachment':
+        """Raise ValueError if the decoded payload exceeds its MIME type's size cap."""
+        if self.mimeType in _IMAGE_MIME_TYPES:
+            max_size, label = MAX_IMAGE_SIZE, "Image"
+        else:
+            max_size, label = MAX_DOCUMENT_SIZE, "Document"
+        # Four base64 characters decode to at most three bytes, so an encoded
+        # length within this bound needs no second full decode.
+        if len(self.base64Data) * 3 // 4 <= max_size:
+            return self
+        try:
+            size = len(base64.b64decode(self.base64Data))
+        except Exception:
+            return self  # Already caught by field validator
+        if size > max_size:
+            raise ValueError(
+                f'{label} size ({size / (1024 * 1024):.1f} MB) exceeds '
+                f'maximum of {max_size // (1024 * 1024)} MB'
+            )
+        return self
+
 
 # ============================================================================
 # Filesystem Schemas

diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py
@@ -35,6 +35,13 @@
 from env_constants import API_ENV_VARS  # noqa: E402, F401
 from rate_limit_utils import is_rate_limit_error, parse_retry_after  # noqa: E402, F401
 
+from ..schemas import FileAttachment
+from ..utils.document_extraction import (
+    extract_text_from_document,
+    is_document,
+    is_image,
+)
+
 logger = logging.getLogger(__name__)
 
 
@@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
             raise
 
 
+def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
+    """Convert FileAttachment objects to Claude API content blocks.
+
+    Images become base64 image blocks; documents are passed through
+    extract_text_from_document and become filename-prefixed text blocks.
+    Anything else is skipped with a logged warning.
+
+    Returns:
+        One content block per recognised attachment, in input order.
+
+    Raises:
+        DocumentExtractionError: If a document cannot be read.
+    """
+    blocks: list[dict] = []
+    for att in attachments:
+        if is_image(att.mimeType):
+            source = {"type": "base64", "media_type": att.mimeType, "data": att.base64Data}
+            blocks.append({"type": "image", "source": source})
+        elif is_document(att.mimeType):
+            text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
+            blocks.append({
+                "type": "text",
+                "text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
+            })
+        else:
+            logger.warning("Skipping unsupported attachment %r (%s)", att.filename, att.mimeType)
+    return blocks
+
+
 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
     """Yield a single multimodal user message in Claude Agent SDK format.
 

diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py
@@ -21,9 +21,11 @@
 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv
 
-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
+from ..utils.document_extraction import DocumentExtractionError
 from .chat_constants import (
     ROOT_DIR,
+    build_attachment_content_blocks,
     check_rate_limit_error,
     make_multimodal_message,
     safe_receive_response,
@@ -226,7 +228,7 @@ async def start(self) -> AsyncGenerator[dict, None]:
     async def send_message(
         self,
         user_message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
     ) -> AsyncGenerator[dict, None]:
         """
         Send user message and stream Claude's response.
@@ -273,7 +275,7 @@ async def send_message(
     async def _query_claude(
         self,
         message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
     ) -> AsyncGenerator[dict, None]:
         """
         Internal method to query Claude and stream responses.
@@ -289,17 +291,16 @@ async def _query_claude(
             content_blocks: list[dict[str, Any]] = []
             if message:
                 content_blocks.append({"type": "text", "text": message})
-            for att in attachments:
-                content_blocks.append({
-                    "type": "image",
-                    "source": {
-                        "type": "base64",
-                        "media_type": att.mimeType,
-                        "data": att.base64Data,
-                    }
-                })
+
+            # Add attachment blocks (images as image blocks, documents as extracted text)
+            try:
+                content_blocks.extend(build_attachment_content_blocks(attachments))
+            except DocumentExtractionError as e:
+                yield {"type": "error", "content": str(e)}
+                return
+
             await self.client.query(make_multimodal_message(content_blocks))
-            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+            logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
         else:
             await self.client.query(message)
 

diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py
@@ -18,9 +18,11 @@
 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv
 
-from ..schemas import ImageAttachment
+from ..schemas import FileAttachment
+from ..utils.document_extraction import DocumentExtractionError
 from .chat_constants import (
     ROOT_DIR,
+    build_attachment_content_blocks,
     check_rate_limit_error,
     make_multimodal_message,
     safe_receive_response,
@@ -201,7 +203,7 @@ async def start(self) -> AsyncGenerator[dict, None]:
     async def send_message(
         self,
         user_message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
     ) -> AsyncGenerator[dict, None]:
         """
         Send user message and stream Claude's response.
@@ -247,7 +249,7 @@ async def send_message(
     async def _query_claude(
         self,
         message: str,
-        attachments: list[ImageAttachment] | None = None
+        attachments: list[FileAttachment] | None = None
     ) -> AsyncGenerator[dict, None]:
         """
         Internal method to query Claude and stream responses.
@@ -273,21 +275,17 @@ async def _query_claude(
             if message:
                 content_blocks.append({"type": "text", "text": message})
 
-            # Add image blocks
-            for att in attachments:
-                content_blocks.append({
-                    "type": "image",
-                    "source": {
-                        "type": "base64",
-                        "media_type": att.mimeType,
-                        "data": att.base64Data,
-                    }
-                })
+            # Add attachment blocks (images as image blocks, documents as extracted text)
+            try:
+                content_blocks.extend(build_attachment_content_blocks(attachments))
+            except DocumentExtractionError as e:
+                yield {"type": "error", "content": str(e)}
+                return
 
             # Send multimodal content to Claude using async generator format
             # The SDK's query() accepts AsyncIterable[dict] for custom message formats
             await self.client.query(make_multimodal_message(content_blocks))
-            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
+            logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
         else:
             # Text-only message: use string format
             await self.client.query(message)