Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions requirements-prod.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ aiofiles>=24.0.0
apscheduler>=3.10.0,<4.0.0
pywinpty>=2.0.0; sys_platform == "win32"
pyyaml>=6.0.0
python-docx>=1.1.0
openpyxl>=3.1.0
PyPDF2>=3.0.0
python-pptx>=1.0.0

# Dev dependencies
ruff>=0.8.0
Expand Down
6 changes: 3 additions & 3 deletions server/routers/expand_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError

from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..services.expand_chat_session import (
ExpandChatSession,
create_expand_session,
Expand Down Expand Up @@ -181,12 +181,12 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip()

# Parse attachments if present
attachments: list[ImageAttachment] = []
attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", [])
if raw_attachments:
try:
for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att))
attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({
Expand Down
6 changes: 3 additions & 3 deletions server/routers/spec_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect
from pydantic import BaseModel, ValidationError

from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..services.spec_chat_session import (
SpecChatSession,
create_session,
Expand Down Expand Up @@ -242,12 +242,12 @@ async def spec_chat_websocket(websocket: WebSocket, project_name: str):
user_content = message.get("content", "").strip()

# Parse attachments if present
attachments: list[ImageAttachment] = []
attachments: list[FileAttachment] = []
raw_attachments = message.get("attachments", [])
if raw_attachments:
try:
for raw_att in raw_attachments:
attachments.append(ImageAttachment(**raw_att))
attachments.append(FileAttachment(**raw_att))
except (ValidationError, Exception) as e:
logger.warning(f"Invalid attachment data: {e}")
await websocket.send_json({
Expand Down
59 changes: 42 additions & 17 deletions server/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pathlib import Path
from typing import Literal

from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator

# Import model constants from registry (single source of truth)
_root = Path(__file__).parent.parent
Expand Down Expand Up @@ -331,36 +331,61 @@ class WSAgentUpdateMessage(BaseModel):


# ============================================================================
# Spec Chat Schemas
# Chat Attachment Schemas
# ============================================================================

# Maximum image file size: 5 MB
MAX_IMAGE_SIZE = 5 * 1024 * 1024
# Size limits
MAX_IMAGE_SIZE = 5 * 1024 * 1024 # 5 MB for images
MAX_DOCUMENT_SIZE = 20 * 1024 * 1024 # 20 MB for documents

_IMAGE_MIME_TYPES = {'image/jpeg', 'image/png'}

class ImageAttachment(BaseModel):
"""Image attachment from client for spec creation chat."""

class FileAttachment(BaseModel):
"""File attachment from client for spec creation / expand project chat."""
filename: str = Field(..., min_length=1, max_length=255)
mimeType: Literal['image/jpeg', 'image/png']
mimeType: Literal[
'image/jpeg', 'image/png',
'text/plain', 'text/markdown', 'text/csv',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/pdf',
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
]
base64Data: str

@field_validator('base64Data')
@classmethod
def validate_base64_and_size(cls, v: str) -> str:
"""Validate that base64 data is valid and within size limit."""
def validate_base64(cls, v: str) -> str:
"""Validate that base64 data is decodable."""
try:
decoded = base64.b64decode(v)
if len(decoded) > MAX_IMAGE_SIZE:
raise ValueError(
f'Image size ({len(decoded) / (1024 * 1024):.1f} MB) exceeds '
f'maximum of {MAX_IMAGE_SIZE // (1024 * 1024)} MB'
)
base64.b64decode(v)
return v
except Exception as e:
if 'Image size' in str(e):
raise
raise ValueError(f'Invalid base64 data: {e}')

@model_validator(mode='after')
def validate_size(self) -> 'FileAttachment':
    """Enforce the size cap that applies to this attachment's MIME type.

    MIME types listed in _IMAGE_MIME_TYPES are capped at MAX_IMAGE_SIZE;
    every other accepted MIME type is capped at MAX_DOCUMENT_SIZE. The
    limit is applied to the decoded payload, not the base64 text.

    Raises:
        ValueError: If the decoded payload is larger than the cap.
    """
    try:
        payload = base64.b64decode(self.base64Data)
    except Exception:
        # Undecodable data is reported by the base64Data field validator.
        return self

    is_image_type = self.mimeType in _IMAGE_MIME_TYPES
    limit = MAX_IMAGE_SIZE if is_image_type else MAX_DOCUMENT_SIZE
    kind = "Image" if is_image_type else "Document"

    payload_size = len(payload)
    if payload_size <= limit:
        return self

    raise ValueError(
        f'{kind} size ({payload_size / (1024 * 1024):.1f} MB) exceeds '
        f'maximum of {limit // (1024 * 1024)} MB'
    )


# ============================================================================
# Filesystem Schemas
Expand Down
36 changes: 36 additions & 0 deletions server/services/chat_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@
from env_constants import API_ENV_VARS # noqa: E402, F401
from rate_limit_utils import is_rate_limit_error, parse_retry_after # noqa: E402, F401

from ..schemas import FileAttachment
from ..utils.document_extraction import (
extract_text_from_document,
is_document,
is_image,
)

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -88,6 +95,35 @@ async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenera
raise


def build_attachment_content_blocks(attachments: list[FileAttachment]) -> list[dict]:
    """Convert FileAttachment objects to Claude API content blocks.

    Images become image content blocks carrying the attachment's base64
    payload unchanged. Documents are passed to extract_text_from_document
    and become text content blocks prefixed with the uploaded filename.

    Args:
        attachments: Attachments to convert, in the order they should appear.

    Returns:
        One content block per recognised attachment, in input order. An
        attachment whose MIME type is neither an image nor a document is
        skipped, and a warning is logged so the omission is visible.

    Raises:
        DocumentExtractionError: If a document cannot be read.
    """
    blocks: list[dict] = []
    for att in attachments:
        if is_image(att.mimeType):
            blocks.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": att.mimeType,
                    "data": att.base64Data,
                },
            })
        elif is_document(att.mimeType):
            text = extract_text_from_document(att.base64Data, att.mimeType, att.filename)
            blocks.append({
                "type": "text",
                "text": f"[Content of uploaded file: {att.filename}]\n\n{text}",
            })
        else:
            # The schema's accepted MIME types and the extraction helpers are
            # maintained separately; surface any drift instead of silently
            # dropping the user's file.
            logger.warning(
                "Skipping attachment %r: unsupported MIME type %r",
                att.filename,
                att.mimeType,
            )
    return blocks


async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
"""Yield a single multimodal user message in Claude Agent SDK format.

Expand Down
27 changes: 14 additions & 13 deletions server/services/expand_chat_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv

from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import (
ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error,
make_multimodal_message,
safe_receive_response,
Expand Down Expand Up @@ -226,7 +228,7 @@ async def start(self) -> AsyncGenerator[dict, None]:
async def send_message(
self,
user_message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
Expand Down Expand Up @@ -273,7 +275,7 @@ async def send_message(
async def _query_claude(
self,
message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
Expand All @@ -289,17 +291,16 @@ async def _query_claude(
content_blocks: list[dict[str, Any]] = []
if message:
content_blocks.append({"type": "text", "text": message})
for att in attachments:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})

# Add attachment blocks (images as image blocks, documents as extracted text)
try:
content_blocks.extend(build_attachment_content_blocks(attachments))
except DocumentExtractionError as e:
yield {"type": "error", "content": str(e)}
return

await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else:
await self.client.query(message)

Expand Down
26 changes: 12 additions & 14 deletions server/services/spec_chat_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv

from ..schemas import ImageAttachment
from ..schemas import FileAttachment
from ..utils.document_extraction import DocumentExtractionError
from .chat_constants import (
ROOT_DIR,
build_attachment_content_blocks,
check_rate_limit_error,
make_multimodal_message,
safe_receive_response,
Expand Down Expand Up @@ -201,7 +203,7 @@ async def start(self) -> AsyncGenerator[dict, None]:
async def send_message(
self,
user_message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Send user message and stream Claude's response.
Expand Down Expand Up @@ -247,7 +249,7 @@ async def send_message(
async def _query_claude(
self,
message: str,
attachments: list[ImageAttachment] | None = None
attachments: list[FileAttachment] | None = None
) -> AsyncGenerator[dict, None]:
"""
Internal method to query Claude and stream responses.
Expand All @@ -273,21 +275,17 @@ async def _query_claude(
if message:
content_blocks.append({"type": "text", "text": message})

# Add image blocks
for att in attachments:
content_blocks.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
# Add attachment blocks (images as image blocks, documents as extracted text)
try:
content_blocks.extend(build_attachment_content_blocks(attachments))
except DocumentExtractionError as e:
yield {"type": "error", "content": str(e)}
return

# Send multimodal content to Claude using async generator format
# The SDK's query() accepts AsyncIterable[dict] for custom message formats
await self.client.query(make_multimodal_message(content_blocks))
logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
logger.info(f"Sent multimodal message with {len(attachments)} attachment(s)")
else:
# Text-only message: use string format
await self.client.query(message)
Expand Down
Loading
Loading