Files
Resume-Matcher/apps/backend/app/services/parser.py
srbhr a5d76610f8 feat: improve LLM JSON parsing and fix resume UI styling
Backend:
- Add JSON mode support for reliable structured output
- Implement retry logic with 2 attempts for LLM calls
- Add bracket-matching JSON extraction for malformed responses
- Simplify prompts to work better across LLM providers

Frontend:
- Add back button to Resume Builder with Swiss style
- Increase resume width from 210mm to 250mm (~20% wider)
- Apply consistent Swiss-style shadows (solid black offset)
- Fix shadow clipping at bottom with proper padding
- Add overflow-y-auto for scrollable resume viewer

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-24 04:37:41 +05:30

62 lines
1.6 KiB
Python

"""Document parsing service using markitdown and LLM."""
import asyncio
import tempfile
from pathlib import Path
from typing import Any

from markitdown import MarkItDown

from app.llm import complete_json
from app.prompts import PARSE_RESUME_PROMPT
from app.prompts.templates import RESUME_SCHEMA_EXAMPLE
from app.schemas import ResumeData
def _convert_to_markdown(content: bytes, suffix: str) -> str:
    """Synchronously convert raw document bytes to Markdown via markitdown.

    Args:
        content: Raw file bytes.
        suffix: File extension (including the leading dot) given to the
            temporary file that is handed to markitdown.

    Returns:
        The ``text_content`` of the markitdown conversion result.
    """
    # delete=False because the file is closed before conversion and then
    # reopened by path; removal is handled by the ``finally`` below.
    tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    tmp_path = Path(tmp.name)
    try:
        # The write sits inside the try so a failed write (e.g. disk full)
        # cannot leave the temp file behind.
        with tmp:
            tmp.write(content)
        # The handle is closed (and flushed) at this point, so the converter
        # sees the complete file.
        result = MarkItDown().convert(str(tmp_path))
        return result.text_content
    finally:
        tmp_path.unlink(missing_ok=True)


async def parse_document(content: bytes, filename: str) -> str:
    """Convert PDF/DOCX to Markdown using markitdown.

    The conversion itself is synchronous, so it is run in a worker thread to
    keep the event loop free while the document is processed.

    Args:
        content: Raw file bytes.
        filename: Original filename; only its lower-cased extension is used,
            as the suffix of the temporary file.

    Returns:
        Markdown text content.

    Raises:
        Any exception raised while writing the temporary file or by the
        markitdown conversion is propagated unchanged.
    """
    suffix = Path(filename).suffix.lower()
    return await asyncio.to_thread(_convert_to_markdown, content, suffix)
async def parse_resume_to_json(markdown_text: str) -> dict[str, Any]:
    """Extract structured resume data from Markdown with the LLM.

    The resume text and the example schema are substituted into
    ``PARSE_RESUME_PROMPT``, the prompt is sent through ``complete_json``,
    and the reply is validated with ``ResumeData`` before being returned as
    a plain dict.

    Args:
        markdown_text: Resume content in markdown format.

    Returns:
        The validated resume data, dumped from the ``ResumeData`` model.

    Raises:
        Errors from ``complete_json`` or from ``ResumeData.model_validate``
        are not caught here and propagate to the caller.
    """
    system_prompt = "You are a JSON extraction engine. Output only valid JSON, no explanations."
    user_prompt = PARSE_RESUME_PROMPT.format(
        resume_text=markdown_text,
        schema=RESUME_SCHEMA_EXAMPLE,
    )

    llm_payload = await complete_json(prompt=user_prompt, system_prompt=system_prompt)

    # Pass the LLM output through the model so the caller only ever receives
    # data that validated against ResumeData.
    return ResumeData.model_validate(llm_payload).model_dump()