mirror of
https://github.com/srbhr/Resume-Matcher.git
synced 2026-01-19 23:31:27 +00:00
Backend:
- Add JSON mode support for reliable structured output
- Implement retry logic with 2 attempts for LLM calls
- Add bracket-matching JSON extraction for malformed responses
- Simplify prompts to work better across LLM providers

Frontend:
- Add back button to Resume Builder with Swiss style
- Increase resume width from 210mm to 250mm (~20% wider)
- Apply consistent Swiss-style shadows (solid black offset)
- Fix shadow clipping at bottom with proper padding
- Add overflow-y-auto for scrollable resume viewer

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
62 lines
1.6 KiB
Python
62 lines
1.6 KiB
Python
"""Document parsing service using markitdown and LLM."""
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from markitdown import MarkItDown
|
|
|
|
from app.llm import complete_json
|
|
from app.prompts import PARSE_RESUME_PROMPT
|
|
from app.prompts.templates import RESUME_SCHEMA_EXAMPLE
|
|
from app.schemas import ResumeData
|
|
|
|
|
|
async def parse_document(content: bytes, filename: str) -> str:
    """Convert an uploaded document to Markdown using markitdown.

    The bytes are written to a temporary file that keeps the original
    extension (lower-cased), because the converter is given a file path.
    The blocking file I/O and conversion run in a worker thread so the
    event loop stays responsive while a document is being converted.

    Args:
        content: Raw file bytes.
        filename: Original filename; only its suffix is used, as the
            suffix of the temporary file.

    Returns:
        The ``text_content`` of the markitdown conversion result.

    Raises:
        OSError: If the temporary file cannot be created or written.
        Exception: Anything raised by ``MarkItDown.convert`` propagates
            unchanged to the caller.
    """
    # Imported locally so this function does not rely on a module-level import.
    import asyncio

    suffix = Path(filename).suffix.lower()

    def _convert_blocking() -> str:
        """Write the bytes to a temp file, convert it, and always clean up."""
        # delete=False: the file must outlive the handle so the converter can
        # reopen it by path; it is removed explicitly in the finally block.
        tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        tmp_path = Path(tmp.name)
        try:
            # Close the handle before converting so the path can be reopened.
            with tmp:
                tmp.write(content)
            result = MarkItDown().convert(str(tmp_path))
            return result.text_content
        finally:
            # Covers failures in write() as well as in convert().
            tmp_path.unlink(missing_ok=True)

    return await asyncio.to_thread(_convert_blocking)
|
|
|
|
|
|
async def parse_resume_to_json(markdown_text: str) -> dict[str, Any]:
    """Turn resume Markdown into structured data via the LLM.

    Fills ``PARSE_RESUME_PROMPT`` with the schema example and the resume
    text, sends it through ``complete_json``, and validates the response
    against ``ResumeData`` before returning it as a plain dict.

    Args:
        markdown_text: Resume content in Markdown format.

    Returns:
        The validated resume data as produced by ``ResumeData.model_dump()``.

    Raises:
        Exception: Errors from ``complete_json`` or from
            ``ResumeData.model_validate`` are not caught here.
    """
    system_prompt = (
        "You are a JSON extraction engine. Output only valid JSON, no explanations."
    )
    filled_prompt = PARSE_RESUME_PROMPT.format(
        schema=RESUME_SCHEMA_EXAMPLE,
        resume_text=markdown_text,
    )

    llm_payload = await complete_json(prompt=filled_prompt, system_prompt=system_prompt)

    # Validation happens before dumping so only schema-conforming data is returned.
    return ResumeData.model_validate(llm_payload).model_dump()
|