The frontend is built using React (JavaScript/JSX, as reflected by the `.jsx` and `.js` files below) and follows a component-based architecture:
frontend/
├── src/
│ ├── components/
│ │ ├── ChatInterface.jsx
│ │ ├── Sidebar.jsx
│ │ └── WelcomePage.jsx
│ ├── services/
│ │ └── api.js
│ └── App.jsx
The backend follows a modular structure with clear separation of concerns:
backend/
├── app/
│ ├── api/
│ │ ├── endpoints/
│ │ └── pdf_routes.py
│ ├── models/
│ ├── routers/
│ │ ├── files.py
│ │ └── chat.py
│ └── main.py
├── vector_store/
└── embedding_cache/
The PDF processing system is built using PyMuPDF (fitz) and wraps content extraction with result caching and error handling:
from typing import Any  # required by the Dict[str, Any] annotation below


class PDFProcessor:
    """Process PDF byte payloads, reusing earlier results from an in-memory cache."""

    def __init__(self):
        # Maps a cache key (from self._generate_cache_key) to a previously
        # returned result. Nothing in this class evicts entries.
        self.cache = {}

    async def process_pdf(self, pdf_data: bytes) -> Dict[str, Any]:
        """
        Process PDF with caching and error handling.

        Args:
            pdf_data: Raw bytes of the PDF to process.

        Returns:
            The value produced by ``self._extract_content`` for this payload,
            or the cached value if the same cache key was seen before.

        Raises:
            PDFProcessingError: If key generation or extraction raises. The
                original exception is attached as ``__cause__``.
        """
        try:
            # Check cache first
            cache_key = self._generate_cache_key(pdf_data)
            if cache_key in self.cache:
                return self.cache[cache_key]

            # Process PDF
            result = await self._extract_content(pdf_data)

            # Cache result
            self.cache[cache_key] = result
            return result
        except Exception as e:
            logger.error(f"PDF processing error: {str(e)}")
            # Chain explicitly so the root-cause traceback survives the wrap.
            raise PDFProcessingError(str(e)) from e
The system uses a vector store for efficient document search and retrieval:
from typing import Any  # required by the Dict[str, Any] annotation below


class VectorStore:
    """Thin wrapper that embeds text and adds it to a backing vector store."""

    def __init__(self, store_path: str):
        """
        Remember the store location and initialise the backing store.

        Args:
            store_path: Location passed in by the caller and kept on the
                instance for later use.
        """
        self.store_path = store_path
        # Presumably sets up self.store, which add_document relies on;
        # the helper is defined elsewhere -- TODO confirm.
        self.initialize_store()

    def add_document(self, text: str, metadata: Dict[str, Any]):
        """
        Add document to vector store with embeddings.

        Args:
            text: Document text to embed and store.
            metadata: Arbitrary metadata stored alongside the document.

        Raises:
            VectorStoreError: If embedding generation or the store insert
                raises. The original exception is attached as ``__cause__``.
        """
        try:
            # Generate embeddings
            embeddings = self._generate_embeddings(text)

            # Store in vector database
            # NOTE(review): documents/metadatas are wrapped in one-element
            # lists while embeddings is passed as returned -- confirm that
            # _generate_embeddings already returns a list of vectors.
            self.store.add(
                embeddings=embeddings,
                documents=[text],
                metadatas=[metadata],
            )
        except Exception as e:
            logger.error(f"Vector store error: {str(e)}")
            # Chain explicitly so the root-cause traceback survives the wrap.
            raise VectorStoreError(str(e)) from e
The chat system implements real-time communication with error handling and state management:
// Frontend Chat Implementation
//
// Holds chat state (message list, loading flag, last error) and exposes
// sendMessage(), which posts to the `/chat` endpoint through the shared
// `api` client.
const ChatSystem = {
  state: {
    messages: [],
    isLoading: false,
    error: null
  },

  /**
   * Send a chat message and append the server reply to `state.messages`.
   *
   * Failures are not rethrown: the error's message is stored in
   * `state.error` instead. `state.isLoading` is true only while the
   * request is in flight.
   *
   * @param {*} message - Payload sent as `{ message }` in the POST body.
   * @returns {Promise<void>}
   */
  async sendMessage(message) {
    try {
      // Clear the error left by an earlier failed send; otherwise a
      // successful retry would still show the stale error.
      this.state.error = null;
      this.state.isLoading = true;
      const response = await api.post('/chat', { message });
      // NOTE(review): the whole value resolved by api.post is stored as-is;
      // confirm whether callers expect the response body instead.
      this.state.messages.push(response);
    } catch (error) {
      this.state.error = error.message;
    } finally {
      this.state.isLoading = false;
    }
  }
};
The system uses SQLAlchemy for database management with the following key models:
class File(Base):
    """ORM model mapped to the ``files`` table."""

    __tablename__ = "files"

    # Primary key.
    id = Column(Integer, primary_key=True)

    # File identity and location; filename carries a unique constraint.
    filename = Column(String, unique=True)
    file_path = Column(String)
    file_type = Column(String)

    # Timestamps. The callable itself (not its result) is passed, so it is
    # evaluated per insert/update rather than once at import time.
    # NOTE(review): datetime.utcnow returns naive datetimes and is deprecated
    # as of Python 3.12 -- consider a timezone-aware default.
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )
class ChatHistory(Base):
    """ORM model mapped to the ``chat_history`` table."""

    __tablename__ = "chat_history"

    # Primary key.
    id = Column(Integer, primary_key=True)

    # Foreign key to the ``id`` column of the ``files`` table.
    file_id = Column(
        Integer,
        ForeignKey("files.id"),
    )

    # One exchange: the stored message and the stored response.
    message = Column(String)
    response = Column(String)

    # The callable is passed (not called), so it is evaluated per insert.
    # NOTE(review): datetime.utcnow returns naive datetimes and is deprecated
    # as of Python 3.12 -- consider a timezone-aware default.
    timestamp = Column(
        DateTime,
        default=datetime.utcnow,
    )