<?xml version="1.0" encoding="UTF-8"?>
<!--
  Infographic: "Vector Database Storage Requirements: The Real Mathematics".

  Layout (all coordinates are inside the 900x450 content group, which is
  offset by 40,40 within the 980x530 viewBox):
    * left panel   : original documents, 1 TB, broken down by file type
    * middle panel : vector DB storage components, ~3.5 TB in total
    * right panel  : the resulting storage multiplication factor
    * top right    : three-entry legend

  Every url(#...) reference used below resolves to an element in <defs>.
-->
<svg viewBox="0 0 980 530" xmlns="http://www.w3.org/2000/svg" style="max-width: 100%; height: auto;">
  <defs>
    <!-- Arrow head used by the "Processing" connector (marker-end="url(#arrow)").
         The tip of the triangle (9,3) is the reference point, so the arrow ends
         exactly where the path ends. -->
    <marker id="arrow" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto" markerUnits="strokeWidth">
      <path d="M0,0 L0,6 L9,3 z" fill="#2563EB"/>
    </marker>

    <!-- Drop shadow for depth: blur the source alpha, shift it by (1,1),
         scale its opacity to 20%, then paint the original graphic on top. -->
    <filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
      <feGaussianBlur in="SourceAlpha" stdDeviation="2"/>
      <feOffset dx="1" dy="1" result="offsetblur"/>
      <feComponentTransfer>
        <feFuncA type="linear" slope="0.2"/>
      </feComponentTransfer>
      <feMerge>
        <!-- No "in": takes the result of the previous primitive (the faded shadow). -->
        <feMergeNode/>
        <feMergeNode in="SourceGraphic"/>
      </feMerge>
    </filter>

    <!-- Gradient definitions for the three panels. The panels reference these
         ids; the stop colours are light tints picked to keep the #4B5563 text
         readable. NOTE(review): exact colours are a design choice, adjust freely. -->
    <linearGradient id="origGrad" x1="0" y1="0" x2="0" y2="1">
      <stop offset="0" stop-color="#EFF6FF"/>
      <stop offset="1" stop-color="#DBEAFE"/>
    </linearGradient>
    <linearGradient id="vectorGrad" x1="0" y1="0" x2="0" y2="1">
      <stop offset="0" stop-color="#ECFDF5"/>
      <stop offset="1" stop-color="#D1FAE5"/>
    </linearGradient>
    <linearGradient id="multiGrad" x1="0" y1="0" x2="0" y2="1">
      <stop offset="0" stop-color="#F9FAFB"/>
      <stop offset="1" stop-color="#F3F4F6"/>
    </linearGradient>
  </defs>

  <!-- Off-white canvas with subtle border -->
  <rect x="0" y="0" width="980" height="530" fill="#F9FAFB" stroke="#9CA3AF" stroke-width="2" rx="8"/>

  <!-- Content container with proper margins -->
  <g transform="translate(40, 40)">

    <!-- Background. Kept light: the title, the "Processing" label and the
         legend are drawn directly on it in #4B5563 and would be invisible on a
         background of the same colour. -->
    <rect x="0" y="0" width="900" height="450" fill="#FFFFFF" rx="8" filter="url(#shadow)"/>

    <!-- Title -->
    <text x="450" y="30" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="18" font-weight="600" fill="#4B5563">
      Vector Database Storage Requirements: The Real Mathematics
    </text>

    <!-- Original Documents Section -->
    <rect x="50" y="60" width="180" height="380" fill="url(#origGrad)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="85" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="16" font-weight="600" fill="#4B5563">
      Original Documents
    </text>
    <text x="140" y="105" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      1 TB Total
    </text>

    <!-- File type breakdown: row height shrinks with the share of the 1 TB total -->
    <rect x="70" y="120" width="140" height="30" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="140" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      PDF: 400 GB
    </text>

    <rect x="70" y="155" width="140" height="25" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="172" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      DOCX: 250 GB
    </text>

    <rect x="70" y="185" width="140" height="20" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="200" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      XLSX: 150 GB
    </text>

    <rect x="70" y="210" width="140" height="15" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="223" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      TXT: 100 GB
    </text>

    <rect x="70" y="230" width="140" height="15" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="243" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      HTML: 50 GB
    </text>

    <rect x="70" y="250" width="140" height="10" fill="rgba(59, 130, 246, 0.05)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="140" y="258" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Other: 50 GB
    </text>

    <!-- Arrow from the documents panel to the vector DB panel.
         marker-end must name a marker that exists in <defs>; "arrow" is the only one. -->
    <path d="M 240 250 L 290 250" stroke="#2563EB" stroke-width="3" fill="none" marker-end="url(#arrow)" opacity="0.7"/>
    <text x="265" y="240" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Processing
    </text>

    <!-- Vector Database Storage -->
    <rect x="300" y="60" width="250" height="380" fill="url(#vectorGrad)" stroke="#2563EB" stroke-width="2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="85" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="16" font-weight="600" fill="#4B5563">
      Vector DB Storage
    </text>
    <text x="425" y="105" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~3.5 TB Required
    </text>

    <!-- Storage breakdown: five components, 800 + 1,228 + 600 + 400 + 500 GB (about 3.5 TB) -->
    <rect x="320" y="120" width="210" height="50" fill="rgba(16, 185, 129, 0.05)" stroke="#2563EB" stroke-width="2" stroke-dasharray="3,2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="135" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="500" fill="#4B5563">
      Raw Text Extracted
    </text>
    <text x="425" y="150" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~800 GB (cleaned)
    </text>
    <text x="425" y="163" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Deduplication reduces 20%
    </text>

    <rect x="320" y="175" width="210" height="60" fill="rgba(16, 185, 129, 0.05)" stroke="#2563EB" stroke-width="2" stroke-dasharray="3,2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="190" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="500" fill="#4B5563">
      Vector Embeddings
    </text>
    <text x="425" y="205" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~1.2 TB (384-dim floats)
    </text>
    <text x="425" y="218" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      4 bytes × 384 × ~800M chunks
    </text>
    <text x="425" y="230" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      = 1,228 GB
    </text>

    <rect x="320" y="240" width="210" height="55" fill="rgba(16, 185, 129, 0.05)" stroke="#2563EB" stroke-width="2" stroke-dasharray="3,2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="255" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="500" fill="#4B5563">
      HNSW Index
    </text>
    <text x="425" y="270" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~600 GB
    </text>
    <text x="425" y="283" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Graph structure + links
    </text>

    <rect x="320" y="300" width="210" height="50" fill="rgba(16, 185, 129, 0.05)" stroke="#2563EB" stroke-width="2" stroke-dasharray="3,2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="315" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="500" fill="#4B5563">
      Metadata + Positions
    </text>
    <text x="425" y="330" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~400 GB
    </text>
    <text x="425" y="343" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Doc refs, chunks, offsets
    </text>

    <rect x="320" y="355" width="210" height="45" fill="rgba(16, 185, 129, 0.05)" stroke="#2563EB" stroke-width="2" stroke-dasharray="3,2" rx="8" filter="url(#shadow)"/>
    <text x="425" y="370" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="500" fill="#4B5563">
      Cache + Auxiliary
    </text>
    <text x="425" y="385" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      ~500 GB
    </text>
    <text x="425" y="395" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Query cache, temp indices
    </text>

    <!-- Total comparison -->
    <rect x="570" y="150" width="300" height="200" fill="url(#multiGrad)" stroke="#2563EB" stroke-width="2" stroke-dasharray="5,3" rx="8" filter="url(#shadow)"/>
    <text x="720" y="175" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="16" font-weight="600" fill="#4B5563">
      Storage Multiplication Factor
    </text>

    <text x="590" y="205" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      Original Documents: 1.0 TB
    </text>
    <text x="590" y="230" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="600" fill="#4B5563">
      Vector DB Total: 3.5 TB
    </text>
    <text x="590" y="255" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="600" fill="#4B5563">
      Multiplication Factor: 3.5×
    </text>

    <text x="590" y="285" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">
      With redundancy/backup:
    </text>
    <text x="590" y="305" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-weight="600" fill="#4B5563">
      Production Total: 7.0 TB (2× replica)
    </text>

    <text x="720" y="335" text-anchor="middle" font-family="system-ui, -apple-system, sans-serif" font-size="14" font-style="italic" fill="#4B5563">
      Reality: You need 3.5-7× your document storage
    </text>

    <!-- Visual indicators (legend). The labels carry no filter: the document
         defines no text-shadow filter, and an unresolved filter reference can
         stop an element from rendering. -->
    <g transform="translate(820, 80)">
      <circle cx="0" cy="0" r="3" fill="#4B5563"/>
      <text x="10" y="4" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">Input</text>
    </g>

    <g transform="translate(820, 100)">
      <circle cx="0" cy="0" r="3" fill="#4B5563"/>
      <text x="10" y="4" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">Storage</text>
    </g>

    <g transform="translate(820, 120)">
      <circle cx="0" cy="0" r="3" fill="#4B5563"/>
      <text x="10" y="4" font-family="system-ui, -apple-system, sans-serif" font-size="14" fill="#4B5563">Factor</text>
    </g>

  </g>

</svg>