# Getting Started

Install and use forgetless in your project.

## Installation

```toml
[dependencies]
forgetless = "0.1"
```
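
Or add it from the command line:

```sh
cargo add forgetless
```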

## Basic Usage

```rust
use forgetless::{Forgetless, Config};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Any large text works here; reading a file is just for illustration.
    let large_content = std::fs::read_to_string("large_input.txt")?;

    let result = Forgetless::new()
        .config(Config::default().context_limit(10_000))
        .add(&large_content)
        .run()
        .await?;

    println!("{}", result.content);
    Ok(())
}
```
```text
Input:  247,000 tokens
Output: 9,842 tokens
```
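
With the context limit set to 10,000 tokens, roughly 247,000 tokens of input are condensed to under 10,000, keeping the highest-scoring chunks (see How It Works below).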

## Config Options

```rust
use forgetless::{Forgetless, Config};

// Inside an async fn, as in Basic Usage.
let result = Forgetless::new()
    .config(Config::default()
        .context_limit(128_000) // Max output tokens
        .vision_llm(true)       // LLM for image descriptions
        .context_llm(true)      // LLM for smart scoring
        .chunk_size(256)        // Target chunk size
        .parallel(true)         // Parallel processing
        .cache(true))           // Embedding cache
    .add_file("diagram.png")
    .add_file("research.pdf")
    .query("Explain the architecture")
    .run()
    .await?;
```

## Priority

```rust
use forgetless::{Forgetless, Config, WithPriority};

// Inside an async fn; `conversation`, `documents`, and `logs` are strings.
let result = Forgetless::new()
    .config(Config::default().context_limit(50_000))
    .add(WithPriority::critical("System prompt")) // Always kept
    .add(WithPriority::high(&conversation))       // High priority
    .add(&documents)                              // Medium (default)
    .add(WithPriority::low(&logs))                // Low priority
    .run()
    .await?;
```
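
A chunk's priority tier feeds the algorithmic part of the scoring described under How It Works, so higher-tier content survives tighter budgets, and critical content is always retained.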

## Files

```rust
use forgetless::{Forgetless, Config, FileWithPriority};

// Inside an async fn, as in Basic Usage.
let result = Forgetless::new()
    .config(Config::default().context_limit(100_000))
    .add_file("README.md")
    .add_file(FileWithPriority::high("main.rs"))
    .add_files(&["lib.rs", "config.rs"])
    .run()
    .await?;
```

## Result

```rust
result.content                  // Optimized content
result.total_tokens             // Output token count
result.stats.input_tokens       // Input token count
result.stats.compression_ratio  // e.g., 14.5
result.stats.chunks_processed   // Total chunks
result.stats.chunks_selected    // Selected chunks
```
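
For example, the stats can drive a one-line compression report; this sketch uses only the fields listed above:

```rust
// Print a one-line summary of what forgetless did.
println!(
    "{} -> {} tokens ({:.1}x, kept {}/{} chunks)",
    result.stats.input_tokens,
    result.total_tokens,
    result.stats.compression_ratio,
    result.stats.chunks_selected,
    result.stats.chunks_processed,
);
```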

## How It Works

1. Content is split into semantic chunks.
2. Each chunk is embedded locally (all-MiniLM-L6-v2).
3. Chunks are scored with a hybrid algorithm:
   - Algorithmic: priority, recency, and position
   - Semantic: similarity to the query (if provided)
   - LLM: intelligent scoring (if enabled)
4. The top-scoring chunks are selected within the token budget (see the sketch below).
5. The selected chunks are assembled into the final optimized context.
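
To make steps 3 and 4 concrete, here is a minimal sketch of hybrid scoring and budgeted selection. The `Chunk` struct, the weights, and the function names are illustrative assumptions, not forgetless's internal API.

```rust
// Illustrative only: struct, weights, and names are assumptions, not
// forgetless internals.

struct Chunk {
    text: String,
    tokens: usize,
    priority: f32, // from the WithPriority tier
    recency: f32,  // newer content scores higher
    position: f32, // e.g., document openings score higher
    semantic: f32, // similarity to the query embedding
}

// Step 3: blend the algorithmic and semantic signals (weights assumed).
fn score(c: &Chunk) -> f32 {
    0.4 * c.priority + 0.2 * c.recency + 0.1 * c.position + 0.3 * c.semantic
}

// Step 4: greedily keep the highest-scoring chunks that fit the budget.
fn select(mut chunks: Vec<Chunk>, budget: usize) -> Vec<Chunk> {
    chunks.sort_by(|a, b| score(b).total_cmp(&score(a)));
    let mut used = 0;
    chunks
        .into_iter()
        .filter(|c| {
            let fits = used + c.tokens <= budget;
            if fits {
                used += c.tokens;
            }
            fits
        })
        .collect()
}

fn main() {
    let chunks = vec![
        Chunk { text: "System prompt".into(), tokens: 50, priority: 1.0, recency: 0.5, position: 1.0, semantic: 0.2 },
        Chunk { text: "Old log line".into(), tokens: 80, priority: 0.1, recency: 0.1, position: 0.3, semantic: 0.1 },
    ];
    // With a 100-token budget, only the higher-scoring chunk fits.
    for c in select(chunks, 100) {
        println!("kept: {} ({} tokens)", c.text, c.tokens);
    }
}
```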