# Testing

🧪 How to write and run tests for ReasonKit.

Testing is essential for maintaining quality. ReasonKit uses Rust’s built-in testing framework, with additional tooling for benchmarks and integration tests.
## Test Types

| Type | Location | Purpose | Run Command |
|---|---|---|---|
| Unit | `src/**/*.rs` | Test individual functions | `cargo test` |
| Integration | `tests/*.rs` | Test module interactions | `cargo test --test '*'` |
| Doc tests | Doc comments | Ensure examples work | `cargo test --doc` |
| Benchmarks | `benches/*.rs` | Performance regression | `cargo bench` |
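Doc tests are the fenced examples inside `///` comments; `cargo test --doc` compiles and runs each one. A minimal sketch, mirroring the `count_perspectives` unit-test example later on this page (the public import path used inside the doc comment is an assumption for illustration):

````rust
/// Returns the configured number of perspectives, defaulting to 10.
///
/// # Examples
///
/// ```
/// use reasonkit_core::Config; // import path assumed for illustration
///
/// let config = Config::default();
/// assert_eq!(reasonkit_core::count_perspectives(&config), 10);
/// ```
pub fn count_perspectives(config: &Config) -> usize {
    config.perspectives.unwrap_or(10)
}
````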
## Running Tests

### All Tests

```bash
# Run all tests
cargo test

# Run with output (see println! in tests)
cargo test -- --nocapture

# Run in release mode (faster, catches different bugs)
cargo test --release
```
### Specific Tests

```bash
# Run tests matching a name
cargo test gigathink

# Run tests in a specific module
cargo test thinktool::

# Run a single test
cargo test test_gigathink_default_config

# Run ignored tests (slow/expensive)
cargo test -- --ignored
```
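Slow or expensive tests are excluded from the default run with the `#[ignore]` attribute; `cargo test -- --ignored` then runs only those. A short sketch (the test name and body are illustrative):

```rust
#[test]
#[ignore = "hits a live LLM provider; run explicitly with --ignored"]
fn test_full_deep_profile_against_real_provider() {
    // Expensive end-to-end check, skipped by a plain `cargo test`.
}
```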
### Test Features

```bash
# Run with all features
cargo test --all-features

# Run with a specific feature
cargo test --features embeddings-local
```
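Tests that depend on an optional feature should be gated so the default `cargo test` still compiles without it. A minimal sketch, assuming the `embeddings-local` feature name from the command above (the module contents are illustrative):

```rust
// Only compiled and run when the feature is enabled:
//   cargo test --features embeddings-local
#[cfg(all(test, feature = "embeddings-local"))]
mod embeddings_local_tests {
    #[test]
    fn test_local_embedding_roundtrip() {
        // Exercise the locally hosted embedding path here.
    }
}
```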
## Writing Unit Tests

### Basic Structure

```rust
// In src/thinktool/gigathink.rs
pub fn count_perspectives(config: &Config) -> usize {
    config.perspectives.unwrap_or(10)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_count_perspectives_default() {
        let config = Config::default();
        assert_eq!(count_perspectives(&config), 10);
    }

    #[test]
    fn test_count_perspectives_custom() {
        let config = Config {
            perspectives: Some(15),
            ..Default::default()
        };
        assert_eq!(count_perspectives(&config), 15);
    }
}
```
### Testing Errors

```rust
#[test]
fn test_invalid_input_returns_error() {
    let result = parse_input("");
    assert!(result.is_err());

    // Check the error type
    let err = result.unwrap_err();
    assert!(matches!(err, ReasonKitError::Parse(_)));
}

#[test]
#[should_panic(expected = "cannot be empty")]
fn test_panics_on_empty() {
    validate_required(""); // Should panic
}
```
### Testing Async Code

```rust
use std::time::Duration;

#[tokio::test]
async fn test_async_llm_call() {
    let client = MockClient::new();
    let result = call_llm(&client, "test prompt").await;
    assert!(result.is_ok());
}

#[tokio::test]
async fn test_timeout_handling() {
    let client = SlowMockClient::new(Duration::from_secs(10));
    let result = tokio::time::timeout(
        Duration::from_secs(1),
        call_llm(&client, "test"),
    ).await;
    assert!(result.is_err()); // Should time out
}
```
### Test Fixtures

```rust
// In tests/common/mod.rs
pub fn sample_config() -> Config {
    Config {
        profile: Profile::Balanced,
        provider: Provider::Mock,
        timeout: Duration::from_secs(30),
        ..Default::default()
    }
}

pub fn sample_input() -> &'static str {
    "Should I accept this job offer with 20% higher salary?"
}

// In tests/integration_test.rs
mod common;

#[test]
fn test_with_fixtures() {
    let config = common::sample_config();
    let input = common::sample_input();
    // ...
}
```
## Writing Integration Tests

Integration tests go in the `tests/` directory:

```rust
// tests/thinktool_integration.rs
use reasonkit_core::{run_analysis, Config, Profile, Provider};

#[test]
fn test_full_analysis_pipeline() {
    let config = Config {
        profile: Profile::Quick,
        provider: Provider::Mock,
        ..Default::default()
    };

    let result = run_analysis("Test question", &config);
    assert!(result.is_ok());

    let analysis = result.unwrap();
    assert!(!analysis.synthesis.is_empty());
    assert!(analysis.confidence > 0.0);
}

#[test]
fn test_profile_affects_depth() {
    let quick = run_with_profile(Profile::Quick).unwrap();
    let deep = run_with_profile(Profile::Deep).unwrap();

    // Deep should produce more perspectives
    assert!(deep.perspectives.len() > quick.perspectives.len());
}
```
## Mocking

### Mock LLM Provider

```rust
use mockall::{automock, predicate};

#[automock]
pub trait LlmProvider {
    async fn complete(&self, prompt: &str) -> Result<String>;
}

#[tokio::test]
async fn test_with_mock_provider() {
    let mut mock = MockLlmProvider::new();
    mock.expect_complete()
        .with(predicate::str::contains("GigaThink"))
        .returning(|_| Ok("Mocked response".to_string()));

    let result = gigathink("test", &mock).await;
    assert!(result.is_ok());
}
```
### Test Doubles

```rust
use std::collections::HashMap;

// Simple test double for deterministic testing
pub struct TestProvider {
    responses: HashMap<String, String>,
}

impl TestProvider {
    pub fn new() -> Self {
        Self {
            responses: HashMap::new(),
        }
    }

    pub fn with_response(mut self, contains: &str, response: &str) -> Self {
        self.responses.insert(contains.to_string(), response.to_string());
        self
    }
}

impl LlmProvider for TestProvider {
    async fn complete(&self, prompt: &str) -> Result<String> {
        for (key, value) in &self.responses {
            if prompt.contains(key) {
                return Ok(value.clone());
            }
        }
        Ok("Default response".to_string())
    }
}
```
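In a test, the double is configured with canned responses keyed by prompt fragment; anything unmatched falls through to the default. A short usage sketch (the `gigathink` call is the same hypothetical entry point used in the mock example above):

```rust
#[tokio::test]
async fn test_gigathink_with_canned_response() {
    // Any prompt containing "GigaThink" gets the canned reply;
    // everything else falls back to "Default response".
    let provider = TestProvider::new()
        .with_response("GigaThink", "Perspective 1: ...\nPerspective 2: ...");

    let result = gigathink("Should we migrate the backend?", &provider).await;
    assert!(result.is_ok());
}
```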
## Benchmarks

### Writing Benchmarks

```rust
// benches/thinktool_bench.rs
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use reasonkit_core::thinktool;
use reasonkit_core::{Config, Profile};

fn benchmark_gigathink(c: &mut Criterion) {
    let config = Config::default();
    let input = "Test question for benchmarking";

    c.bench_function("gigathink_default", |b| {
        b.iter(|| thinktool::gigathink(black_box(input), black_box(&config)))
    });
}

fn benchmark_profiles(c: &mut Criterion) {
    let mut group = c.benchmark_group("profiles");

    for profile in [Profile::Quick, Profile::Balanced, Profile::Deep] {
        group.bench_function(format!("{:?}", profile), |b| {
            b.iter(|| run_with_profile(black_box(profile)))
        });
    }

    group.finish();
}

criterion_group!(benches, benchmark_gigathink, benchmark_profiles);
criterion_main!(benches);
```
### Running Benchmarks

```bash
# Run all benchmarks
cargo bench

# Run a specific benchmark
cargo bench gigathink

# Save a baseline, then compare later runs against it
cargo bench -- --save-baseline main
cargo bench -- --baseline main

# Reports are written under target/criterion/;
# skip plot generation if gnuplot is not installed
cargo bench -- --noplot
```
## Test Coverage

### Measuring Coverage

```bash
# Install the coverage tool
cargo install cargo-tarpaulin

# Generate an HTML coverage report
cargo tarpaulin --out Html

# Coverage with all features enabled
cargo tarpaulin --all-features --out Html
```
### Coverage Goals

| Component | Target Coverage |
|---|---|
| Core logic | > 80% |
| Error paths | > 70% |
| Edge cases | > 60% |
| Overall | > 75% |
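The overall target can also be enforced mechanically: cargo-tarpaulin accepts a failure threshold, so a command along these lines (sketch; check your installed tarpaulin version for the exact flag) fails the run when coverage drops below 75%:

```bash
# Fail the build if total coverage falls below the 75% overall target
cargo tarpaulin --all-features --fail-under 75
```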
## CI Integration

Tests run automatically on every PR:

```yaml
# .github/workflows/test.yml
name: Tests

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - name: Run tests
        run: cargo test --all-features
      - name: Run clippy
        run: cargo clippy -- -D warnings
      - name: Check formatting
        run: cargo fmt --check
```
## Test Best Practices

### Do

- Test one thing per test
- Use descriptive test names
- Test edge cases and error conditions
- Keep tests fast (< 100ms each)
- Use fixtures for common setup

### Don't

- Test private implementation details
- Rely on test execution order
- Use `sleep()` for timing (use mocks instead; see the sketch below)
- Write flaky tests that sometimes fail
- Skip writing tests “for now”
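Instead of real `sleep()` calls, async tests can run against Tokio's paused clock, which advances virtual time instantly and keeps the test deterministic. A minimal sketch mirroring the timeout example earlier (`start_paused` requires the `test-util` feature on the tokio dev-dependency):

```rust
use std::time::Duration;

#[tokio::test(start_paused = true)]
async fn test_timeout_without_real_waiting() {
    // With the clock paused, Tokio auto-advances virtual time when all
    // tasks are idle, so this completes instantly instead of sleeping.
    let slow_call = async {
        tokio::time::sleep(Duration::from_secs(10)).await;
        "done"
    };

    let result = tokio::time::timeout(Duration::from_secs(1), slow_call).await;
    assert!(result.is_err()); // Times out in virtual time, not wall-clock time
}
```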
## Debugging Tests

```bash
# Run with a backtrace and test output
RUST_BACKTRACE=1 cargo test -- --nocapture

# Run a single test with logging
RUST_LOG=debug cargo test test_name -- --nocapture

# Build the test binary without running it (prints its path), then debug it
cargo test --no-run
rust-gdb target/debug/deps/reasonkit_core-*
```
## Related
- Code Style — Coding standards
- Pull Requests — PR guidelines
- Architecture — System design