# CryptoTrading/tests/test_validator.py

"""Unit tests for DataValidator (candle validation and gap detection)."""

import tempfile
from datetime import datetime, timedelta
from decimal import Decimal
from pathlib import Path
from typing import Iterator

import pytest

from tradefinder.adapters.types import Candle
from tradefinder.data.storage import DataStorage
from tradefinder.data.validator import DataValidator


class TestDataValidatorCandleValidation:
    """Checks that ``DataValidator.validate_candle`` reports each malformed field."""

    @staticmethod
    def _candle(
        *,
        open_price: str = "50000.00",
        high: str = "51000.00",
        low: str = "49000.00",
        close: str = "50500.00",
        volume: str = "100.50",
    ) -> "Candle":
        """Build a candle stamped with the current time.

        The defaults describe a well-formed candle; each test overrides only
        the fields it wants to make inconsistent.
        """
        return Candle(
            timestamp=datetime.now(),
            open=Decimal(open_price),
            high=Decimal(high),
            low=Decimal(low),
            close=Decimal(close),
            volume=Decimal(volume),
        )

    def test_validate_candle_valid_candle(self) -> None:
        """A well-formed candle yields an empty error list."""
        problems = DataValidator.validate_candle(self._candle())
        assert problems == []

    def test_validate_candle_high_below_low(self) -> None:
        """A high below the low is reported."""
        # High and low are swapped relative to the valid defaults.
        swapped = self._candle(high="49000.00", low="51000.00")
        problems = DataValidator.validate_candle(swapped)
        assert "high < low" in problems

    def test_validate_candle_high_below_open(self) -> None:
        """A high below the open is reported."""
        # Open sits above the high.
        bad_open = self._candle(open_price="51000.00", high="50000.00")
        problems = DataValidator.validate_candle(bad_open)
        assert "high < open" in problems

    def test_validate_candle_high_below_close(self) -> None:
        """A high below the close is reported."""
        # Close sits above the high.
        bad_close = self._candle(high="50000.00", close="51000.00")
        problems = DataValidator.validate_candle(bad_close)
        assert "high < close" in problems

    def test_validate_candle_low_above_open(self) -> None:
        """A low above the open is reported."""
        # Open sits below the low.
        bad_open = self._candle(open_price="49000.00", low="50000.00")
        problems = DataValidator.validate_candle(bad_open)
        assert "low > open" in problems

    def test_validate_candle_low_above_close(self) -> None:
        """A low above the close is reported."""
        # Low is raised to the high while the close drops beneath it.
        bad_close = self._candle(low="51000.00", close="49000.00")
        problems = DataValidator.validate_candle(bad_close)
        assert "low > close" in problems

    def test_validate_candle_negative_volume(self) -> None:
        """A negative volume is reported."""
        negative = self._candle(volume="-100.50")
        problems = DataValidator.validate_candle(negative)
        assert "volume < 0" in problems

    def test_validate_candle_non_datetime_timestamp(self) -> None:
        """A timestamp that is not a ``datetime`` is reported."""
        # Built directly because the helper always supplies a real datetime.
        string_stamped = Candle(
            timestamp="2024-01-01",  # Deliberately the wrong type
            open=Decimal("50000.00"),
            high=Decimal("51000.00"),
            low=Decimal("49000.00"),
            close=Decimal("50500.00"),
            volume=Decimal("100.50"),
        )
        problems = DataValidator.validate_candle(string_stamped)
        assert "timestamp must be datetime" in problems

    def test_validate_candle_multiple_errors(self) -> None:
        """Several violations on one candle are all collected."""
        broken = self._candle(
            open_price="52000.00",  # Above the high
            close="48000.00",  # Below the low
            volume="-100.50",  # Negative
        )
        problems = DataValidator.validate_candle(broken)
        assert len(problems) >= 3
        # Substring matching: each expected fragment must occur in some message.
        assert any("high < open" in message for message in problems)
        assert any("low > close" in message for message in problems)
        assert any("volume < 0" in message for message in problems)


class TestDataValidatorBatchValidation:
    """Checks ``DataValidator.validate_candles`` over lists of candles."""

    def test_validate_candles_valid_batch(self) -> None:
        """A batch of well-formed candles yields an empty error list."""
        batch = []
        # Three identical candles at hours 0, 1 and 2 of 2024-01-01.
        for hour in range(3):
            batch.append(
                Candle(
                    timestamp=datetime(2024, 1, 1, hour),
                    open=Decimal("50000.00"),
                    high=Decimal("51000.00"),
                    low=Decimal("49000.00"),
                    close=Decimal("50500.00"),
                    volume=Decimal("100.50"),
                )
            )
        problems = DataValidator.validate_candles(batch)
        assert problems == []

    def test_validate_candles_with_errors(self) -> None:
        """Only the malformed candle in a mixed batch produces a message."""
        well_formed = Candle(
            timestamp=datetime(2024, 1, 1, 0),
            open=Decimal("50000.00"),
            high=Decimal("51000.00"),
            low=Decimal("49000.00"),
            close=Decimal("50500.00"),
            volume=Decimal("100.50"),
        )
        # High and low are swapped, so high < low.
        inverted = Candle(
            timestamp=datetime(2024, 1, 1, 1),
            open=Decimal("50000.00"),
            high=Decimal("49000.00"),
            low=Decimal("51000.00"),
            close=Decimal("50500.00"),
            volume=Decimal("100.50"),
        )
        problems = DataValidator.validate_candles([well_formed, inverted])
        assert len(problems) == 1
        # The single message names both the offending timestamp and the rule.
        message = problems[0]
        assert "2024-01-01T01:00:00" in message
        assert "high < low" in message


class TestDataValidatorGapDetection:
    """Tests for ``DataValidator.find_gaps`` against candles held in storage."""

    @pytest.fixture
    def storage(self) -> Iterator[DataStorage]:
        """Yield a schema-initialized ``DataStorage`` on a temporary database.

        This is a generator fixture, hence the ``Iterator`` return type: the
        test body runs at the ``yield``; afterwards the storage context is
        exited and then the temporary directory is removed.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "test.duckdb"
            storage = DataStorage(db_path)
            with storage:
                storage.initialize_schema()
                yield storage

    @staticmethod
    def _candle_at(timestamp: datetime) -> "Candle":
        """Build a well-formed candle with fixed prices at ``timestamp``."""
        return Candle(
            timestamp=timestamp,
            open=Decimal("50000"),
            high=Decimal("51000"),
            low=Decimal("49000"),
            close=Decimal("50500"),
            volume=Decimal("100"),
        )

    def test_find_gaps_no_data(self, storage: DataStorage) -> None:
        """With no stored candles the whole requested range is one gap."""
        start = datetime(2024, 1, 1)
        end = datetime(2024, 1, 2)
        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
        assert len(gaps) == 1
        assert gaps[0] == (start, end)

    def test_find_gaps_start_after_end_raises(self, storage: DataStorage) -> None:
        """ValueError when start > end."""
        start = datetime(2024, 1, 2)
        end = datetime(2024, 1, 1)
        with pytest.raises(ValueError, match="start must be before end"):
            DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)

    def test_find_gaps_continuous_data(self, storage: DataStorage) -> None:
        """No gaps when data is continuous."""
        base_time = datetime(2024, 1, 1, 0)
        # One candle per hour for hours 0 through 4.
        candles = [self._candle_at(base_time + timedelta(hours=i)) for i in range(5)]
        storage.insert_candles(candles, "BTCUSDT", "1h")

        start = base_time
        end = base_time + timedelta(hours=4)
        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
        assert gaps == []

    def test_find_gaps_with_gaps(self, storage: DataStorage) -> None:
        """Gaps are detected correctly."""
        base_time = datetime(2024, 1, 1, 0)
        # Insert candles at hours 0, 2, 4 (missing 1, 3)
        candles = [self._candle_at(base_time + timedelta(hours=i * 2)) for i in range(3)]
        storage.insert_candles(candles, "BTCUSDT", "1h")

        start = base_time
        end = base_time + timedelta(hours=4)
        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)

        assert len(gaps) == 2
        # Gap between hour 0 and hour 2 (missing hour 1)
        assert gaps[0] == (base_time + timedelta(hours=1), base_time + timedelta(hours=2))
        # Gap between hour 2 and hour 4 (missing hour 3)
        assert gaps[1] == (base_time + timedelta(hours=3), base_time + timedelta(hours=4))

    def test_find_gaps_initial_gap(self, storage: DataStorage) -> None:
        """Gap at start is detected."""
        base_time = datetime(2024, 1, 1, 0)
        # The only stored candle sits at hour 2, leaving hours 0-1 empty.
        candles = [self._candle_at(base_time + timedelta(hours=2))]
        storage.insert_candles(candles, "BTCUSDT", "1h")

        start = base_time
        end = base_time + timedelta(hours=3)
        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)

        assert len(gaps) == 1
        # Gap from start to hour 2
        assert gaps[0] == (start, base_time + timedelta(hours=2))

    def test_find_gaps_trailing_gap(self, storage: DataStorage) -> None:
        """Gap at end is detected."""
        base_time = datetime(2024, 1, 1, 0)
        # The only stored candle sits at hour 0, leaving the rest empty.
        candles = [self._candle_at(base_time)]
        storage.insert_candles(candles, "BTCUSDT", "1h")

        start = base_time
        end = base_time + timedelta(hours=2)
        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)

        assert len(gaps) == 1
        # Gap from hour 1 to end
        assert gaps[0] == (base_time + timedelta(hours=1), end)


class TestDataValidatorGapReport:
    """Tests for the summary dict returned by ``DataValidator.get_gap_report``."""

    @pytest.fixture
    def storage(self) -> Iterator[DataStorage]:
        """Yield a schema-initialized ``DataStorage`` on a temporary database.

        This is a generator fixture, hence the ``Iterator`` return type: the
        test body runs at the ``yield``; afterwards the storage context is
        exited and then the temporary directory is removed.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "test.duckdb"
            storage = DataStorage(db_path)
            with storage:
                storage.initialize_schema()
                yield storage

    @staticmethod
    def _candle_at(timestamp: datetime) -> "Candle":
        """Build a well-formed candle with fixed prices at ``timestamp``."""
        return Candle(
            timestamp=timestamp,
            open=Decimal("50000"),
            high=Decimal("51000"),
            low=Decimal("49000"),
            close=Decimal("50500"),
            volume=Decimal("100"),
        )

    def test_get_gap_report_empty_database(self, storage: DataStorage) -> None:
        """Empty database returns zero gaps and no checked range."""
        report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h")
        assert report["symbol"] == "BTCUSDT"
        assert report["timeframe"] == "1h"
        assert report["gap_count"] == 0
        assert report["total_gap_seconds"] == 0.0
        assert report["max_gap_seconds"] == 0.0
        assert report["gaps"] == []
        assert report["checked_from"] is None
        assert report["checked_to"] is None

    def test_get_gap_report_with_data(self, storage: DataStorage) -> None:
        """Gap report includes gap statistics."""
        base_time = datetime(2024, 1, 1, 0)
        # Insert candles at hours 0, 2, 4 (missing 1, 3)
        candles = [self._candle_at(base_time + timedelta(hours=i * 2)) for i in range(3)]
        storage.insert_candles(candles, "BTCUSDT", "1h")

        report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h")
        assert report["symbol"] == "BTCUSDT"
        assert report["timeframe"] == "1h"
        assert report["gap_count"] == 2
        assert report["total_gap_seconds"] == 7200.0  # 2 hours in seconds
        assert report["max_gap_seconds"] == 3600.0  # 1 hour in seconds
        assert len(report["gaps"]) == 2
        # The checked range spans the first to the last stored candle.
        assert report["checked_from"] == base_time
        assert report["checked_to"] == base_time + timedelta(hours=4)


class TestDataValidatorTimeframeInterval:
    """Checks the timeframe-string to ``timedelta`` mapping of ``DataValidator``."""

    def test_interval_for_timeframe_1m(self) -> None:
        """The "1m" timeframe maps to one minute."""
        expected = timedelta(minutes=1)
        assert DataValidator._interval_for_timeframe("1m") == expected

    def test_interval_for_timeframe_1h(self) -> None:
        """The "1h" timeframe maps to one hour."""
        expected = timedelta(hours=1)
        assert DataValidator._interval_for_timeframe("1h") == expected

    def test_interval_for_timeframe_1d(self) -> None:
        """The "1d" timeframe maps to one day."""
        expected = timedelta(days=1)
        assert DataValidator._interval_for_timeframe("1d") == expected

    def test_interval_for_timeframe_unknown_raises(self) -> None:
        """An unrecognized timeframe string raises ``ValueError``."""
        unrecognized = "unknown"
        with pytest.raises(ValueError, match="Unknown timeframe"):
            DataValidator._interval_for_timeframe(unrecognized)