"""Unit tests for DataValidator (candle validation and gap detection).""" import tempfile from datetime import datetime, timedelta from decimal import Decimal from pathlib import Path import pytest from tradefinder.adapters.types import Candle from tradefinder.data.storage import DataStorage from tradefinder.data.validator import DataValidator class TestDataValidatorCandleValidation: """Tests for single candle validation.""" def test_validate_candle_valid_candle(self) -> None: """Valid candle returns empty errors list.""" candle = Candle( timestamp=datetime.now(), open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert errors == [] def test_validate_candle_high_below_low(self) -> None: """High < low is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("50000.00"), high=Decimal("49000.00"), # Invalid low=Decimal("51000.00"), # Invalid close=Decimal("50500.00"), volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "high < low" in errors def test_validate_candle_high_below_open(self) -> None: """High < open is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("51000.00"), # Invalid high=Decimal("50000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "high < open" in errors def test_validate_candle_high_below_close(self) -> None: """High < close is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("50000.00"), high=Decimal("50000.00"), low=Decimal("49000.00"), close=Decimal("51000.00"), # Invalid volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "high < close" in errors def test_validate_candle_low_above_open(self) -> None: """Low > open is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("49000.00"), # Invalid high=Decimal("51000.00"), low=Decimal("50000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "low > open" in errors def test_validate_candle_low_above_close(self) -> None: """Low > close is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("51000.00"), # Invalid close=Decimal("49000.00"), # Invalid volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "low > close" in errors def test_validate_candle_negative_volume(self) -> None: """Negative volume is detected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("-100.50"), # Invalid ) errors = DataValidator.validate_candle(candle) assert "volume < 0" in errors def test_validate_candle_non_datetime_timestamp(self) -> None: """Non-datetime timestamp is detected.""" candle = Candle( timestamp="2024-01-01", # Invalid type open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ) errors = DataValidator.validate_candle(candle) assert "timestamp must be datetime" in errors def test_validate_candle_multiple_errors(self) -> None: """Multiple validation errors are collected.""" candle = Candle( timestamp=datetime.now(), open=Decimal("52000.00"), # > high high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("48000.00"), # < low volume=Decimal("-100.50"), # Negative ) errors = DataValidator.validate_candle(candle) assert len(errors) >= 3 assert any("high < open" in error for error in errors) assert any("low > close" in error for error in errors) assert any("volume < 0" in error for error in errors) class TestDataValidatorBatchValidation: """Tests for batch candle validation.""" def test_validate_candles_valid_batch(self) -> None: """Valid candles return empty errors list.""" candles = [ Candle( timestamp=datetime(2024, 1, 1, i), open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ) for i in range(3) ] errors = DataValidator.validate_candles(candles) assert errors == [] def test_validate_candles_with_errors(self) -> None: """Invalid candles produce error messages.""" candles = [ Candle( # Valid timestamp=datetime(2024, 1, 1, 0), open=Decimal("50000.00"), high=Decimal("51000.00"), low=Decimal("49000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ), Candle( # Invalid: high < low timestamp=datetime(2024, 1, 1, 1), open=Decimal("50000.00"), high=Decimal("49000.00"), low=Decimal("51000.00"), close=Decimal("50500.00"), volume=Decimal("100.50"), ), ] errors = DataValidator.validate_candles(candles) assert len(errors) == 1 assert "2024-01-01T01:00:00" in errors[0] assert "high < low" in errors[0] class TestDataValidatorGapDetection: """Tests for gap detection in stored data.""" @pytest.fixture def storage(self) -> DataStorage: """Test database fixture.""" with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test.duckdb" storage = DataStorage(db_path) with storage: storage.initialize_schema() yield storage def test_find_gaps_no_data(self, storage: DataStorage) -> None: """No gaps when no data exists.""" start = datetime(2024, 1, 1) end = datetime(2024, 1, 2) gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) assert len(gaps) == 1 assert gaps[0] == (start, end) def test_find_gaps_start_after_end_raises(self, storage: DataStorage) -> None: """ValueError when start > end.""" start = datetime(2024, 1, 2) end = datetime(2024, 1, 1) with pytest.raises(ValueError, match="start must be before end"): DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) def test_find_gaps_continuous_data(self, storage: DataStorage) -> None: """No gaps when data is continuous.""" base_time = datetime(2024, 1, 1, 0) candles = [ Candle( timestamp=base_time + timedelta(hours=i), open=Decimal("50000"), high=Decimal("51000"), low=Decimal("49000"), close=Decimal("50500"), volume=Decimal("100"), ) for i in range(5) ] storage.insert_candles(candles, "BTCUSDT", "1h") start = base_time end = base_time + timedelta(hours=4) gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) assert gaps == [] def test_find_gaps_with_gaps(self, storage: DataStorage) -> None: """Gaps are detected correctly.""" base_time = datetime(2024, 1, 1, 0) # Insert candles at hours 0, 2, 4 (missing 1, 3) candles = [ Candle( timestamp=base_time + timedelta(hours=i * 2), open=Decimal("50000"), high=Decimal("51000"), low=Decimal("49000"), close=Decimal("50500"), volume=Decimal("100"), ) for i in range(3) ] storage.insert_candles(candles, "BTCUSDT", "1h") start = base_time end = base_time + timedelta(hours=4) gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) assert len(gaps) == 2 # Gap between hour 0 and hour 2 (missing hour 1) assert gaps[0] == (base_time + timedelta(hours=1), base_time + timedelta(hours=2)) # Gap between hour 2 and hour 4 (missing hour 3) assert gaps[1] == (base_time + timedelta(hours=3), base_time + timedelta(hours=4)) def test_find_gaps_initial_gap(self, storage: DataStorage) -> None: """Gap at start is detected.""" base_time = datetime(2024, 1, 1, 0) candles = [ Candle( timestamp=base_time + timedelta(hours=2), # Start at hour 2 open=Decimal("50000"), high=Decimal("51000"), low=Decimal("49000"), close=Decimal("50500"), volume=Decimal("100"), ) ] storage.insert_candles(candles, "BTCUSDT", "1h") start = base_time end = base_time + timedelta(hours=3) gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) assert len(gaps) == 1 # Gap from start to hour 2 assert gaps[0] == (start, base_time + timedelta(hours=2)) def test_find_gaps_trailing_gap(self, storage: DataStorage) -> None: """Gap at end is detected.""" base_time = datetime(2024, 1, 1, 0) candles = [ Candle( timestamp=base_time, open=Decimal("50000"), high=Decimal("51000"), low=Decimal("49000"), close=Decimal("50500"), volume=Decimal("100"), ) ] storage.insert_candles(candles, "BTCUSDT", "1h") start = base_time end = base_time + timedelta(hours=2) gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end) assert len(gaps) == 1 # Gap from hour 1 to end assert gaps[0] == (base_time + timedelta(hours=1), end) class TestDataValidatorGapReport: """Tests for gap reporting functionality.""" @pytest.fixture def storage(self) -> DataStorage: """Test database fixture.""" with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test.duckdb" storage = DataStorage(db_path) with storage: storage.initialize_schema() yield storage def test_get_gap_report_empty_database(self, storage: DataStorage) -> None: """Empty database returns zero gaps.""" report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h") assert report["symbol"] == "BTCUSDT" assert report["timeframe"] == "1h" assert report["gap_count"] == 0 assert report["total_gap_seconds"] == 0.0 assert report["max_gap_seconds"] == 0.0 assert report["gaps"] == [] assert report["checked_from"] is None assert report["checked_to"] is None def test_get_gap_report_with_data(self, storage: DataStorage) -> None: """Gap report includes gap statistics.""" base_time = datetime(2024, 1, 1, 0) # Insert candles at hours 0, 2, 4 (missing 1, 3) candles = [ Candle( timestamp=base_time + timedelta(hours=i * 2), open=Decimal("50000"), high=Decimal("51000"), low=Decimal("49000"), close=Decimal("50500"), volume=Decimal("100"), ) for i in range(3) ] storage.insert_candles(candles, "BTCUSDT", "1h") report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h") assert report["symbol"] == "BTCUSDT" assert report["timeframe"] == "1h" assert report["gap_count"] == 2 assert report["total_gap_seconds"] == 7200.0 # 2 hours in seconds assert report["max_gap_seconds"] == 3600.0 # 1 hour in seconds assert len(report["gaps"]) == 2 assert report["checked_from"] == base_time assert report["checked_to"] == base_time + timedelta(hours=4) class TestDataValidatorTimeframeInterval: """Tests for timeframe interval calculation.""" def test_interval_for_timeframe_1m(self) -> None: """1m timeframe interval is 1 minute.""" interval = DataValidator._interval_for_timeframe("1m") assert interval == timedelta(minutes=1) def test_interval_for_timeframe_1h(self) -> None: """1h timeframe interval is 1 hour.""" interval = DataValidator._interval_for_timeframe("1h") assert interval == timedelta(hours=1) def test_interval_for_timeframe_1d(self) -> None: """1d timeframe interval is 1 day.""" interval = DataValidator._interval_for_timeframe("1d") assert interval == timedelta(days=1) def test_interval_for_timeframe_unknown_raises(self) -> None: """Unknown timeframe raises ValueError.""" with pytest.raises(ValueError, match="Unknown timeframe"): DataValidator._interval_for_timeframe("unknown")