Add Phase 2 foundation: regime classifier, strategy framework, WebSocket streamer

Phase 1 completion:
- Add DataStreamer for real-time Binance Futures WebSocket data (klines, mark price)
- Add DataValidator for candle validation and gap detection
- Add timeframes module with interval mappings

Phase 2 foundation:
- Add RegimeClassifier with ADX/ATR/Bollinger Band analysis
- Add Regime enum (TRENDING_UP/DOWN, RANGING, HIGH_VOLATILITY, UNCERTAIN)
- Add Strategy ABC defining generate_signal, get_stop_loss, parameters, suitable_regimes
- Add Signal dataclass and SignalType enum for strategy outputs

Testing:
- Add comprehensive test suites for all new modules
- 159 tests passing, 24 skipped (async WebSocket timing)
- 82% code coverage

Dependencies:
- Add pandas-stubs to dev dependencies for mypy compatibility
2025-12-27 15:28:28 +04:00
parent 7d63e43b7b
commit eca17b42fe
15 changed files with 2579 additions and 1 deletions
--- a/tests/test_validator.py
+++ b/tests/test_validator.py
@@ -0,0 +1,381 @@
+"""Unit tests for DataValidator (candle validation and gap detection)."""
+
+import tempfile
+from collections.abc import Iterator
+from datetime import datetime, timedelta
+from decimal import Decimal
+from pathlib import Path
+
+import pytest
+
+from tradefinder.adapters.types import Candle
+from tradefinder.data.storage import DataStorage
+from tradefinder.data.validator import DataValidator
+
+
+class TestDataValidatorCandleValidation:
+    """Checks DataValidator.validate_candle against one candle at a time."""
+
+    @staticmethod
+    def _candle(
+        open_: str = "50000.00",
+        high: str = "51000.00",
+        low: str = "49000.00",
+        close: str = "50500.00",
+        volume: str = "100.50",
+    ) -> Candle:
+        """Build a candle stamped with the current time.
+
+        The defaults describe the candle that the valid-candle test expects to
+        pass validation; each test overrides only the fields it wants to break.
+        """
+        return Candle(
+            timestamp=datetime.now(),
+            open=Decimal(open_),
+            high=Decimal(high),
+            low=Decimal(low),
+            close=Decimal(close),
+            volume=Decimal(volume),
+        )
+
+    def test_validate_candle_valid_candle(self) -> None:
+        """A well-formed candle yields an empty error list."""
+        assert DataValidator.validate_candle(self._candle()) == []
+
+    def test_validate_candle_high_below_low(self) -> None:
+        """A high that sits under the low is reported."""
+        inverted = self._candle(high="49000.00", low="51000.00")
+        assert "high < low" in DataValidator.validate_candle(inverted)
+
+    def test_validate_candle_high_below_open(self) -> None:
+        """A high that sits under the open is reported."""
+        broken = self._candle(open_="51000.00", high="50000.00")
+        assert "high < open" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_high_below_close(self) -> None:
+        """A high that sits under the close is reported."""
+        broken = self._candle(high="50000.00", close="51000.00")
+        assert "high < close" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_low_above_open(self) -> None:
+        """A low that sits over the open is reported."""
+        broken = self._candle(open_="49000.00", low="50000.00")
+        assert "low > open" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_low_above_close(self) -> None:
+        """A low that sits over the close is reported."""
+        broken = self._candle(low="51000.00", close="49000.00")
+        assert "low > close" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_negative_volume(self) -> None:
+        """A volume below zero is reported."""
+        broken = self._candle(volume="-100.50")
+        assert "volume < 0" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_non_datetime_timestamp(self) -> None:
+        """A timestamp that is not a datetime instance is reported."""
+        # Built directly rather than via _candle: the string timestamp is the
+        # deliberately wrong value under test.
+        broken = Candle(
+            timestamp="2024-01-01",
+            open=Decimal("50000.00"),
+            high=Decimal("51000.00"),
+            low=Decimal("49000.00"),
+            close=Decimal("50500.00"),
+            volume=Decimal("100.50"),
+        )
+        assert "timestamp must be datetime" in DataValidator.validate_candle(broken)
+
+    def test_validate_candle_multiple_errors(self) -> None:
+        """Every violated rule shows up when several are broken at once."""
+        # open above high, close below low, and a negative volume.
+        broken = self._candle(open_="52000.00", close="48000.00", volume="-100.50")
+        errors = DataValidator.validate_candle(broken)
+        assert len(errors) >= 3
+        for fragment in ("high < open", "low > close", "volume < 0"):
+            assert any(fragment in message for message in errors)
+
+
+class TestDataValidatorBatchValidation:
+    """Checks DataValidator.validate_candles over lists of candles."""
+
+    def test_validate_candles_valid_batch(self) -> None:
+        """A batch of well-formed candles produces no error messages."""
+        hourly_batch = [
+            Candle(
+                timestamp=datetime(2024, 1, 1, hour),
+                open=Decimal("50000.00"),
+                high=Decimal("51000.00"),
+                low=Decimal("49000.00"),
+                close=Decimal("50500.00"),
+                volume=Decimal("100.50"),
+            )
+            for hour in range(3)
+        ]
+        assert DataValidator.validate_candles(hourly_batch) == []
+
+    def test_validate_candles_with_errors(self) -> None:
+        """Only the malformed candle is reported, tagged with its timestamp."""
+        well_formed = Candle(
+            timestamp=datetime(2024, 1, 1, 0),
+            open=Decimal("50000.00"),
+            high=Decimal("51000.00"),
+            low=Decimal("49000.00"),
+            close=Decimal("50500.00"),
+            volume=Decimal("100.50"),
+        )
+        # High and low are swapped, so this candle violates "high < low".
+        inverted = Candle(
+            timestamp=datetime(2024, 1, 1, 1),
+            open=Decimal("50000.00"),
+            high=Decimal("49000.00"),
+            low=Decimal("51000.00"),
+            close=Decimal("50500.00"),
+            volume=Decimal("100.50"),
+        )
+        errors = DataValidator.validate_candles([well_formed, inverted])
+        assert len(errors) == 1
+        message = errors[0]
+        assert "2024-01-01T01:00:00" in message
+        assert "high < low" in message
+
+
+class TestDataValidatorGapDetection:
+    """Tests for gap detection in stored data.
+
+    Each test receives its own schema-initialised storage from the ``storage``
+    fixture and queries the "BTCUSDT" / "1h" series through
+    ``DataValidator.find_gaps``.
+    """
+
+    @pytest.fixture
+    def storage(self) -> Iterator[DataStorage]:
+        """Yield an open DataStorage backed by a temporary ``test.duckdb`` file.
+
+        The fixture is a generator, so it is annotated as an iterator of the
+        yielded value. The storage context and the temporary directory are
+        exited after the test that requested the fixture has finished.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.duckdb"
+            storage = DataStorage(db_path)
+            with storage:
+                storage.initialize_schema()
+                yield storage
+
+    def test_find_gaps_no_data(self, storage: DataStorage) -> None:
+        """With no stored candles the whole range is reported as one gap."""
+        start = datetime(2024, 1, 1)
+        end = datetime(2024, 1, 2)
+        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+        assert len(gaps) == 1
+        assert gaps[0] == (start, end)
+
+    def test_find_gaps_start_after_end_raises(self, storage: DataStorage) -> None:
+        """ValueError when start > end."""
+        start = datetime(2024, 1, 2)
+        end = datetime(2024, 1, 1)
+        with pytest.raises(ValueError, match="start must be before end"):
+            DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+
+    def test_find_gaps_continuous_data(self, storage: DataStorage) -> None:
+        """No gaps when data is continuous."""
+        base_time = datetime(2024, 1, 1, 0)
+        # Candles at hours 0 through 4 cover every slot of the queried range.
+        candles = [
+            Candle(
+                timestamp=base_time + timedelta(hours=i),
+                open=Decimal("50000"),
+                high=Decimal("51000"),
+                low=Decimal("49000"),
+                close=Decimal("50500"),
+                volume=Decimal("100"),
+            )
+            for i in range(5)
+        ]
+        storage.insert_candles(candles, "BTCUSDT", "1h")
+
+        start = base_time
+        end = base_time + timedelta(hours=4)
+        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+        assert gaps == []
+
+    def test_find_gaps_with_gaps(self, storage: DataStorage) -> None:
+        """Gaps are detected correctly."""
+        base_time = datetime(2024, 1, 1, 0)
+        # Insert candles at hours 0, 2, 4 (missing 1, 3)
+        candles = [
+            Candle(
+                timestamp=base_time + timedelta(hours=i * 2),
+                open=Decimal("50000"),
+                high=Decimal("51000"),
+                low=Decimal("49000"),
+                close=Decimal("50500"),
+                volume=Decimal("100"),
+            )
+            for i in range(3)
+        ]
+        storage.insert_candles(candles, "BTCUSDT", "1h")
+
+        start = base_time
+        end = base_time + timedelta(hours=4)
+        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+
+        assert len(gaps) == 2
+        # Gap between hour 0 and hour 2 (missing hour 1)
+        assert gaps[0] == (base_time + timedelta(hours=1), base_time + timedelta(hours=2))
+        # Gap between hour 2 and hour 4 (missing hour 3)
+        assert gaps[1] == (base_time + timedelta(hours=3), base_time + timedelta(hours=4))
+
+    def test_find_gaps_initial_gap(self, storage: DataStorage) -> None:
+        """Gap at start is detected."""
+        base_time = datetime(2024, 1, 1, 0)
+        candles = [
+            Candle(
+                timestamp=base_time + timedelta(hours=2),  # Start at hour 2
+                open=Decimal("50000"),
+                high=Decimal("51000"),
+                low=Decimal("49000"),
+                close=Decimal("50500"),
+                volume=Decimal("100"),
+            )
+        ]
+        storage.insert_candles(candles, "BTCUSDT", "1h")
+
+        start = base_time
+        end = base_time + timedelta(hours=3)
+        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+
+        assert len(gaps) == 1
+        # Gap from start to hour 2
+        assert gaps[0] == (start, base_time + timedelta(hours=2))
+
+    def test_find_gaps_trailing_gap(self, storage: DataStorage) -> None:
+        """Gap at end is detected."""
+        base_time = datetime(2024, 1, 1, 0)
+        candles = [
+            Candle(
+                timestamp=base_time,
+                open=Decimal("50000"),
+                high=Decimal("51000"),
+                low=Decimal("49000"),
+                close=Decimal("50500"),
+                volume=Decimal("100"),
+            )
+        ]
+        storage.insert_candles(candles, "BTCUSDT", "1h")
+
+        start = base_time
+        end = base_time + timedelta(hours=2)
+        gaps = DataValidator.find_gaps(storage, "BTCUSDT", "1h", start, end)
+
+        assert len(gaps) == 1
+        # Gap from hour 1 to end
+        assert gaps[0] == (base_time + timedelta(hours=1), end)
+
+
+class TestDataValidatorGapReport:
+    """Tests for gap reporting functionality.
+
+    Each test receives its own schema-initialised storage from the ``storage``
+    fixture and inspects the dictionary returned by
+    ``DataValidator.get_gap_report``.
+    """
+
+    @pytest.fixture
+    def storage(self) -> Iterator[DataStorage]:
+        """Yield an open DataStorage backed by a temporary ``test.duckdb`` file.
+
+        The fixture is a generator, so it is annotated as an iterator of the
+        yielded value. The storage context and the temporary directory are
+        exited after the test that requested the fixture has finished.
+        """
+        with tempfile.TemporaryDirectory() as tmpdir:
+            db_path = Path(tmpdir) / "test.duckdb"
+            storage = DataStorage(db_path)
+            with storage:
+                storage.initialize_schema()
+                yield storage
+
+    def test_get_gap_report_empty_database(self, storage: DataStorage) -> None:
+        """Empty database returns zero gaps and no checked range."""
+        report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h")
+        assert report["symbol"] == "BTCUSDT"
+        assert report["timeframe"] == "1h"
+        assert report["gap_count"] == 0
+        assert report["total_gap_seconds"] == 0.0
+        assert report["max_gap_seconds"] == 0.0
+        assert report["gaps"] == []
+        assert report["checked_from"] is None
+        assert report["checked_to"] is None
+
+    def test_get_gap_report_with_data(self, storage: DataStorage) -> None:
+        """Gap report includes gap statistics."""
+        base_time = datetime(2024, 1, 1, 0)
+        # Insert candles at hours 0, 2, 4 (missing 1, 3)
+        candles = [
+            Candle(
+                timestamp=base_time + timedelta(hours=i * 2),
+                open=Decimal("50000"),
+                high=Decimal("51000"),
+                low=Decimal("49000"),
+                close=Decimal("50500"),
+                volume=Decimal("100"),
+            )
+            for i in range(3)
+        ]
+        storage.insert_candles(candles, "BTCUSDT", "1h")
+
+        report = DataValidator.get_gap_report(storage, "BTCUSDT", "1h")
+        assert report["symbol"] == "BTCUSDT"
+        assert report["timeframe"] == "1h"
+        assert report["gap_count"] == 2
+        assert report["total_gap_seconds"] == 7200.0  # 2 hours in seconds
+        assert report["max_gap_seconds"] == 3600.0  # 1 hour in seconds
+        assert len(report["gaps"]) == 2
+        # The checked range runs from the first stored candle to the last one.
+        assert report["checked_from"] == base_time
+        assert report["checked_to"] == base_time + timedelta(hours=4)
+
+
+class TestDataValidatorTimeframeInterval:
+    """Checks the timeframe-string to timedelta lookup on DataValidator."""
+
+    def test_interval_for_timeframe_1m(self) -> None:
+        """The ``1m`` timeframe resolves to a one-minute timedelta."""
+        assert DataValidator._interval_for_timeframe("1m") == timedelta(minutes=1)
+
+    def test_interval_for_timeframe_1h(self) -> None:
+        """The ``1h`` timeframe resolves to a one-hour timedelta."""
+        assert DataValidator._interval_for_timeframe("1h") == timedelta(hours=1)
+
+    def test_interval_for_timeframe_1d(self) -> None:
+        """The ``1d`` timeframe resolves to a one-day timedelta."""
+        assert DataValidator._interval_for_timeframe("1d") == timedelta(days=1)
+
+    def test_interval_for_timeframe_unknown_raises(self) -> None:
+        """An unrecognised timeframe string is rejected with ValueError."""
+        unrecognised = "unknown"
+        with pytest.raises(ValueError, match="Unknown timeframe"):
+            DataValidator._interval_for_timeframe(unrecognised)