"""Unit tests for DataFetcher (backfill and sync logic)."""

import tempfile
from datetime import datetime, timedelta
from decimal import Decimal
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest

from tradefinder.adapters.types import Candle
from tradefinder.data.fetcher import TIMEFRAME_MS, DataFetcher
from tradefinder.data.storage import DataStorage

def make_candle(timestamp: datetime) -> Candle:
    """Build a fixed-price test candle stamped at *timestamp*.

    Prices and volume are constant so tests can focus purely on
    timestamps, counts, and pagination behavior.
    """
    fields = {
        "open": "50000.00",
        "high": "51000.00",
        "low": "49000.00",
        "close": "50500.00",
        "volume": "1000.00",
    }
    return Candle(
        timestamp=timestamp,
        **{name: Decimal(value) for name, value in fields.items()},
    )
class TestTimeframeMappings:
    """Tests for timeframe constant mappings."""

    def test_common_timeframes_are_defined(self) -> None:
        """Every commonly used timeframe maps to a positive millisecond span."""
        for timeframe in ("1m", "5m", "15m", "30m", "1h", "4h", "1d", "1w"):
            assert timeframe in TIMEFRAME_MS
            assert TIMEFRAME_MS[timeframe] > 0

    def test_timeframe_values_are_correct(self) -> None:
        """Spot-check the millisecond arithmetic for representative timeframes."""
        minute_ms = 60 * 1000
        assert TIMEFRAME_MS["1m"] == minute_ms
        assert TIMEFRAME_MS["1h"] == 60 * minute_ms
        assert TIMEFRAME_MS["4h"] == 4 * 60 * minute_ms
        assert TIMEFRAME_MS["1d"] == 24 * 60 * minute_ms
class TestDataFetcherBackfill:
    """Tests for backfill_candles functionality."""

    async def test_backfill_fetches_and_stores_candles(self) -> None:
        """Candles are fetched from the adapter and persisted to storage."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            start = datetime(2024, 1, 1, 0, 0, 0)
            end = datetime(2024, 1, 1, 4, 0, 0)

            # Adapter yields one batch of 4 hourly candles, then an empty
            # batch to signal the end of available data.
            hourly = [make_candle(start + timedelta(hours=h)) for h in range(4)]
            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(side_effect=[hourly, []])

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                stored = await fetcher.backfill_candles("BTCUSDT", "1h", start, end)

                assert stored == 4
                assert storage.get_candle_count("BTCUSDT", "1h") == 4

    async def test_backfill_uses_default_end_date(self) -> None:
        """End date defaults to now when not provided."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])  # no data available

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                one_hour_ago = datetime.now() - timedelta(hours=1)
                await fetcher.backfill_candles("BTCUSDT", "1h", one_hour_ago)

                # The adapter must still be queried even though nothing comes back.
                mock_adapter.get_candles.assert_called()

    async def test_backfill_raises_on_unknown_timeframe(self) -> None:
        """An unrecognized timeframe string raises ValueError."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"
            mock_adapter = MagicMock()

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                with pytest.raises(ValueError, match="Unknown timeframe"):
                    await fetcher.backfill_candles(
                        "BTCUSDT", "invalid_tf", datetime(2024, 1, 1)
                    )

    async def test_backfill_handles_empty_response(self) -> None:
        """An adapter with no data yields a zero total, not an error."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                stored = await fetcher.backfill_candles(
                    "BTCUSDT", "1h", datetime(2024, 1, 1), datetime(2024, 1, 2)
                )

                assert stored == 0

    async def test_backfill_respects_batch_size(self) -> None:
        """Oversized batch requests are clamped to the 1500 maximum."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                # Ask for more than the cap and verify the clamp.
                await fetcher.backfill_candles(
                    "BTCUSDT",
                    "1h",
                    datetime(2024, 1, 1),
                    datetime(2024, 1, 2),
                    batch_size=2000,
                )

                assert mock_adapter.get_candles.call_args.kwargs["limit"] == 1500

    async def test_backfill_paginates_correctly(self) -> None:
        """Large date ranges are fetched in multiple batches."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"
            start = datetime(2024, 1, 1, 0, 0, 0)

            # Two full 3-candle batches followed by a partial batch; the
            # partial batch tells the fetcher it has reached the end.
            batches = [
                [make_candle(start + timedelta(hours=h)) for h in range(3)],
                [make_candle(start + timedelta(hours=h)) for h in range(3, 6)],
                [make_candle(start + timedelta(hours=6))],
            ]
            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(side_effect=batches)

            with DataStorage(database) as storage:
                storage.initialize_schema()
                fetcher = DataFetcher(mock_adapter, storage)

                stored = await fetcher.backfill_candles(
                    "BTCUSDT",
                    "1h",
                    start,
                    start + timedelta(hours=10),
                    batch_size=3,
                )

                assert stored == 7  # 3 + 3 + 1
                assert mock_adapter.get_candles.call_count == 3
class TestDataFetcherSync:
    """Tests for sync_candles functionality."""

    async def test_sync_from_latest_timestamp(self) -> None:
        """Sync resumes from just after the newest stored candle."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()

                # Seed the store with five hourly candles (hours 0-4).
                first_ts = datetime(2024, 1, 1, 0, 0, 0)
                seeded = [make_candle(first_ts + timedelta(hours=h)) for h in range(5)]
                storage.insert_candles(seeded, "BTCUSDT", "1h")

                fetcher = DataFetcher(mock_adapter, storage)
                await fetcher.sync_candles("BTCUSDT", "1h")

                # The request must begin no earlier than one interval past
                # the last existing candle (hour 4).
                requested_start = mock_adapter.get_candles.call_args.kwargs.get(
                    "start_time"
                )
                next_slot_ms = int(
                    (first_ts + timedelta(hours=5)).timestamp() * 1000
                )
                assert requested_start >= next_slot_ms

    async def test_sync_with_no_existing_data_uses_lookback(self) -> None:
        """With an empty store, sync falls back to the lookback window."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()

                fetcher = DataFetcher(mock_adapter, storage)
                await fetcher.sync_candles("BTCUSDT", "1h", lookback_days=7)

                # The adapter must have been asked for data.
                mock_adapter.get_candles.assert_called()
class TestDataFetcherLatest:
    """Tests for fetch_latest_candles functionality."""

    async def test_fetch_latest_stores_candles(self) -> None:
        """Recent candles returned by the adapter are persisted."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            now = datetime.now()
            recent = [make_candle(now - timedelta(hours=h)) for h in range(5)]
            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=recent)

            with DataStorage(database) as storage:
                storage.initialize_schema()

                fetcher = DataFetcher(mock_adapter, storage)
                stored = await fetcher.fetch_latest_candles("BTCUSDT", "1h", limit=5)

                assert stored == 5
                assert storage.get_candle_count("BTCUSDT", "1h") == 5

    async def test_fetch_latest_with_empty_response(self) -> None:
        """An empty adapter response yields a count of zero."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()

                fetcher = DataFetcher(mock_adapter, storage)
                stored = await fetcher.fetch_latest_candles("BTCUSDT", "1h")

                assert stored == 0

    async def test_fetch_latest_respects_limit(self) -> None:
        """The limit parameter is forwarded to the adapter unchanged."""
        with tempfile.TemporaryDirectory() as workdir:
            database = Path(workdir) / "test.duckdb"

            mock_adapter = MagicMock()
            mock_adapter.get_candles = AsyncMock(return_value=[])

            with DataStorage(database) as storage:
                storage.initialize_schema()

                fetcher = DataFetcher(mock_adapter, storage)
                await fetcher.fetch_latest_candles("BTCUSDT", "1h", limit=50)

                assert mock_adapter.get_candles.call_args.kwargs["limit"] == 50