File size: 4,875 Bytes
33ccadb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
"""
Cache manager for storing predictions and uploaded data
"""

import logging
from typing import Dict, Optional
from datetime import datetime, timedelta
import pandas as pd

from config.constants import MAX_PREDICTION_HISTORY

logger = logging.getLogger(__name__)


class CacheManager:
    """
    Manages caching of predictions and uploaded data to improve performance.

    Keeps a bounded, chronological history of prediction results plus a
    mapping of uploaded datasets, each stamped with the time it was stored.
    """

    def __init__(self):
        # Chronological list of prediction entries (oldest first).
        self.predictions = []
        # Maps filename -> {'data': DataFrame, 'timestamp': datetime}.
        self.uploaded_data = {}
        # Upper bound on retained prediction entries.
        self.max_predictions = MAX_PREDICTION_HISTORY

    def store_prediction(
        self,
        data_hash: str,
        horizon: int,
        confidence_levels: list,
        result: Dict
    ) -> None:
        """
        Store a prediction result.

        Args:
            data_hash: Hash of the input data
            horizon: Forecast horizon used
            confidence_levels: Confidence levels used
            result: Prediction result dictionary
        """
        prediction_entry = {
            'data_hash': data_hash,
            'horizon': horizon,
            'confidence_levels': confidence_levels,
            'result': result,
            'timestamp': datetime.now()
        }

        self.predictions.append(prediction_entry)

        # Evict oldest entries so the history stays bounded.
        if len(self.predictions) > self.max_predictions:
            self.predictions = self.predictions[-self.max_predictions:]

        logger.debug("Stored prediction, cache size: %d", len(self.predictions))

    def get_prediction(
        self,
        data_hash: str,
        horizon: int,
        confidence_levels: list
    ) -> Optional[Dict]:
        """
        Retrieve a cached prediction if available.

        Scans newest-first, so the most recently stored matching
        entry wins.

        Args:
            data_hash: Hash of the input data
            horizon: Forecast horizon
            confidence_levels: Confidence levels

        Returns:
            Cached prediction result or None
        """
        for entry in reversed(self.predictions):
            if (entry['data_hash'] == data_hash and
                    entry['horizon'] == horizon and
                    entry['confidence_levels'] == confidence_levels):
                logger.info("Cache hit for prediction")
                return entry['result']

        logger.debug("Cache miss for prediction")
        return None

    def store_data(self, filename: str, data: pd.DataFrame) -> None:
        """
        Store uploaded data.

        Args:
            filename: Name of the uploaded file
            data: DataFrame containing the data
        """
        self.uploaded_data[filename] = {
            'data': data,
            'timestamp': datetime.now()
        }

        # BUG FIX: previous message logged the literal text "(unknown)"
        # instead of the actual filename.
        logger.info("Stored data for %s", filename)

    def get_data(self, filename: str) -> Optional[pd.DataFrame]:
        """
        Retrieve uploaded data.

        Args:
            filename: Name of the file

        Returns:
            DataFrame or None if the file was never stored (or was evicted)
        """
        entry = self.uploaded_data.get(filename)
        return entry['data'] if entry is not None else None

    def clear_old_data(self, max_age_hours: int = 24) -> None:
        """
        Clear uploaded data older than the specified age.

        Args:
            max_age_hours: Maximum age in hours
        """
        cutoff = datetime.now() - timedelta(hours=max_age_hours)

        # Collect keys first; deleting while iterating a dict is an error.
        old_files = [
            filename for filename, entry in self.uploaded_data.items()
            if entry['timestamp'] < cutoff
        ]

        for filename in old_files:
            del self.uploaded_data[filename]

        if old_files:
            logger.info("Cleared %d old data entries", len(old_files))

    def clear_all(self) -> None:
        """Clear all cached predictions and uploaded data."""
        self.predictions.clear()
        self.uploaded_data.clear()
        logger.info("Cleared all cache")

    def get_stats(self) -> Dict:
        """Get cache statistics: entry counts and an estimated memory footprint."""
        return {
            'num_predictions': len(self.predictions),
            'num_datasets': len(self.uploaded_data),
            'total_memory_mb': self._estimate_memory()
        }

    def _estimate_memory(self) -> float:
        """Estimate memory usage in MB (rough estimate; DataFrames only).

        Only counts pandas objects ('forecast' frames inside results and
        uploaded DataFrames); other entry fields are ignored.
        """
        try:
            total_bytes = 0

            # Estimate prediction cache size.
            for entry in self.predictions:
                if 'forecast' in entry['result']:
                    total_bytes += entry['result']['forecast'].memory_usage(deep=True).sum()

            # Estimate data cache size.
            for entry in self.uploaded_data.values():
                total_bytes += entry['data'].memory_usage(deep=True).sum()

            return total_bytes / (1024 * 1024)
        except Exception as e:
            # Best-effort: stats must never crash the caller.
            logger.warning("Failed to estimate memory: %s", e)
            return 0.0


# Global cache instance: a module-level singleton created eagerly at import
# time and shared by every importer of this module (not thread-synchronized).
cache_manager = CacheManager()