tessa.sources.rate_limiter

Rate Limiter -- ensures we don't hit the APIs too often by enforcing that a minimum amount of time has elapsed between calls to an API.

The goal is to never run into errors in the first place, because some sites take substantial time until they allow-list a blocked IP address again. That is also why we can't use a library such as Tenacity here.

 1"""Rate Limiter -- makes sure we don't hit the APIs too often by making sure a minimum
 2amount of time has elapsed between calls to an API.
 3
 4The goal is to never run into errors in the first place, b/c some sites take substantial
 5time until they allow-list a blocked IP address again. That is also why we can't use a
 6library such as Tenacity here.
 7"""
 8
 9from dataclasses import dataclass
10import datetime
11import time
12import pendulum
13
14
15VERY_LONG_AGO = pendulum.parse("1900")
16INITIAL_BACK_OFF_TIME = 10
17
18
19@dataclass
20class RateLimiter:
21    """Encapsulates state and stats of a rate limiter object."""
22
23    wait_seconds: float
24    """Enforce this amount of seconds between subsequent calls."""
25
26    last_call: datetime.datetime = VERY_LONG_AGO
27    """Keeps track of last call's timestamp."""
28
29    count_all_calls: int = 0
30    """Number of total calls."""
31
32    count_limited_calls: int = 0
33    """Number of calls that triggered some waiting."""
34
35    back_off_time: int = INITIAL_BACK_OFF_TIME
36    """Number of seconds to wait after a rate limit hit."""
37
38    def reset_back_off(self):
39        """Reset back-off time to initial value."""
40        self.back_off_time = INITIAL_BACK_OFF_TIME
41
42    def reset(self):
43        """Reset state and stats."""
44        self.last_call = VERY_LONG_AGO
45        self.count_all_calls = self.count_limited_calls = 0
46        self.reset_back_off()
47
48    def rate_limit(self):
49        """Enforce the minimum wait time as specified in `wait_seconds`."""
50        diff = (pendulum.now() - self.last_call).total_seconds()
51        if diff < self.wait_seconds:
52            time.sleep(self.wait_seconds - diff)
53            self.count_limited_calls += 1
54        self.last_call = pendulum.now()
55        self.count_all_calls += 1
56
57    def back_off(self):
58        """Back off exponentially."""
59        time.sleep(self.back_off_time)
60        self.back_off_time *= 2
@dataclass
class RateLimiter:
20@dataclass
21class RateLimiter:
22    """Encapsulates state and stats of a rate limiter object."""
23
24    wait_seconds: float
25    """Enforce this amount of seconds between subsequent calls."""
26
27    last_call: datetime.datetime = VERY_LONG_AGO
28    """Keeps track of last call's timestamp."""
29
30    count_all_calls: int = 0
31    """Number of total calls."""
32
33    count_limited_calls: int = 0
34    """Number of calls that triggered some waiting."""
35
36    back_off_time: int = INITIAL_BACK_OFF_TIME
37    """Number of seconds to wait after a rate limit hit."""
38
39    def reset_back_off(self):
40        """Reset back-off time to initial value."""
41        self.back_off_time = INITIAL_BACK_OFF_TIME
42
43    def reset(self):
44        """Reset state and stats."""
45        self.last_call = VERY_LONG_AGO
46        self.count_all_calls = self.count_limited_calls = 0
47        self.reset_back_off()
48
49    def rate_limit(self):
50        """Enforce the minimum wait time as specified in `wait_seconds`."""
51        diff = (pendulum.now() - self.last_call).total_seconds()
52        if diff < self.wait_seconds:
53            time.sleep(self.wait_seconds - diff)
54            self.count_limited_calls += 1
55        self.last_call = pendulum.now()
56        self.count_all_calls += 1
57
58    def back_off(self):
59        """Back off exponentially."""
60        time.sleep(self.back_off_time)
61        self.back_off_time *= 2

Encapsulates state and stats of a rate limiter object.

RateLimiter( wait_seconds: float, last_call: datetime.datetime = DateTime(1900, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC')), count_all_calls: int = 0, count_limited_calls: int = 0, back_off_time: int = 10)
wait_seconds: float

Enforce this number of seconds between subsequent calls.

last_call: datetime.datetime = DateTime(1900, 1, 1, 0, 0, 0, tzinfo=Timezone('UTC'))

Keeps track of last call's timestamp.

count_all_calls: int = 0

Number of total calls.

count_limited_calls: int = 0

Number of calls that triggered some waiting.

back_off_time: int = 10

Number of seconds to wait after a rate limit hit.

def reset_back_off(self):
39    def reset_back_off(self):
40        """Reset back-off time to initial value."""
41        self.back_off_time = INITIAL_BACK_OFF_TIME

Reset back-off time to initial value.

def reset(self):
43    def reset(self):
44        """Reset state and stats."""
45        self.last_call = VERY_LONG_AGO
46        self.count_all_calls = self.count_limited_calls = 0
47        self.reset_back_off()

Reset state and stats.

def rate_limit(self):
49    def rate_limit(self):
50        """Enforce the minimum wait time as specified in `wait_seconds`."""
51        diff = (pendulum.now() - self.last_call).total_seconds()
52        if diff < self.wait_seconds:
53            time.sleep(self.wait_seconds - diff)
54            self.count_limited_calls += 1
55        self.last_call = pendulum.now()
56        self.count_all_calls += 1

Enforce the minimum wait time as specified in wait_seconds.

def back_off(self):
58    def back_off(self):
59        """Back off exponentially."""
60        time.sleep(self.back_off_time)
61        self.back_off_time *= 2

Back off exponentially: sleep for the current back-off time, then double it for the next back-off.