Rate Limiting in JavaScript: Complete Tutorial
A complete tutorial on rate limiting in JavaScript. Covers token bucket and sliding window algorithms, building a client-side rate limiter class, handling 429 responses with Retry-After headers, server-side rate limiting with Express middleware, per-user and per-IP limiting, rate limit headers, and integrating rate limiting with API clients.
Rate limiting controls how many requests a client can make to an API within a given time window. It prevents abuse, protects server resources, and ensures fair usage. This guide covers both client-side rate limiters (to stay under API limits) and server-side rate limiting (to enforce limits on incoming requests).
Rate Limiting Algorithms
| Algorithm | How It Works | Pros | Cons |
|---|---|---|---|
| Fixed Window | Count requests per time window (e.g., 100/min) | Simple to implement | Burst at window boundary |
| Sliding Window | Rolling time window, weighted average | Smooth traffic | More memory usage |
| Token Bucket | Tokens refill at fixed rate; each request costs a token | Allows controlled bursts | Slightly more complex |
| Leaky Bucket | Queue requests, process at fixed rate | Very smooth output | High latency under load |
Token Bucket Rate Limiter
// Token bucket: tokens accumulate at `refillRate` per second up to
// `capacity`; each request spends tokens. Short bursts are allowed
// while the long-run average rate is enforced.
class TokenBucket {
  constructor(capacity, refillRate) {
    this.capacity = capacity; // Maximum tokens the bucket can hold
    this.tokens = capacity; // Start full so an initial burst is permitted
    this.refillRate = refillRate; // Tokens added per second
    this.lastRefill = Date.now();
  }
  // Credit the tokens earned since the previous refill, capped at capacity.
  refill() {
    const now = Date.now();
    const elapsedSeconds = (now - this.lastRefill) / 1000;
    const replenished = this.tokens + elapsedSeconds * this.refillRate;
    this.tokens = Math.min(this.capacity, replenished);
    this.lastRefill = now;
  }
  // Spend `tokens` if available. Returns true on success, false otherwise.
  tryConsume(tokens = 1) {
    this.refill();
    if (this.tokens < tokens) return false;
    this.tokens -= tokens;
    return true;
  }
  // Milliseconds until `tokens` will be available (0 if available now).
  getWaitTime(tokens = 1) {
    this.refill();
    const shortfall = tokens - this.tokens;
    if (shortfall <= 0) return 0;
    return Math.ceil((shortfall / this.refillRate) * 1000); // ms
  }
}
// 10 requests per second, burst up to 20
const limiter = new TokenBucket(20, 10);
async function makeRequest(url) {
if (!limiter.tryConsume()) {
const waitMs = limiter.getWaitTime();
console.log(`Rate limited. Waiting ${waitMs}ms`);
await new Promise((resolve) => setTimeout(resolve, waitMs));
limiter.tryConsume(); // consume after waiting
}
return fetch(url);
}Sliding Window Rate Limiter
// Sliding window: remembers the timestamp of each request and allows a
// new one only while fewer than `maxRequests` fall inside the last
// `windowMs` milliseconds. Unlike a fixed window, this cannot be
// exploited by bursting at a window boundary.
class SlidingWindowLimiter {
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.timestamps = []; // Send times of requests still inside the window
  }
  // Drop timestamps that have aged out of the window.
  prune(now = Date.now()) {
    const windowStart = now - this.windowMs;
    this.timestamps = this.timestamps.filter((t) => t > windowStart);
  }
  // Record a request if capacity remains; returns whether it was allowed.
  tryConsume() {
    const now = Date.now();
    this.prune(now);
    if (this.timestamps.length < this.maxRequests) {
      this.timestamps.push(now);
      return true;
    }
    return false;
  }
  // Milliseconds until the oldest in-window request expires (0 if there
  // is capacity right now). Fix: prune stale entries before checking
  // capacity and clamp the result to 0 — the original counted expired
  // timestamps and could return a negative wait.
  getWaitTime() {
    this.prune();
    if (this.timestamps.length < this.maxRequests) return 0;
    const oldest = this.timestamps[0];
    return Math.max(0, oldest + this.windowMs - Date.now());
  }
  // Requests still available in the current window.
  remaining() {
    this.prune();
    return Math.max(0, this.maxRequests - this.timestamps.length);
  }
}
// 100 requests per minute
// Shared instance: every caller in this module draws from the same window.
const apiLimiter = new SlidingWindowLimiter(100, 60000);Client-Side Rate-Limited API Client
class RateLimitedClient {
constructor(baseUrl, options = {}) {
this.baseUrl = baseUrl;
this.limiter = new TokenBucket(
options.burstSize || 10,
options.requestsPerSecond || 5
);
this.queue = [];
this.processing = false;
}
async request(endpoint, options = {}) {
return new Promise((resolve, reject) => {
this.queue.push({ endpoint, options, resolve, reject });
this.processQueue();
});
}
async processQueue() {
if (this.processing) return;
this.processing = true;
while (this.queue.length > 0) {
if (!this.limiter.tryConsume()) {
const waitMs = this.limiter.getWaitTime();
await new Promise((r) => setTimeout(r, waitMs));
continue;
}
const { endpoint, options, resolve, reject } = this.queue.shift();
try {
const response = await fetch(`${this.baseUrl}${endpoint}`, {
...options,
headers: {
"Content-Type": "application/json",
...options.headers,
},
});
if (response.status === 429) {
const retryAfter = parseInt(response.headers.get("Retry-After") || "5", 10);
this.queue.unshift({ endpoint, options, resolve, reject });
await new Promise((r) => setTimeout(r, retryAfter * 1000));
continue;
}
resolve(response);
} catch (error) {
reject(error);
}
}
this.processing = false;
}
async get(endpoint) {
return this.request(endpoint, { method: "GET" });
}
async post(endpoint, data) {
return this.request(endpoint, {
method: "POST",
body: JSON.stringify(data),
});
}
}
const api = new RateLimitedClient("https://api.example.com", {
burstSize: 10,
requestsPerSecond: 5,
});Handling 429 Responses
async function fetchWithRateLimitHandling(url, options = {}) {
const maxRetries = 3;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
const response = await fetch(url, options);
if (response.status !== 429) return response;
// Parse Retry-After header
const retryAfter = response.headers.get("Retry-After");
let waitMs;
if (retryAfter) {
// Could be seconds or an HTTP date
const seconds = parseInt(retryAfter, 10);
if (!isNaN(seconds)) {
waitMs = seconds * 1000;
} else {
waitMs = new Date(retryAfter).getTime() - Date.now();
}
} else {
// Exponential backoff fallback
waitMs = Math.pow(2, attempt) * 1000;
}
console.warn(`Rate limited. Retrying in ${waitMs}ms (attempt ${attempt + 1}/${maxRetries})`);
await new Promise((resolve) => setTimeout(resolve, waitMs));
}
throw new Error("Rate limit exceeded after maximum retries");
}See advanced API error handling in JS full guide for comprehensive error handling that includes rate limit errors.
Server-Side Rate Limiting (Express)
// Fixed-window counter keyed by client id (e.g. IP address). Each key
// tracks a window start time and a request count; the count resets
// whenever a new window begins.
class InMemoryRateLimiter {
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.clients = new Map(); // key -> { windowStart, count }
  }
  // Register one request for `key`. Returns { allowed, remaining } and,
  // when blocked, `retryAfter` in whole seconds until the window resets.
  isAllowed(key) {
    const now = Date.now();
    const record = this.clients.get(key);
    const windowExpired = !record || now - record.windowStart >= this.windowMs;
    if (windowExpired) {
      // First request of a fresh window for this key.
      this.clients.set(key, { windowStart: now, count: 1 });
      return { allowed: true, remaining: this.maxRequests - 1 };
    }
    if (record.count >= this.maxRequests) {
      const windowEnd = record.windowStart + this.windowMs;
      const retryAfter = Math.ceil((windowEnd - now) / 1000);
      return { allowed: false, remaining: 0, retryAfter };
    }
    record.count += 1;
    return { allowed: true, remaining: this.maxRequests - record.count };
  }
  // Evict keys whose window has fully elapsed, bounding memory usage.
  cleanup() {
    const now = Date.now();
    for (const [key, record] of this.clients) {
      if (now - record.windowStart >= this.windowMs) {
        this.clients.delete(key);
      }
    }
  }
}
// Express middleware
function rateLimitMiddleware(options = {}) {
const limiter = new InMemoryRateLimiter(
options.maxRequests || 100,
options.windowMs || 60000
);
// Clean up expired entries every minute
setInterval(() => limiter.cleanup(), 60000);
return (req, res, next) => {
const key = options.keyGenerator
? options.keyGenerator(req)
: req.ip;
const result = limiter.isAllowed(key);
// Set standard rate limit headers
res.set("X-RateLimit-Limit", options.maxRequests || 100);
res.set("X-RateLimit-Remaining", result.remaining);
if (!result.allowed) {
res.set("Retry-After", result.retryAfter);
res.set("X-RateLimit-Reset", Math.ceil((Date.now() + result.retryAfter * 1000) / 1000));
return res.status(429).json({
error: "Too many requests",
retryAfter: result.retryAfter,
});
}
next();
};
}
// Usage with Express
// app.use("/api/", rateLimitMiddleware({ maxRequests: 100, windowMs: 60000 }));
// app.use("/api/auth/login", rateLimitMiddleware({ maxRequests: 5, windowMs: 300000 }));Rate Limit Headers
| Header | Description | Example |
|---|---|---|
| X-RateLimit-Limit | Max requests allowed per window | 100 |
| X-RateLimit-Remaining | Requests remaining in current window | 73 |
| X-RateLimit-Reset | Unix timestamp when the window resets | 1709712000 |
| Retry-After | Seconds to wait before retrying (on 429) | 30 |
Batch Processing With Rate Limits
// Run `processFn` over `items` sequentially while staying under
// `rateLimit` calls per second. Returns one settled-style record per
// item ({ status: "fulfilled" | "rejected" }); failures are captured
// rather than thrown, so the batch always runs to completion.
async function batchProcess(items, processFn, rateLimit = 5) {
  const limiter = new SlidingWindowLimiter(rateLimit, 1000);
  const results = [];
  for (const item of items) {
    // Block until the limiter grants a slot for this call.
    while (!limiter.tryConsume()) {
      await new Promise((resolve) => setTimeout(resolve, limiter.getWaitTime()));
    }
    try {
      results.push({ item, result: await processFn(item), status: "fulfilled" });
    } catch (error) {
      results.push({ item, error, status: "rejected" });
    }
  }
  return results;
}
// Process 50 items at max 5 requests/second
// Each entry in `results` is { item, result, status: "fulfilled" } or
// { item, error, status: "rejected" } — failures never abort the batch.
// NOTE(review): `userIds` is assumed to be defined elsewhere — this is
// an illustrative snippet, not runnable as-is.
const results = await batchProcess(
  userIds,
  (id) => fetch(`/api/users/${id}`).then((r) => r.json()),
  5
);Rune AI
Key Insights
- Token bucket allows controlled bursts: Tokens refill at a constant rate but accumulate up to a capacity, letting short bursts through while enforcing average limits
- Sliding window prevents boundary bursts: Unlike fixed windows, a sliding window cannot be exploited by sending max requests at the end of one window and the start of the next
- Always read Retry-After on 429 responses: The server tells you exactly how long to wait; honor it instead of guessing
- Rate limit both client and server: Client-side limiting improves UX; server-side limiting is the security boundary
- Queue requests instead of dropping them: A rate-limited client should queue excess requests and process them as capacity becomes available
Frequently Asked Questions
What is the difference between rate limiting and throttling?
Which algorithm should I use?
How do I rate limit per user?
Should rate limiting be on the client or server?
How do I handle rate limits with Axios interceptors?
Conclusion
Rate limiting protects APIs from abuse and ensures fair resource distribution. Client-side rate limiters (token bucket or sliding window) keep your app within API limits before requests are sent. Server-side middleware (Express) enforces limits with standard headers. Always handle 429 responses with Retry-After parsing and exponential backoff. For the error handling architecture, see advanced API error handling in JS full guide. For the event loop that schedules rate limit timers, see the JS event loop architecture complete guide.
More in this topic
OffscreenCanvas API in JS for UI Performance
Master the OffscreenCanvas API to offload rendering from the main thread. Covers worker-based 2D and WebGL rendering, animation loops inside workers, bitmap transfer, double buffering, chart rendering pipelines, image processing, and performance measurement strategies.
Advanced Web Workers for High Performance JS
Master Web Workers for truly parallel JavaScript execution. Covers dedicated and shared workers, structured cloning, transferable objects, SharedArrayBuffer with Atomics, worker pools, task scheduling, Comlink RPC patterns, module workers, and performance profiling strategies.
JavaScript Macros and Abstract Code Generation
Master JavaScript code generation techniques for compile-time and runtime metaprogramming. Covers AST manipulation, Babel plugin authorship, tagged template literals as macros, code generation pipelines, source-to-source transformation, compile-time evaluation, and safe eval alternatives.