Query Optimization
MongoDB Query Optimization
Use Indexes
// Without index - Collection scan
db.users.find({ email: "john@example.com" });
// Examines: 1,000,000 documents
// With index - Index scan
db.users.createIndex({ email: 1 });
db.users.find({ email: "john@example.com" });
// Examines: 1 document
// Compound index for multiple fields
db.users.createIndex({ status: 1, createdAt: -1 });
db.users.find({ status: "active" }).sort({ createdAt: -1 });Analyze with explain()
// Execution stats
const explain = db.users.find({ email: "john@example.com" })
.explain("executionStats");
console.log({
executionTimeMillis: explain.executionStats.executionTimeMillis,
totalDocsExamined: explain.executionStats.totalDocsExamined,
totalKeysExamined: explain.executionStats.totalKeysExamined,
indexUsed: explain.executionStats.executionStages.indexName
});
// Good performance indicators:
// - totalDocsExamined close to nReturned
// - Uses index (IXSCAN vs COLLSCAN)
// - Low executionTimeMillisCovered Queries
// Covered query - No document fetch needed
db.users.createIndex({ email: 1, name: 1 });
db.users.find(
{ email: "john@example.com" },
{ _id: 0, email: 1, name: 1 } // Only indexed fields
);
// Verify covered
const explain = db.users.find(
{ email: "john@example.com" },
{ _id: 0, email: 1, name: 1 }
).explain("executionStats");
console.log(explain.executionStats.totalDocsExamined); // Should be 0Projection
// ❌ Bad - Returns all fields
db.users.find({ status: "active" });
// ✅ Good - Returns only needed fields
db.users.find(
{ status: "active" },
{ name: 1, email: 1, _id: 0 }
);
// Exclude large fields
db.users.find(
{ status: "active" },
{ largeField: 0 }
);Limit Results
// ❌ Bad - Returns all matching documents
db.users.find({ status: "active" });
// ✅ Good - Limit results
db.users.find({ status: "active" }).limit(10);
// Pagination
const page = 2;
const limit = 10;
db.users.find({ status: "active" })
.skip((page - 1) * limit)
.limit(limit);Aggregation Optimization
// ❌ Bad - Filter after grouping
db.orders.aggregate([
{ $group: { _id: "$userId", total: { $sum: "$amount" } } },
{ $match: { total: { $gte: 1000 } } }
]);
// ✅ Good - Filter early
db.orders.aggregate([
{ $match: { status: "completed" } }, // Filter first
{ $project: { userId: 1, amount: 1 } }, // Limit fields
{ $group: { _id: "$userId", total: { $sum: "$amount" } } },
{ $match: { total: { $gte: 1000 } } }
]);Cassandra Query Optimization
Partition Key Queries
// ✅ Good - Query by partition key
await client.execute(
'SELECT * FROM users WHERE user_id = ?',
[userId],
{ prepare: true }
);
// ❌ Bad - Query without partition key (full scan)
await client.execute(
'SELECT * FROM users WHERE email = ?',
[email],
{ prepare: true }
);Clustering Key Ordering
-- Table with clustering key
CREATE TABLE user_events (
user_id UUID,
timestamp TIMESTAMP,
event_type TEXT,
PRIMARY KEY (user_id, timestamp)
) WITH CLUSTERING ORDER BY (timestamp DESC);
-- Efficient query (uses clustering order)
SELECT * FROM user_events
WHERE user_id = ?
AND timestamp > ?
LIMIT 10;Avoid ALLOW FILTERING
-- ❌ Bad - Requires ALLOW FILTERING (slow)
SELECT * FROM users WHERE email = 'john@example.com' ALLOW FILTERING;
-- ✅ Good - Use materialized view
CREATE MATERIALIZED VIEW users_by_email AS
SELECT * FROM users
WHERE email IS NOT NULL
PRIMARY KEY (email, user_id);
SELECT * FROM users_by_email WHERE email = 'john@example.com';Redis Query Optimization
Use Appropriate Data Structures
// ❌ Bad - Store list as JSON string
await client.set('user:1:orders', JSON.stringify([1, 2, 3]));
const orders = JSON.parse(await client.get('user:1:orders'));
// ✅ Good - Use Redis list
await client.rPush('user:1:orders', ['1', '2', '3']);
const orders = await client.lRange('user:1:orders', 0, -1);
// ✅ Better - Use sorted set for ordered data
await client.zAdd('user:1:orders', [
{ score: Date.now(), value: '1' },
{ score: Date.now() + 1000, value: '2' }
]);Pipeline Commands
// ❌ Bad - Multiple round trips
for (let i = 0; i < 100; i++) {
await client.get(`key:${i}`);
}
// ✅ Good - Single round trip
const pipeline = client.multi();
for (let i = 0; i < 100; i++) {
pipeline.get(`key:${i}`);
}
const results = await pipeline.exec();Use Lua Scripts
// Atomic operations with single round trip
const script = `
local values = {}
for i, key in ipairs(KEYS) do
values[i] = redis.call('GET', key)
end
return values
`;
const keys = Array.from({ length: 100 }, (_, i) => `key:${i}`);
const values = await client.eval(script, { keys });DynamoDB Query Optimization
Use Query Instead of Scan
// ❌ Bad - Scan entire table
const command = new ScanCommand({
TableName: 'Users',
FilterExpression: 'email = :email',
ExpressionAttributeValues: {
':email': 'john@example.com'
}
});
// ✅ Good - Query with GSI
const command = new QueryCommand({
TableName: 'Users',
IndexName: 'EmailIndex',
KeyConditionExpression: 'email = :email',
ExpressionAttributeValues: {
':email': 'john@example.com'
}
});Projection Expressions
// ❌ Bad - Return all attributes
const command = new GetCommand({
TableName: 'Users',
Key: { userId: '123' }
});
// ✅ Good - Return only needed attributes
const command = new GetCommand({
TableName: 'Users',
Key: { userId: '123' },
ProjectionExpression: 'userId, #n, email',
ExpressionAttributeNames: {
'#n': 'name'
}
});Batch Operations
// ❌ Bad - Multiple single gets
for (const id of userIds) {
await docClient.send(new GetCommand({
TableName: 'Users',
Key: { userId: id }
}));
}
// ✅ Good - Batch get
const command = new BatchGetCommand({
RequestItems: {
Users: {
Keys: userIds.map(id => ({ userId: id }))
}
}
});
const response = await docClient.send(command);General Optimization Techniques
Connection Pooling
// MongoDB connection pool
const client = new MongoClient(uri, {
maxPoolSize: 50,
minPoolSize: 10
});
// Cassandra connection pool
const client = new cassandra.Client({
contactPoints: ['node1', 'node2'],
pooling: {
coreConnectionsPerHost: {
[cassandra.types.distance.local]: 2,
[cassandra.types.distance.remote]: 1
}
}
});Caching
// Cache frequently accessed data
class UserService {
async getUser(userId) {
// Try cache first
const cached = await redis.get(`user:${userId}`);
if (cached) {
return JSON.parse(cached);
}
// Load from database
const user = await db.collection('users').findOne({ _id: userId });
// Cache for 5 minutes
await redis.setex(`user:${userId}`, 300, JSON.stringify(user));
return user;
}
}Denormalization
// ❌ Bad - Multiple queries
const user = await db.users.findOne({ _id: userId });
const orders = await db.orders.find({ userId }).toArray();
// ✅ Good - Embedded data
const user = await db.users.findOne({ _id: userId });
// user.recentOrders already embeddedMonitoring and Profiling
// MongoDB profiler
db.setProfilingLevel(2); // Log all operations
db.system.profile.find().sort({ ts: -1 }).limit(10);
// Slow query log
db.setProfilingLevel(1, { slowms: 100 }); // Log queries > 100ms
// .NET with monitoring
public class MongoService
{
private readonly IMongoClient _client;
public MongoService()
{
var settings = MongoClientSettings.FromConnectionString(connectionString);
settings.ClusterConfigurator = cb =>
{
cb.Subscribe<CommandStartedEvent>(e =>
{
Console.WriteLine($"Command: {e.CommandName}");
Console.WriteLine($"Query: {e.Command}");
});
cb.Subscribe<CommandSucceededEvent>(e =>
{
Console.WriteLine($"Duration: {e.Duration}");
});
};
_client = new MongoClient(settings);
}
}Performance Checklist
const optimizationChecklist = {
indexes: [
'Create indexes on frequently queried fields',
'Use compound indexes for multiple fields',
'Drop unused indexes',
'Monitor index usage'
],
queries: [
'Use projection to limit fields',
'Limit result sets',
'Use covered queries when possible',
'Avoid scatter-gather queries'
],
aggregation: [
'Filter early with $match',
'Limit fields with $project',
'Use indexes for $match and $sort',
'Use $limit to reduce processing'
],
general: [
'Use connection pooling',
'Implement caching layer',
'Denormalize when appropriate',
'Monitor slow queries',
'Use batch operations'
]
};Interview Tips
- Explain indexes: Critical for query performance
- Show explain(): Analyze query execution
- Demonstrate covered queries: No document fetch
- Discuss projection: Limit returned fields
- Mention caching: Redis for frequently accessed data
- Show examples: MongoDB, Cassandra, DynamoDB
Summary
Optimize NoSQL queries with proper indexing, covered queries, projection, and result limiting. Use explain() to analyze performance. Filter early in aggregation pipelines. Query by partition/shard keys. Avoid full table scans. Use batch operations. Implement caching layer. Monitor slow queries. Denormalize when appropriate. Connection pooling for efficiency. Essential for production NoSQL performance.
Test Your Knowledge
Take a quick quiz to test your understanding of this topic.