Pythian Blog: Technical Track

Choosing best index for your MongoDB query

Indexing plays a quintessential role in database query performance and MongoDB is no exception. Choosing the best index for a query will improve its performance, while a bad one could lead to huge execution times and high memory utilization.

In this article, I will walk you through the process of finding the right index for a common tuning scenario. Consider an example collection with 81K documents similar to the one below:

MongoDB Enterprise repl:PRIMARY> db.city_inspections.findOne() { "_id" : ObjectId("56d61033a378eccde8a83550"), "id" : "10057-2015-ENFO", "certificate_number" : 6007104, "business_name" : "LD BUSINESS SOLUTIONS", "date" : "Feb 25 2015", "result" : "Violation Issued", "sector" : "Tax Preparers - 891", "address" : { "city" : "NEW YORK", "zip" : 10030, "street" : "FREDERICK DOUGLASS BLVD", "number" : 2655 } } 
And the following query: db.city_inspections.find({certificate_number:{$gt:6000000,$lte:9400000},"address.zip":10030}).sort({date:-1}) The query is looking for all documents where certificate number is between 6M to 9.4M and the zip code is equal to 10030. The result will be sorted by date, in descending order. Let’s now observe how it’s handled by MongoDB:
MongoDB Enterprise > explain.find({certificate_number:{$gt:6000000,$lte:9400000},"address.zip":10030}).sort({date:-1})
 {
  "queryPlanner" : {
  "plannerVersion" : 1,
  "namespace" : "blog.city_inspections",
  "indexFilterSet" : false,
  "parsedQuery" : {
  "$and" : [
  {
  "address.zip" : {
  "$eq" : 10030
  }
  },
  {
  "certificate_number" : {
  "$lte" : 9400000
  }
  },
  {
  "certificate_number" : {
  "$gt" : 6007104
  }
  }
  ]
  },
  "winningPlan" : { "stage" : "SORT", "sortPattern" : { "date" : -1 }, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ { "address.zip" : { "$eq" : 10030 } }, { "certificate_number" : { "$lte" : 9400000 } }, { "certificate_number" : { "$gt" : 6007104 } } ] }, "direction" : "forward"
  }
  }
  },
  "rejectedPlans" : [ ]
  },
  "executionStats" : {
  "executionSuccess" : true,
  "nReturned" : 82, "executionTimeMillis" : 149, "totalKeysExamined" : 0, "totalDocsExamined" : 81047,
  "executionStages" : {
  "stage" : "SORT",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 20,
  "works" : 81133,
  "advanced" : 82,
  "needTime" : 81050,
  "needYield" : 0,
  "saveState" : 633,
  "restoreState" : 633,
  "isEOF" : 1,
  "invalidates" : 0,
  "sortPattern" : {
  "date" : -1
  },
  "memUsage" : 24329,
  "memLimit" : 33554432,
  "inputStage" : {
  "stage" : "SORT_KEY_GENERATOR",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 20,
  "works" : 81050,
  "advanced" : 82,
  "needTime" : 80967,
  "needYield" : 0,
  "saveState" : 633,
  "restoreState" : 633,
  "isEOF" : 1,
  "invalidates" : 0,
  "inputStage" : {
  "stage" : "COLLSCAN",
  "filter" : {
  "$and" : [
  {
  "address.zip" : {
  "$eq" : 10030
  }
  },
  {
  "certificate_number" : {
  "$lte" : 9400000
  }
  },
  {
  "certificate_number" : {
  "$gt" : 6007104
  }
  }
  ]
  },
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 120,
  "works" : 81049,
  "advanced" : 82,
  "needTime" : 80966,
  "needYield" : 0,
  "saveState" : 633,
  "restoreState" : 633,
  "isEOF" : 1,
  "invalidates" : 0,
  "direction" : "forward",
  "docsExamined" : 81047
  }
  }
  }
  },
  "serverInfo" : {
  "host" : "m103",
  "port" : 27000,
  "version" : "3.6.5-rc0",
  "gitVersion" : "a20ecd3e3a174162052ff99913bc2ca9a839d618"
  },
  "ok" : 1
 }
 MongoDB Enterprise > 
  

From the execution stats, we can observe that MongoDB has performed a collection scan (totalDocsExamined:81047 and totalKeysScanned:0) to return 82 documents (nReturned) in 149 ms. The time taken might look small here but actually, the ratio between totalDocsExamined:totalKeysExamine:nReturned is what we are interested in. We might decide to create an index as per the keys’ order in the query:

 {certificate_number:1,"address.zip":1,date:-1}
MongoDB Enterprise > db.city_inspections.createIndex({certificate_number:1,"address.zip":1, date:-1})
 {
  "createdCollectionAutomatically" : false,
  "numIndexesBefore" : 1,
  "numIndexesAfter" : 2,
  "ok" : 1
 }
 
After creating the index and running the query again, we get the following execution plan:
MongoDB Enterprise > explain.find({certificate_number:{$gt:6007104,$lte:9400000}, "address.zip":10030}).sort({date:-1})
 {
  "queryPlanner" : {
  "plannerVersion" : 1,
  "namespace" : "blog.city_inspections",
  "indexFilterSet" : false,
  "parsedQuery" : {
  "$and" : [
  {
  "address.zip" : {
  "$eq" : 10030
  }
  },
  {
  "certificate_number" : {
  "$lte" : 9400000
  }
  },
  {
  "certificate_number" : {
  "$gt" : 6007104
  }
  }
  ]
  },
  "winningPlan" : { "stage" : "SORT", "sortPattern" : { "date" : -1 }, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "certificate_number" : 1, "address.zip" : 1, "date" : -1 }, "indexName" : "certificate_number_1_address.zip_1_date_-1", "isMultiKey" : false, "multiKeyPaths" : { "certificate_number" : [ ], "address.zip" : [ ], "date" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "certificate_number" : [ "(6007104.0, 9400000.0]" ], "address.zip" : [ "[10030.0, 10030.0]" ], "date" : [ "[MaxKey, MinKey]"
  ]
  }
  }
  }
  }
  },
  "rejectedPlans" : [ ]
  },
  "executionStats" : {
  "executionSuccess" : true,
  "nReturned" : 82, "executionTimeMillis" : 174, "totalKeysExamined" : 40270, "totalDocsExamined" : 82,
  "executionStages" : {
  "stage" : "SORT",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 171,
  "works" : 40354,
  "advanced" : 82,
  "needTime" : 40271,
  "needYield" : 0,
  "saveState" : 316,
  "restoreState" : 316,
  "isEOF" : 1,
  "invalidates" : 0,
  "sortPattern" : {
  "date" : -1
  },
  "memUsage" : 24329,
  "memLimit" : 33554432,
  "inputStage" : {
  "stage" : "SORT_KEY_GENERATOR",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 171,
  "works" : 40271,
  "advanced" : 82,
  "needTime" : 40188,
  "needYield" : 0,
  "saveState" : 316,
  "restoreState" : 316,
  "isEOF" : 1,
  "invalidates" : 0,
  "inputStage" : {
  "stage" : "FETCH",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 171,
  "works" : 40270,
  "advanced" : 82,
  "needTime" : 40187,
  "needYield" : 0,
  "saveState" : 316,
  "restoreState" : 316,
  "isEOF" : 1,
  "invalidates" : 0,
  "docsExamined" : 82,
  "alreadyHasObj" : 0,
  "inputStage" : {
  "stage" : "IXSCAN",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 171,
  "works" : 40270,
  "advanced" : 82,
  "needTime" : 40187,
  "needYield" : 0,
  "saveState" : 316,
  "restoreState" : 316,
  "isEOF" : 1,
  "invalidates" : 0,
  "keyPattern" : {
  "certificate_number" : 1,
  "address.zip" : 1,
  "date" : -1
  },
  "indexName" : "certificate_number_1_address.zip_1_date_-1",
  "isMultiKey" : false,
  "multiKeyPaths" : {
  "certificate_number" : [ ],
  "address.zip" : [ ],
  "date" : [ ]
  },
  "isUnique" : false,
  "isSparse" : false,
  "isPartial" : false,
  "indexVersion" : 2,
  "direction" : "forward",
  "indexBounds" : {
  "certificate_number" : [
  "(6007104.0, 9400000.0]"
  ],
  "address.zip" : [
  "[10030.0, 10030.0]"
  ],
  "date" : [
  "[MaxKey, MinKey]"
  ]
  },
  "keysExamined" : 40270,
  "seeks" : 40188,
  "dupsTested" : 0,
  "dupsDropped" : 0,
  "seenInvalidated" : 0
  }
  }
  }
  }
  },
  "serverInfo" : {
  "host" : "m103",
  "port" : 27000,
  "version" : "3.6.5-rc0",
  "gitVersion" : "a20ecd3e3a174162052ff99913bc2ca9a839d618"
  },
  "ok" : 1
 }
Indexing should reduce execution time, but in this case, execution time has increased by 10%! totalDocsExamined:nReturned ratio is now 1:1 which is optimal, although 40k+ index keys were examined this time. This is because the engine is first fetching the documents by certificate number to then find the ones matching the zip code. There are 40K records with a certificate number between 6M to 9.4M but only 82 of them have a zip code equal to 10030. Sorting is done at the last stage. Even though it’s a covering index, it is not very efficient, and we still didn’t achieve the best ratio between totalKeysExamined:totalDocsExamined:nReturn possible (1:1:1). Let's try making the equality key the first index key, and certificate_number (range), the second: {"address.zip":1,certificate_number:1,date:-1} Let’s create the above index and dive into the plan:
MongoDB Enterprise > db.city_inspections.createIndex({"address.zip":1,certificate_number:1, date:-1})
 {
  "createdCollectionAutomatically" : false,
  "numIndexesBefore" : 2,
  "numIndexesAfter" : 3,
  "ok" : 1
 }
 

After creating the index and running the query again, we get the following execution plan:

MongoDB Enterprise > explain.find({certificate_number:{$gt:6007104,$lte:9400000},"address.zip":10030}).sort({date:-1})
 {
  "queryPlanner" : {
  "plannerVersion" : 1,
  "namespace" : "blog.city_inspections",
  "indexFilterSet" : false,
  "parsedQuery" : {
  "$and" : [
  {
  "address.zip" : {
  "$eq" : 10030
  }
  },
  {
  "certificate_number" : {
  "$lte" : 9400000
  }
  },
  {
  "certificate_number" : {
  "$gt" : 6007104
  }
  }
  ]
  },
   "winningPlan" : { "stage" : "SORT", "sortPattern" : { "date" : -1 }, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "address.zip" : 1, "certificate_number" : 1, "date" : -1 }, "indexName" : "address.zip_1_certificate_number_1_date_-1", "isMultiKey" : false, "multiKeyPaths" : { "address.zip" : [ ], "certificate_number" : [ ], "date" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "address.zip" : [ "[10030.0, 10030.0]" ], "certificate_number" : [ "(6007104.0, 9400000.0]" ], "date" : [ "[MaxKey, MinKey]"
  ]
  }
  }
  }
  }
  },
  "rejectedPlans" : [
  {
  "stage" : "SORT",
  "sortPattern" : {
  "date" : -1
  },
  "inputStage" : {
  "stage" : "SORT_KEY_GENERATOR",
  "inputStage" : {
  "stage" : "FETCH",
  "inputStage" : {
  "stage" : "IXSCAN",
  "keyPattern" : {
  "certificate_number" : 1,
  "address.zip" : 1,
  "date" : -1
  },
  "indexName" : "certificate_number_1_address.zip_1_date_-1",
  "isMultiKey" : false,
  "multiKeyPaths" : {
  "certificate_number" : [ ],
  "address.zip" : [ ],
  "date" : [ ]
  },
  "isUnique" : false,
  "isSparse" : false,
  "isPartial" : false,
  "indexVersion" : 2,
  "direction" : "forward",
  "indexBounds" : {
  "certificate_number" : [
  "(6007104.0, 9400000.0]"
  ],
  "address.zip" : [
  "[10030.0, 10030.0]"
  ],
  "date" : [
  "[MaxKey, MinKey]"
  ]
  }
  }
  }
  }
  }
  ]
  },
  "executionStats" : {
  "executionSuccess" : true,
  "nReturned" : 82, "executionTimeMillis" : 2, "totalKeysExamined" : 82, "totalDocsExamined" : 82,
  "executionStages" : {
  "stage" : "SORT",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 168,
  "advanced" : 82,
  "needTime" : 84,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "sortPattern" : {
  "date" : -1
  },
  "memUsage" : 24329,
  "memLimit" : 33554432,
  "inputStage" : {
  "stage" : "SORT_KEY_GENERATOR",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 84,
  "advanced" : 82,
  "needTime" : 1,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "inputStage" : {
  "stage" : "FETCH",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 83,
  "advanced" : 82,
  "needTime" : 0,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "docsExamined" : 82,
  "alreadyHasObj" : 0,
  "inputStage" : {
  "stage" : "IXSCAN",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 83,
  "advanced" : 82,
  "needTime" : 0,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "keyPattern" : {
  "address.zip" : 1,
  "certificate_number" : 1,
  "date" : -1
  },
  "indexName" : "address.zip_1_certificate_number_1_date_-1",
  "isMultiKey" : false,
  "multiKeyPaths" : {
  "address.zip" : [ ],
  "certificate_number" : [ ],
  "date" : [ ]
  },
  "isUnique" : false,
  "isSparse" : false,
  "isPartial" : false,
  "indexVersion" : 2,
  "direction" : "forward",
  "indexBounds" : {
  "address.zip" : [
  "[10030.0, 10030.0]"
  ],
  "certificate_number" : [
  "(6007104.0, 9400000.0]"
  ],
  "date" : [
  "[MaxKey, MinKey]"
  ]
  },
  "keysExamined" : 82,
  "seeks" : 1,
  "dupsTested" : 0,
  "dupsDropped" : 0,
  "seenInvalidated" : 0
  }
  }
  }
  }
  },
  "serverInfo" : {
  "host" : "m103",
  "port" : 27000,
  "version" : "3.6.5-rc0",
  "gitVersion" : "a20ecd3e3a174162052ff99913bc2ca9a839d618"
  },
  "ok" : 1
 }
 
 
The above Index seems to work better! Now we can see that totalKeysScanned: totalDocsExamined:nRetuned is 1:1:1 as we expected. We might conclude that this is the best index for this query, but let's see if we can do something to reduce the amount of sorting required. Consider the following index:
 {"address.zip":1,date:-1,certificate_number:1}

And the execution plan with the above index would be:

MongoDB Enterprise > db.city_inspections.createIndex({"address.zip":1,date:-1,certificate_number:1})
 {
  "createdCollectionAutomatically" : false,
  "numIndexesBefore" : 3,
  "numIndexesAfter" : 4,
  "ok" : 1
 }
 

After creating the index, let's see the execution plan:


 MongoDB Enterprise > explain.find({certificate_number:{$gt:6007104,$lte:9400000},"address.zip":10030}).sort({date:-1})
 {
  "queryPlanner" : {
  "plannerVersion" : 1,
  "namespace" : "blog.city_inspections",
  "indexFilterSet" : false,
  "parsedQuery" : {
  "$and" : [
  {
  "address.zip" : {
  "$eq" : 10030
  }
  },
  {
  "certificate_number" : {
  "$lte" : 9400000
  }
  },
  {
  "certificate_number" : {
  "$gt" : 6007104
  }
  }
  ]
  },
   "winningPlan" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "address.zip" : 1, "date" : -1, "certificate_number" : 1 }, "indexName" : "address.zip_1_date_-1_certificate_number_1", "isMultiKey" : false, "multiKeyPaths" : { "address.zip" : [ ], "date" : [ ], "certificate_number" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "address.zip" : [ "[10030.0, 10030.0]" ], "date" : [ "[MaxKey, MinKey]" ], "certificate_number" : [ "(6007104.0, 9400000.0]" ] } } }, "rejectedPlans" : [ { "stage" : "SORT", "sortPattern" : { "date" : -1 }, "inputStage" : { "stage" : "SORT_KEY_GENERATOR", "inputStage" : { "stage" : "FETCH", "inputStage" : { "stage" : "IXSCAN", "keyPattern" : { "certificate_number" : 1, "address.zip" : 1, "date" : -1 }, "indexName" : "certificate_number_1_address.zip_1_date_-1", "isMultiKey" : false, "multiKeyPaths" : { "certificate_number" : [ ], "address.zip" : [ ], "date" : [ ] }, "isUnique" : false, "isSparse" : false, "isPartial" : false, "indexVersion" : 2, "direction" : "forward", "indexBounds" : { "certificate_number" : [ "(6007104.0, 9400000.0]" ], "address.zip" : [ "[10030.0, 10030.0]" ], "date" : [ "[MaxKey, MinKey]"
  ]
  }
  }
  }
  }
  },
  {
  "stage" : "SORT",
  "sortPattern" : {
  "date" : -1
  },
  "inputStage" : {
  "stage" : "SORT_KEY_GENERATOR",
  "inputStage" : {
  "stage" : "FETCH",
  "inputStage" : {
  "stage" : "IXSCAN",
  "keyPattern" : {
  "address.zip" : 1,
  "certificate_number" : 1,
  "date" : -1
  },
  "indexName" : "address.zip_1_certificate_number_1_date_-1",
  "isMultiKey" : false,
  "multiKeyPaths" : {
  "address.zip" : [ ],
  "certificate_number" : [ ],
  "date" : [ ]
  },
  "isUnique" : false,
  "isSparse" : false,
  "isPartial" : false,
  "indexVersion" : 2,
  "direction" : "forward",
  "indexBounds" : {
  "address.zip" : [
  "[10030.0, 10030.0]"
  ],
  "certificate_number" : [
  "(6007104.0, 9400000.0]"
  ],
  "date" : [
  "[MaxKey, MinKey]"
  ]
  }
  }
  }
  }
  }
  ]
  },
  "executionStats" : {
  "executionSuccess" : true,
   "nReturned" : 82, "executionTimeMillis" : 3, "totalKeysExamined" : 129, "totalDocsExamined" : 82,
  "executionStages" : {
  "stage" : "FETCH",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 130,
  "advanced" : 82,
  "needTime" : 46,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "docsExamined" : 82,
  "alreadyHasObj" : 0,
  "inputStage" : {
  "stage" : "IXSCAN",
  "nReturned" : 82,
  "executionTimeMillisEstimate" : 0,
  "works" : 129,
  "advanced" : 82,
  "needTime" : 46,
  "needYield" : 0,
  "saveState" : 3,
  "restoreState" : 3,
  "isEOF" : 1,
  "invalidates" : 0,
  "keyPattern" : {
  "address.zip" : 1,
  "date" : -1,
  "certificate_number" : 1
  },
  "indexName" : "address.zip_1_date_-1_certificate_number_1",
  "isMultiKey" : false,
  "multiKeyPaths" : {
  "address.zip" : [ ],
  "date" : [ ],
  "certificate_number" : [ ]
  },
  "isUnique" : false,
  "isSparse" : false,
  "isPartial" : false,
  "indexVersion" : 2,
  "direction" : "forward",
  "indexBounds" : {
  "address.zip" : [
  "[10030.0, 10030.0]"
  ],
  "date" : [
  "[MaxKey, MinKey]"
  ],
  "certificate_number" : [
  "(6007104.0, 9400000.0]"
  ]
  },
  "keysExamined" : 129,
  "seeks" : 47,
  "dupsTested" : 0,
  "dupsDropped" : 0,
  "seenInvalidated" : 0
  }
  }
  },
  "serverInfo" : {
  "host" : "m103",
  "port" : 27000,
  "version" : "3.6.5-rc0",
  "gitVersion" : "a20ecd3e3a174162052ff99913bc2ca9a839d618"
  },
  "ok" : 1
 }
 MongoDB Enterprise >
 
 
 

From the execution stats, we can see that totalDocsExamined:totalKeysExamined:nReturned is not exactly 1:1:1 but, as

No Comments Yet

Let us know what you think

Subscribe by email