Node.js AWS DynamoDB Scan specific field(s) only

I have many items in an AWS DynamoDB table, and I want to read one specific key-value pair from each of them using Node.js.

A sample item from my table looks like this:

{
  "device_id" : "abcde12345", // Primary Key, String
  "setting": {
    "left_motor": 30,
    "right_motor": 30,
    "motor_name": "STRING_WHAT_I_WANT_TO_CHECK"
  },
  "another_fields1" : "1234",
  "another_fields2": {
    "key1" : 1,
    "key2" : "2",
    "key3" : { ... }
  }
}

My code for collecting every item's motor_name is below.

// require modules
var fs       = require('fs'); // used for save json-result.
var AWS      = require('aws-sdk');
var config   = require('./config_file'); // secret data
var schedule = require('node-schedule');

// Configure the SDK with the region, endpoint and credentials taken from the
// local config module.
AWS.config.update({
  region: config.dynamodb.region,
  endpoint: config.dynamodb.endpoint,
  accessKeyId: config.dynamodb.access_key,
  secretAccessKey: config.dynamodb.secret_key
});

var documentClient = new AWS.DynamoDB.DocumentClient();

// Parameters shared by every page of the scan (onScan adds ExclusiveStartKey
// to this same object when it requests the next page).
//
// Only the nested attribute setting.motor_name is requested, so each returned
// item is trimmed down to { setting: { motor_name: ... } } instead of the full
// document. The attribute names go through ExpressionAttributeNames
// placeholders so the expression stays valid even if a name collides with a
// DynamoDB reserved word.
//
// NOTE: per the DynamoDB documentation a projection shrinks the response
// payload only; consumed read capacity is still based on the size of the
// items that were scanned.
var params = {
  TableName: config.dynamodb.device_table,
  ProjectionExpression: '#setting.#motor_name',
  ExpressionAttributeNames: {
    '#setting': 'setting',
    '#motor_name': 'motor_name'
  }
};

// The job is scheduled for 9 o'clock. It is meant to run only once: after the
// first run finishes, the pm2 process is killed by hand.

// Accumulated result: motor_name -> number of items carrying that name.
var motor_name_list = {};

// Requests the first page of the scan; onScan takes care of the later pages.
function startScan() {
  documentClient.scan(params, onScan);
}

var j = schedule.scheduleJob("0 0 9 * * *", startScan);

/**
 * Callback for documentClient.scan: tallies setting.motor_name over one page
 * of results and, when more pages remain, requests the next page.
 *
 * Side effects: increments counters in the file-level `motor_name_list` and
 * stores the continuation key in the file-level `params.ExclusiveStartKey`.
 *
 * @param {?Error} err  - error reported by the scan; logged, and the page is
 *                        then ignored
 * @param {Object} data - scan result; `data.Items` and `data.LastEvaluatedKey`
 *                        are read
 */
function onScan(err, data) {
  if (err) {
    console.log(err);
    return;
  }

  // READ SUCCESS: each item has the shape shown in the sample JSON, but only
  // setting.motor_name is looked at.
  data.Items.forEach(function(single_item) {
    var setting = single_item.setting;
    if (setting == null) {
      // An item without a `setting` map has no motor name to count; skipping
      // it keeps one malformed item from aborting the whole tally.
      return;
    }

    var motor_name = setting.motor_name;
    // Go through Object.prototype so that a motor literally named
    // "hasOwnProperty" cannot shadow the method on the tally object.
    if (Object.prototype.hasOwnProperty.call(motor_name_list, motor_name)) {
      motor_name_list[motor_name] += 1;
    } else {
      motor_name_list[motor_name] = 1;
    }
  });

  if (typeof data.LastEvaluatedKey !== 'undefined') {
    // More pages remain: continue from where this page stopped.
    params.ExclusiveStartKey = data.LastEvaluatedKey;

    // Wait one minute before the next page because of the table's read limit
    // (presumably its ReadCapacityUnits).
    setTimeout(function() {
      documentClient.scan(params, onScan);
    }, 60 * 1000);
  } else {
    // Last page reached:
    // save motor_name_list as a file (fs.writeFileSync...)
  }
}

Because of the delay, it takes a lot of time (10~15 minutes, in fact).
(A single onScan call reads 300~500 items, and I have almost 5000 items.)

Is there any possible way to scan only single-item without change my json format?

Thank you for reading my question.

Answers 1

  • Is there any possible way to scan only single-item without change my json format?

    Unfortunately no: you must use Scan to search on non-key attributes.

    Assuming the schema cannot be changed, but you can add an additional attribute and you are willing to accept some duplication:

    • Save a copy of motor_name as setting_motor_name as a top-level attribute
    • Create a GSI on setting_motor_name
    • Use Query on setting_motor_name to get millisecond-level response times

    At a glance:

    {
      "setting": {
        "left_motor": 30,
        "right_motor": 30,
        "motor_name": "STRING_WHAT_I_WANT_TO_CHECK"
      },
      "setting_motor_name": "STRING_WHAT_I_WANT_TO_CHECK"
    }
    

    The tradeoffs are:

    • You will have to take care of inserting setting_motor_name for any existing data you are keeping
    • Any update to setting.motor_name must also be applied to setting_motor_name

    --

    The best scenario is to keep this data flat, but that requires an invasive schema change. That way there is no duplication, and you can create a GSI on motor_name because it is a top-level attribute.

    {
      "setting": {
        "left_motor": 30,
        "right_motor": 30
      },
      "motor_name": "STRING_WHAT_I_WANT_TO_CHECK"
    }
    

Related Articles