Howdy there! You may have come here from our previous article doing the same thing with Python. How you got here though doesn't matter, let's talk shop. You're probably tired of uploading your entire dataset to Clarifai one image or file at a time. That's such a hassle. But I am here to show you there is an easier way of going about this. The programmatic way. This article will just be showing the code you need to do batching and assumes that you have done authorization and have an application ready.
Just a few things to remember:
- The optimal batch size is 32
- Calls are asynchronous, meaning that they'll all get run at once
- This is much faster and more efficient than loading one file at a time. The Server Gods will be smiling down upon you for it
- If your files are fairly large (perhaps in the 1+ MB range) you may need to lower the batch size amount to avoid broken connection errors
In this portion we'll show how to do it when adding inputs along with tags and/or custom metadata, or just for predictions if that's what floats your boat.
IMPORTANT: For uploads via a JSON file, we're assuming that your data is structured like this:
[
{
"url": "url",
"metadata": {
"item 1": "",
"item 2": "",
"item 3": ""
}
},
{
"url": "url",
"metadata": {
"item 1": "",
"item 2": "",
"item 3": ""
}
}
// ...
]
It doesn't have to be structured like this; however, if it isn't, you'll need to adjust the code below to match your structure.
Also, if you're only doing predictions on a public model, you won't need to add the images to a collection first. You can make an app.models.predict call on the imageList variable.
Local files directly from a folder (on your computer)
const Clarifai = require('clarifai')
const fs = require('fs')
const path = require('path')
// Credentials
// Replace YOUR_API_KEY with the API key of your own application
const app = new Clarifai.App({ apiKey: 'YOUR_API_KEY' })
// Maximum number of inputs sent to Clarifai in a single request
const BATCH_SIZE = 32
// Folder whose files are read and uploaded
const DIR_PATH = '/PATH/TO/file'
/**
 * Reads every regular file directly inside a directory and returns the
 * contents as base64 strings.
 *
 * @param {string} dirPath - Directory to read; a trailing slash is optional.
 * @returns {string[]} One base64-encoded string per file, in the order
 *   reported by fs.readdirSync.
 * @throws {Error} Propagates any fs error (e.g. the directory does not exist).
 */
const readFile = function(dirPath) {
  return fs.readdirSync(dirPath)
    // path.join supplies the separator, so dirPath works with or without a trailing slash
    .map((name) => path.join(dirPath, name))
    // Sub-directories cannot be read as files (EISDIR), so only keep regular files
    .filter((filePath) => fs.statSync(filePath).isFile())
    .map((filePath) => fs.readFileSync(filePath).toString('base64'))
}
/**
 * Wraps file content in the object shape handed to Clarifai.
 *
 * @param {string} input - File content (base64 string in this script).
 * @returns {{base64: string}} Object carrying the content under `base64`.
 */
const convertToInput = (input) => {
  const clarifaiInput = { base64: input }
  return clarifaiInput
}
/**
 * Uploads inputs to Clarifai in batches of at most BATCH_SIZE.
 *
 * Every batch request is started immediately, without waiting for the
 * previous one to finish. Each response is logged; a failed batch is logged
 * with console.error and does not reject the returned promise.
 *
 * @param {Object[]} inputs - Input objects to pass to app.inputs.create.
 * @returns {Promise<Array>} Resolves once every batch request has been
 *   handled (successfully or not), so callers can wait for completion.
 */
const uploadInputs = function(inputs){
  const pendingBatches = []
  for (let start = 0; start < inputs.length; start += BATCH_SIZE) {
    console.log("Current batch: " + (start / BATCH_SIZE + 1))
    // slice clamps at the end of the array, so the last batch may be shorter
    const imageList = inputs.slice(start, start + BATCH_SIZE)
    // Uploads inputs to Clarifai
    // REQUIRED for Visual Search or Custom Training
    pendingBatches.push(
      app.inputs.create(imageList).then(
        // Success
        (response) => { console.log(response) },
        // Error
        (error) => { console.error(error) }
      )
    )
    // And/or get predictions from these images
    // app.models.predict(Clarifai.FOOD_MODEL, imageList).then(
    // // Success
    // (response) => { console.log(response) },
    // // Error
    // (error) => { console.error(error) }
    // )
  }
  // Hand the in-flight requests back instead of leaving them floating
  return Promise.all(pendingBatches)
}
// Read the files, wrap each one as an input object, then upload in batches
const images = readFile(DIR_PATH)
console.log(`Number of images to process: ${images.length}`)
uploadInputs(images.map((image) => convertToInput(image)))
URLs from an external file
(Note: Change "url" to "base64" in the JSON file if using that instead)
JSON
const Clarifai = require('clarifai')
const fs = require('fs')
// Credentials
// Replace YOUR_API_KEY with the API key of your own application
const app = new Clarifai.App({ apiKey: 'YOUR_API_KEY' })
// Maximum number of inputs sent to Clarifai in a single request
const BATCH_SIZE = 32
// JSON file that holds the inputs to upload
const FILE_PATH = '/PATH/TO/file.json'
/**
 * Loads and parses a UTF-8 encoded JSON file.
 *
 * @param {string} filePath - Path of the JSON file to read.
 * @returns {*} The parsed JSON content.
 */
const readFile = (filePath) => {
  const rawText = fs.readFileSync(filePath, 'utf8')
  return JSON.parse(rawText)
}
/**
 * Uploads inputs to Clarifai in batches of at most BATCH_SIZE.
 *
 * Every batch request is started immediately, without waiting for the
 * previous one to finish. Each response is logged; a failed batch is logged
 * with console.error and does not reject the returned promise.
 *
 * @param {Object[]} inputs - Input objects to pass to app.inputs.create.
 * @returns {Promise<Array>} Resolves once every batch request has been
 *   handled (successfully or not), so callers can wait for completion.
 */
const uploadInputs = function(inputs){
  const pendingBatches = []
  for (let start = 0; start < inputs.length; start += BATCH_SIZE) {
    console.log("Current batch: " + (start / BATCH_SIZE + 1))
    // slice clamps at the end of the array, so the last batch may be shorter
    const imageList = inputs.slice(start, start + BATCH_SIZE)
    // Uploads inputs to Clarifai
    // REQUIRED for Visual Search or Custom Training
    pendingBatches.push(
      app.inputs.create(imageList).then(
        // Success
        (response) => { console.log(response) },
        // Error
        (error) => { console.error(error) }
      )
    )
    // And/or get predictions from these images
    // app.models.predict(Clarifai.FOOD_MODEL, imageList).then(
    // // Success
    // (response) => { console.log(response) },
    // // Error
    // (error) => { console.error(error) }
    // )
  }
  // Hand the in-flight requests back instead of leaving them floating
  return Promise.all(pendingBatches)
}
// Load the inputs from the JSON file and upload them in batches
const images = readFile(FILE_PATH)
console.log(`Number of images to process: ${images.length}`)
uploadInputs(images)