add batch request chunking (#1)

+ multiple batch requests of size configurable via max_batch
michal-kapala 2024-11-27 23:29:47 +01:00
parent ec13e0fb17
commit 78f3dfc7bd
4 changed files with 126 additions and 46 deletions
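
Note for orientation before the per-file diffs: the commit replaces the previously single batch request with ceil(rows / max_batch) requests. Below is a minimal standalone sketch of that arithmetic (it mirrors the code added to csv.ts and json.ts; it is not itself part of the diff):
```
// Sketch of the chunking arithmetic this commit introduces.
// A partial final chunk adds one extra batch request.
function batchCount(rowCount: number, batchSize: number): number {
  const fullChunks = Math.floor(rowCount / batchSize);
  return fullChunks * batchSize < rowCount ? fullChunks + 1 : fullChunks;
}
console.log(batchCount(120, 50)); // 3 requests: 50 + 50 + 20 rows
console.log(batchCount(100, 50)); // 2 requests: 50 + 50 rows
```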

.gitignore (vendored, 1 line changed)

@@ -1 +1,2 @@
 .env
+*.txt

README.md

@@ -52,9 +52,10 @@ For versions `>=0.23.2`, enable and configure [batch API](https://pocketbase.io/
 You can change the default import options to your needs:
 | Name | Files | Required | Description | Example use |
-| --------- | -------- | -------- | -------------------------------------------------------------------------------------------- | ------------------- |
+| --------- | -------- | -------- | -------------------------------------------------------------------------------------------------------------- | ------------------- |
 | input | CSV/JSON | Yes | The name of the input file (with extension) | --input=example.csv |
 | id | CSV/JSON | No | Indicates that `_id` column should be typed as plain text, the type is detected by default | --id |
+| max_batch | CSV/JSON | No | Max batch request size in rows, should not exceed PocketBase's `Max allowed batch requests`. Defaults to 50. | --max_batch=100 |
 | lf | CSV | No | LF (`\n`) EOL character will be used instead of default CRLF (`\r\n`) | --lf |
 | delimiter | CSV | No | Column value separator, defaults to `,` | --delimiter=";" |
 | quote | CSV | No | Value quote character, defaults to `'` | --quote="~" |

@@ -74,6 +75,12 @@ Basic import (root directory):
 deno run csv.ts --input=example.csv
 ```
+
+Import without permission prompts and with max batch request size of 1 row:
+```
+deno run --allow-read --allow-env --allow-net csv.ts --input=example.csv --max_batch=1
+```
+
 Import without permission prompts and with `_id` column as text:
 ```

@@ -98,6 +105,12 @@ Basic import (root directory):
 deno run json.ts --input=example.json
 ```
+
+Import without permission prompts and with max batch request size of 1 row:
+```
+deno run --allow-read --allow-env --allow-net json.ts --input=example.json --max_batch=1
+```
+
 Import without permission prompts and with `_id` column as text:
 ```
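
Side note on the new option: the table says `max_batch` should not exceed PocketBase's `Max allowed batch requests`, but neither script enforces this, so oversized batch requests would presumably be rejected server-side. A hypothetical client-side clamp, assuming the server limit is known (the `SERVER_MAX` constant and values here are illustrative, not part of this repo):
```
// Hypothetical clamp, not implemented in this repo: keep the requested
// batch size within the assumed server-side limit before chunking.
const SERVER_MAX = 50; // assumed "Max allowed batch requests" value from the PB dashboard
const requested = parseInt("100", 10); // e.g. the value passed via --max_batch=100
const BATCH_SIZE = Math.min(Math.max(requested, 1), SERVER_MAX);
console.log(BATCH_SIZE); // 50
```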
``` ```

csv.ts (55 lines changed)

@@ -16,7 +16,7 @@ async function importCsv() {
   // parse CLI args
   const options = parse(Deno.args, {
-    string: ["input", "delimiter", "quote"],
+    string: ["input", "delimiter", "quote", "max_batch"],
     boolean: ["id", "lf"],
     default: {
       /**

@@ -39,6 +39,10 @@ async function importCsv() {
        * Whether LF end-of-line should be used (defaults to CRLF).
        */
       lf: false,
+      /**
+       * Default max batch request size (configurable in PB dashboard).
+       */
+      max_batch: "50"
     },
   });

@@ -47,6 +51,14 @@ async function importCsv() {
     Deno.exit(-1);
   }

+  let BATCH_SIZE = 50;
+  try {
+    BATCH_SIZE = parseInt(options.max_batch)
+  } catch (err) {
+    console.error("%cOptionError: invalid 'max_batch' value, should be an integer", "color: red");
+    Deno.exit(-1);
+  }
+
   // read the file
   const data = await readCsv(options.input, options);

@@ -83,11 +95,11 @@ async function importCsv() {
   };

   // show the submitted collection
-  console.log(collection);
+  console.log("Collection", collection);

   // create the new collection
   // import will fail if a collection with the same name exists
-  await pb.collections.import([collection]);
+  await pb.collections.import([collection], false);

   console.log(
     `%c[Import] Collection '${collectionName}' created!`,

@@ -99,25 +111,46 @@ async function importCsv() {
   console.log(`[Import] Importing ${rows.length} rows...`);

+  const chunks = Math.floor(rows.length / BATCH_SIZE);
+  const batches = chunks * BATCH_SIZE < rows.length ? chunks + 1 : chunks;
+  let createdCount = 0;
+  let chunk = 0;
+  while (chunk < batches) {
+    // create new request
+    console.log(`[Import] Batch request #${chunk+1}`);
     const batch = pb.createBatch();
-    for (let rowCount = 0; rowCount < rows.length; rowCount++)
-      batch.collection(collectionName).create(rows[rowCount])
+    let chunkSize = chunk === batches - 1 ? rows.length % BATCH_SIZE : BATCH_SIZE;
+    if (chunkSize === 0)
+      chunkSize = BATCH_SIZE;
+    for (let rowCount = 0; rowCount < chunkSize; rowCount++)
+      batch.collection(collectionName).create(rows[chunk * BATCH_SIZE + rowCount])

+    // send the chunk
     try {
       const result = await batch.send();
-      let createdCount = 0;
+      // TODO: this should become a debug-level log
+      //console.log("Array<BatchRequestResult>", result);
+      let chunkCreatedCount = 0;
       for (const reqRes of result) {
         if (reqRes.status === 200)
-          createdCount++;
+          chunkCreatedCount++;
       }
+      const color = chunkCreatedCount === chunkSize ? "green" : "orange";
+      console.log(
+        `%c[Import] Batch request #${chunk+1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
+        `color: ${color}`,
+      );
+      createdCount += chunkCreatedCount;
+    } catch(err) {
+      console.error(err);
+    }
+    chunk++;
+  }
+
   const color = createdCount === data.length ? "green" : "orange";
   console.log(
     `%c[Import] Imported rows: ${createdCount}/${data.length}`,
     `color: ${color}`,
   );
-  } catch(err) {
-    console.error(err);
-  }
 }

 importCsv();
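
One caveat about the `max_batch` validation added above (and identically in json.ts below): `parseInt` returns `NaN` for non-numeric input instead of throwing, so the `try`/`catch` can never reach the error branch. A sketch of a check that would, offered as an alternative rather than what the commit does:
```
// Sketch only: parseInt() does not throw on bad input, it returns NaN,
// so an explicit NaN check is needed to reach the error path.
const parsed = parseInt("abc", 10);
if (Number.isNaN(parsed)) {
  console.error("%cOptionError: invalid 'max_batch' value, should be an integer", "color: red");
  Deno.exit(-1);
}
```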

json.ts (53 lines changed)

@@ -16,7 +16,7 @@ async function importJson() {
   // parse CLI args
   const options = parse(Deno.args, {
-    string: ["input"],
+    string: ["input", "max_batch"],
     boolean: ["id"],
     default: {
       /**

@@ -27,6 +27,10 @@ async function importJson() {
        * Flag to always set `_id` column type to Plain text (detected by default).
        */
       id: false,
+      /**
+       * Default max batch request size (configurable in PB dashboard).
+       */
+      max_batch: "50"
     },
   });

@@ -35,6 +39,14 @@ async function importJson() {
     Deno.exit(-1);
   }

+  let BATCH_SIZE = 50;
+  try {
+    BATCH_SIZE = parseInt(options.max_batch)
+  } catch (err) {
+    console.error("%cOptionError: invalid 'max_batch' value, should be an integer", "color: red");
+    Deno.exit(-1);
+  }
+
   // read the file
   const data = await readJson(options.input);

@@ -65,7 +77,7 @@ async function importJson() {
   };

   // show the submitted collection
-  console.log(collection);
+  console.log("Collection", collection);

   // create the new collection
   // import will fail if a collection with the same name exists

@@ -81,25 +93,46 @@ async function importJson() {
   console.log(`[Import] Importing ${rows.length} rows...`);

+  const chunks = Math.floor(rows.length / BATCH_SIZE);
+  const batches = chunks * BATCH_SIZE < rows.length ? chunks + 1 : chunks;
+  let createdCount = 0;
+  let chunk = 0;
+  while (chunk < batches) {
+    // create new request
+    console.log(`[Import] Batch request #${chunk+1}`);
     const batch = pb.createBatch();
-    for (let rowCount = 0; rowCount < rows.length; rowCount++)
-      batch.collection(collectionName).create(rows[rowCount])
+    let chunkSize = chunk === batches - 1 ? rows.length % BATCH_SIZE : BATCH_SIZE;
+    if (chunkSize === 0)
+      chunkSize = BATCH_SIZE;
+    for (let rowCount = 0; rowCount < chunkSize; rowCount++)
+      batch.collection(collectionName).create(rows[chunk * BATCH_SIZE + rowCount])

+    // send the chunk
     try {
       const result = await batch.send();
-      let createdCount = 0;
+      // TODO: this should become a debug-level log
+      //console.log("Array<BatchRequestResult>", result);
+      let chunkCreatedCount = 0;
       for (const reqRes of result) {
         if (reqRes.status === 200)
-          createdCount++;
+          chunkCreatedCount++;
       }
+      const color = chunkCreatedCount === chunkSize ? "green" : "orange";
+      console.log(
+        `%c[Import] Batch request #${chunk+1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
+        `color: ${color}`,
+      );
+      createdCount += chunkCreatedCount;
+    } catch(err) {
+      console.error(err);
+    }
+    chunk++;
+  }
+
   const color = createdCount === data.length ? "green" : "orange";
   console.log(
     `%c[Import] Imported rows: ${createdCount}/${data.length}`,
     `color: ${color}`,
   );
-  } catch(err) {
-    console.error(err);
-  }
 }

 importJson();
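
Finally, the last-chunk sizing in both scripts hinges on `rows.length % BATCH_SIZE`, with a zero remainder special-cased back to a full batch. A standalone sketch of just that edge case:
```
// Mirrors the chunkSize logic in csv.ts and json.ts: the final chunk holds
// the remainder, unless the row count divides evenly into full batches.
function lastChunkSize(rowCount: number, batchSize: number): number {
  const remainder = rowCount % batchSize;
  return remainder === 0 ? batchSize : remainder;
}
console.log(lastChunkSize(120, 50)); // 20
console.log(lastChunkSize(100, 50)); // 50
```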