From 78f3dfc7bd439ad7f71a571d0b95b11723638087 Mon Sep 17 00:00:00 2001
From: michal-kapala
Date: Wed, 27 Nov 2024 23:29:47 +0100
Subject: [PATCH] add batch request chunking (#1)

+ multiple batch requests of size configurable via max_batch
---
 .gitignore |  1 +
 README.md  | 27 ++++++++++++++------
 csv.ts     | 73 +++++++++++++++++++++++++++++++++++++++---------------
 json.ts    | 71 ++++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 126 insertions(+), 46 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4c49bd7..08298b5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 .env
+*.txt
diff --git a/README.md b/README.md
index d2cdcc1..1ebd1a2 100644
--- a/README.md
+++ b/README.md
@@ -51,13 +51,14 @@ For versions `>=0.23.2`, enable and configure [batch API](https://pocketbase.io/
 
 You can change the default import options to your needs:
 
-| Name      | Files    | Required | Description                                                                                 | Example use         |
-| --------- | -------- | -------- | ------------------------------------------------------------------------------------------- | ------------------- |
-| input     | CSV/JSON | Yes      | The name of the input file (with extension)                                                 | --input=example.csv |
-| id        | CSV/JSON | No       | Indicates that `_id` column should be typed as plain text, the type is detected by default  | --id                |
-| lf        | CSV      | No       | LF (`\n`) EOL character will be used instead of default CRLF (`\r\n`)                       | --lf                |
-| delimiter | CSV      | No       | Column value separator, defaults to `,`                                                     | --delimiter=";"     |
-| quote     | CSV      | No       | Value quote character, defaults to `'`                                                      | --quote="~"         |
+| Name      | Files    | Required | Description                                                                                                     | Example use         |
+| --------- | -------- | -------- | --------------------------------------------------------------------------------------------------------------- | ------------------- |
+| input     | CSV/JSON | Yes      | The name of the input file (with extension)                                                                     | --input=example.csv |
+| id        | CSV/JSON | No       | Indicates that `_id` column should be typed as plain text, the type is detected by default                      | --id                |
+| max_batch | CSV/JSON | No       | Max batch request size in rows; should not exceed PocketBase's `Max allowed batch requests` setting. Defaults to 50. | --max_batch=100     |
+| lf        | CSV      | No       | LF (`\n`) EOL character will be used instead of default CRLF (`\r\n`)                                           | --lf                |
+| delimiter | CSV      | No       | Column value separator, defaults to `,`                                                                         | --delimiter=";"     |
+| quote     | CSV      | No       | Value quote character, defaults to `'`                                                                          | --quote="~"         |
 
 # CSV
 
@@ -74,6 +75,12 @@ Basic import (root directory):
 ```
 deno run csv.ts --input=example.csv
 ```
 
+Import without permission prompts and with a max batch request size of 1 row:
+
+```
+deno run --allow-read --allow-env --allow-net csv.ts --input=example.csv --max_batch=1
+```
+
 Import without permission prompts and with `_id` column as text:
 
 ```
@@ -98,6 +105,12 @@ Basic import (root directory):
 deno run json.ts --input=example.json
 ```
 
+Import without permission prompts and with a max batch request size of 1 row:
+
+```
+deno run --allow-read --allow-env --allow-net json.ts --input=example.json --max_batch=1
+```
+
 Import without permission prompts and with `_id` column as text:
 
 ```
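A quick way to sanity-check the chunking arithmetic both importers introduce below: rows are split into `ceil(rows / max_batch)` batch requests, with the final request holding the remainder. A minimal standalone sketch of the same logic (`planBatchSizes` is an illustrative helper, not part of the patch):

```
// Mirrors the chunk-size logic added to csv.ts and json.ts below.
function planBatchSizes(total: number, maxBatch: number): number[] {
  const chunks = Math.floor(total / maxBatch);
  const batches = chunks * maxBatch < total ? chunks + 1 : chunks;
  const sizes: number[] = [];
  for (let chunk = 0; chunk < batches; chunk++) {
    // the last chunk holds the remainder, unless the total divides evenly
    let chunkSize = chunk === batches - 1 ? total % maxBatch : maxBatch;
    if (chunkSize === 0) chunkSize = maxBatch;
    sizes.push(chunkSize);
  }
  return sizes;
}

console.log(planBatchSizes(120, 50)); // [ 50, 50, 20 ]
console.log(planBatchSizes(100, 50)); // [ 50, 50 ] - no empty trailing request
console.log(planBatchSizes(3, 50));   // [ 3 ]
```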
diff --git a/csv.ts b/csv.ts
index 43d55ed..d7207ff 100644
--- a/csv.ts
+++ b/csv.ts
@@ -16,7 +16,7 @@ async function importCsv() {
 
   // parse CLI args
   const options = parse(Deno.args, {
-    string: ["input", "delimiter", "quote"],
+    string: ["input", "delimiter", "quote", "max_batch"],
     boolean: ["id", "lf"],
     default: {
       /**
@@ -39,6 +39,10 @@
        * Whether LF end-of-line should be used (defaults to CRLF).
        */
       lf: false,
+      /**
+       * Default max batch request size (configurable in PB dashboard).
+       */
+      max_batch: "50",
     },
   });
 
@@ -47,6 +51,14 @@
     Deno.exit(-1);
   }
 
+  // parseInt() returns NaN on malformed input instead of throwing,
+  // so the value is validated explicitly rather than with try/catch
+  const BATCH_SIZE = parseInt(options.max_batch, 10);
+  if (Number.isNaN(BATCH_SIZE) || BATCH_SIZE < 1) {
+    console.error("%cOptionError: invalid 'max_batch' value, should be a positive integer", "color: red");
+    Deno.exit(-1);
+  }
+
   // read the file
   const data = await readCsv(options.input, options);
 
@@ -83,11 +95,11 @@
   };
 
   // show the submitted collection
-  console.log(collection);
+  console.log("Collection", collection);
 
   // create the new collection
   // import will fail if a collection with the same name exists
-  await pb.collections.import([collection]);
+  await pb.collections.import([collection], false);
 
   console.log(
     `%c[Import] Collection '${collectionName}' created!`,
@@ -99,25 +111,46 @@
 
   console.log(`[Import] Importing ${rows.length} rows...`);
 
-  const batch = pb.createBatch();
-  for (let rowCount = 0; rowCount < rows.length; rowCount++)
-    batch.collection(collectionName).create(rows[rowCount])
-
-  try {
-    const result = await batch.send();
-    let createdCount = 0;
-    for (const reqRes of result) {
-      if (reqRes.status === 200)
-        createdCount++;
-    }
-    const color = createdCount === data.length ? "green" : "orange";
-    console.log(
-      `%c[Import] Imported rows: ${createdCount}/${data.length}`,
-      `color: ${color}`,
-    );
-  } catch(err) {
-    console.error(err);
+  const chunks = Math.floor(rows.length / BATCH_SIZE);
+  const batches = chunks * BATCH_SIZE < rows.length ? chunks + 1 : chunks;
+  let createdCount = 0;
+  let chunk = 0;
+  while (chunk < batches) {
+    // create a new batch request for this chunk
+    console.log(`[Import] Batch request #${chunk + 1}`);
+    const batch = pb.createBatch();
+    let chunkSize = chunk === batches - 1 ? rows.length % BATCH_SIZE : BATCH_SIZE;
+    if (chunkSize === 0)
+      chunkSize = BATCH_SIZE;
+    for (let rowCount = 0; rowCount < chunkSize; rowCount++)
+      batch.collection(collectionName).create(rows[chunk * BATCH_SIZE + rowCount]);
+    // send the chunk
+    try {
+      const result = await batch.send();
+      // TODO: this should become a debug-level log
+      //console.log("Array", result);
+      let chunkCreatedCount = 0;
+      for (const reqRes of result) {
+        if (reqRes.status === 200)
+          chunkCreatedCount++;
+      }
+      const color = chunkCreatedCount === chunkSize ? "green" : "orange";
+      console.log(
+        `%c[Import] Batch request #${chunk + 1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
+        `color: ${color}`,
+      );
+      createdCount += chunkCreatedCount;
+    } catch (err) {
+      console.error(err);
+    }
+    chunk++;
   }
+
+  const color = createdCount === rows.length ? "green" : "orange";
+  console.log(
+    `%c[Import] Imported rows: ${createdCount}/${rows.length}`,
+    `color: ${color}`,
+  );
 }
 
 importCsv();
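A note on the `max_batch` validation above: `parseInt` never throws on malformed input — it returns `NaN` — which is why the value is checked with an explicit `Number.isNaN` test rather than a try/catch. For example:

```
console.log(parseInt("50", 10));    // 50
console.log(parseInt("abc", 10));   // NaN - no exception is thrown
console.log(parseInt("50abc", 10)); // 50 - trailing garbage is ignored
console.log(Number.isNaN(parseInt("abc", 10))); // true
```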
diff --git a/json.ts b/json.ts
index 8cbc0f2..9a25b9f 100644
--- a/json.ts
+++ b/json.ts
@@ -16,7 +16,7 @@ async function importJson() {
 
   // parse CLI args
   const options = parse(Deno.args, {
-    string: ["input"],
+    string: ["input", "max_batch"],
     boolean: ["id"],
     default: {
       /**
@@ -27,6 +27,10 @@
        * Flag to always set `_id` column type to Plain text (detected by default).
        */
       id: false,
+      /**
+       * Default max batch request size (configurable in PB dashboard).
+       */
+      max_batch: "50",
     },
   });
 
@@ -35,6 +39,14 @@
     Deno.exit(-1);
   }
 
+  // parseInt() returns NaN on malformed input instead of throwing,
+  // so the value is validated explicitly rather than with try/catch
+  const BATCH_SIZE = parseInt(options.max_batch, 10);
+  if (Number.isNaN(BATCH_SIZE) || BATCH_SIZE < 1) {
+    console.error("%cOptionError: invalid 'max_batch' value, should be a positive integer", "color: red");
+    Deno.exit(-1);
+  }
+
   // read the file
   const data = await readJson(options.input);
 
@@ -65,7 +77,7 @@
   };
 
   // show the submitted collection
-  console.log(collection);
+  console.log("Collection", collection);
 
   // create the new collection
   // import will fail if a collection with the same name exists
@@ -81,25 +93,46 @@
 
   console.log(`[Import] Importing ${rows.length} rows...`);
 
-  const batch = pb.createBatch();
-  for (let rowCount = 0; rowCount < rows.length; rowCount++)
-    batch.collection(collectionName).create(rows[rowCount])
-
-  try {
-    const result = await batch.send();
-    let createdCount = 0;
-    for (const reqRes of result) {
-      if (reqRes.status === 200)
-        createdCount++;
-    }
-    const color = createdCount === data.length ? "green" : "orange";
-    console.log(
-      `%c[Import] Imported rows: ${createdCount}/${data.length}`,
-      `color: ${color}`,
-    );
-  } catch(err) {
-    console.error(err);
+  const chunks = Math.floor(rows.length / BATCH_SIZE);
+  const batches = chunks * BATCH_SIZE < rows.length ? chunks + 1 : chunks;
+  let createdCount = 0;
+  let chunk = 0;
+  while (chunk < batches) {
+    // create a new batch request for this chunk
+    console.log(`[Import] Batch request #${chunk + 1}`);
+    const batch = pb.createBatch();
+    let chunkSize = chunk === batches - 1 ? rows.length % BATCH_SIZE : BATCH_SIZE;
+    if (chunkSize === 0)
+      chunkSize = BATCH_SIZE;
+    for (let rowCount = 0; rowCount < chunkSize; rowCount++)
+      batch.collection(collectionName).create(rows[chunk * BATCH_SIZE + rowCount]);
+    // send the chunk
+    try {
+      const result = await batch.send();
+      // TODO: this should become a debug-level log
+      //console.log("Array", result);
+      let chunkCreatedCount = 0;
+      for (const reqRes of result) {
+        if (reqRes.status === 200)
+          chunkCreatedCount++;
+      }
+      const color = chunkCreatedCount === chunkSize ? "green" : "orange";
+      console.log(
+        `%c[Import] Batch request #${chunk + 1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
+        `color: ${color}`,
+      );
+      createdCount += chunkCreatedCount;
+    } catch (err) {
+      console.error(err);
+    }
+    chunk++;
  }
+
+  const color = createdCount === rows.length ? "green" : "orange";
+  console.log(
+    `%c[Import] Imported rows: ${createdCount}/${rows.length}`,
+    `color: ${color}`,
+  );
 }
 
 importJson();
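The chunked send loop is now duplicated between csv.ts and json.ts. If it needs to change again, it could be factored into a shared helper; a minimal sketch, assuming the same PocketBase JS SDK batch API the patch already uses (`importInChunks` is a hypothetical helper, and the per-chunk error handling is omitted for brevity):

```
import PocketBase from "npm:pocketbase";

// Queues up to `batchSize` create calls per batch request and returns
// the number of rows the server reported as created (status 200).
async function importInChunks(
  pb: PocketBase,
  collectionName: string,
  rows: Record<string, unknown>[],
  batchSize: number,
): Promise<number> {
  let createdCount = 0;
  for (let start = 0; start < rows.length; start += batchSize) {
    const batch = pb.createBatch();
    for (const row of rows.slice(start, start + batchSize))
      batch.collection(collectionName).create(row);
    const result = await batch.send();
    createdCount += result.filter((r) => r.status === 200).length;
  }
  return createdCount;
}
```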