Compare commits

...

9 Commits

Author SHA1 Message Date
84589aec16 remove deno settings 2024-12-29 17:50:15 +01:00
78f3dfc7bd add batch request chunking (#1)
+ multiple batch requests of size configurable via max_batch
2024-11-27 23:29:47 +01:00
ec13e0fb17 Update README.md
+ batch API config note
2024-11-27 17:53:51 +01:00
b5d11cfc28 feat: use batch api (#1)
+ bulk record creation support for JSON and CSV
2024-11-27 17:34:56 +01:00
df56216609 update to v0.23.1
+ package version bump to 0.22.0
+ support for PB v0.23.1 collections
2024-11-26 17:39:41 +01:00
ce03fd9991 update schema gen
+ support for PocketBase v0.22.23
+ support for Deno 2
+ now depends on pocketbase npm package
2024-11-06 16:17:06 +01:00
8692f25e71 Update README.md 2023-06-28 02:17:19 +02:00
3c78bb2a1b url type added
+ support for `Url` columns
2023-06-28 02:16:09 +02:00
63bf053c36 JSON import support
+ JSON import added
+ code structure refactored
2023-03-10 20:32:57 +01:00
16 changed files with 864 additions and 318 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.env
*.txt

View File

@ -1,5 +0,0 @@
{
"deno.enable": true,
"deno.lint": true,
"editor.defaultFormatter": "denoland.vscode-deno"
}

View File

@ -7,7 +7,12 @@ Automatically creates typed PocketBase collection and populates it with data.
Columns conflicting with PocketBase's autogenerated system fields (`id`,
`created`, `updated`; case-insensitive check, target column name's case is not
affected) are prefixed with `_`.
affected) are prefixed with `_`. Collection conflict will cause the import to
fail without any changes to the database.
No rules, options or constraints are set for the new collection (see the import
log for a full structure). You can modify them after the import from
PocketBase's dashboard.
## Types
@ -20,9 +25,11 @@ PocketBase types are:
- `Email`
- `DateTime`
- `JSON`
- `Url`
# Configuration
Install the latest [Deno runtime](https://deno.land/) to run the scripts.
Install the latest [Deno runtime](https://deno.com/) to run the scripts.
In the root directory create `.env` file with the following environment
variables:
@ -33,20 +40,33 @@ variables:
Place your import files inside of `input` directory.
Make sure the target PocketBase instance is running and pointed to by
`POCKETBASE_URL`.
For versions `>=0.23.2`, enable and configure [batch API](https://pocketbase.io/docs/api-records/#batch-createupdateupsertdelete-records) access: go to *Settings > Application* in the PocketBase dashboard (see below). Adjust the parameters to your performance requirements, or keep the defaults for small to medium-sized datasets.
![Batch API settings](https://github.com/user-attachments/assets/3205bf36-1e86-471b-a1b9-c28c6b118065)
# Options
You can change the default import options to your needs:
| Name | Files | Required | Description | Example use |
| --------- | -------- | -------- | ------------------------------------------------------------------------------------------ | ------------------- |
| input | CSV/JSON | Yes | The name of the input file (with extension) | --input=example.csv |
| id | CSV/JSON | No | Indicates that `_id` column should be typed as plain text, the type is detected by default | --id |
| lf | CSV | No | LF (`\n`) EOL character will be used instead of default CLRF (`\r\n`) | --lf |
| delimiter | CSV | No | Column value separator, defaults to `,` | --delimiter=";" |
| quote | CSV | No | Value quote character, defaults to `'` | --quote='~" |
| Name | Files | Required | Description | Example use |
| --------- | -------- | -------- | ------------------------------------------------------------------------------------------------------------ | ------------------- |
| input | CSV/JSON | Yes | The name of the input file (with extension) | --input=example.csv |
| id | CSV/JSON | No | Indicates that `_id` column should be typed as plain text, the type is detected by default | --id |
| max_batch | CSV/JSON | No | Max batch request size in rows, should not exceed PocketBase's `Max allowed batch requests`. Defaults to 50. | --max_batch=100 |
| lf | CSV | No | LF (`\n`) EOL character will be used instead of default CRLF (`\r\n`) | --lf |
| delimiter | CSV | No | Column value separator, defaults to `,` | --delimiter=";" |
| quote | CSV | No | Value quote character, defaults to `'` | --quote="~" |
# CSV
The import is **not** multiline-safe. If your file contains strings that span
multiple lines, the best option is to convert the input file to JSON with
tools like
[DB Browser for SQLite](https://sqlitebrowser.org/).
## Examples
Basic import (root directory):
@ -55,6 +75,12 @@ Basic import (root directory):
deno run csv.ts --input=example.csv
```
Import without permission prompts and with max batch request size of 1 row:
```
deno run --allow-read --allow-env --allow-net csv.ts --input=example.csv --max_batch=1
```
Import without permission prompts and with `_id` column as text:
```
@ -66,3 +92,27 @@ Import with custom parser options (you need to adjust `example.csv`):
```
deno run csv.ts --input=example.csv --delimiter=";" --quote="~" --lf
```
# JSON
The required data format is an array of row objects.
## Examples
Basic import (root directory):
```
deno run json.ts --input=example.json
```
Import without permission prompts and with max batch request size of 1 row:
```
deno run --allow-read --allow-env --allow-net json.ts --input=example.json --max_batch=1
```
Import without permission prompts and with `_id` column as text:
```
deno run --allow-read --allow-env --allow-net json.ts --input=example.json --id
```

118
csv.ts
View File

@ -1,16 +1,12 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import PocketBase, {
Collection,
SchemaField,
} from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import PocketBase from 'pocketbase';
import "https://deno.land/std@0.178.0/dotenv/load.ts";
import { parse } from "https://deno.land/std@0.175.0/flags/mod.ts";
import { parseCsv } from "./utils/csv.ts";
import { createSchema, parseData } from "./utils/pocketbase.ts";
import { parseData, readCsv } from "./utils/csv.ts";
import { createSchema } from "./utils/pocketbase.ts";
import { Collection } from "./types/pocketbase.ts";
/**
* Structures and populates a new collection from a CSV file.
* @returns
*/
async function importCsv() {
// config data
@ -20,7 +16,7 @@ async function importCsv() {
// parse CLI args
const options = parse(Deno.args, {
string: ["input", "delimiter", "quote"],
string: ["input", "delimiter", "quote", "max_batch"],
boolean: ["id", "lf"],
default: {
/**
@ -43,35 +39,35 @@ async function importCsv() {
* Whether LF end-of-line should be used (defaults to CRLF).
*/
lf: false,
/**
* Default max batch request size (configurable in PB dashboard).
*/
max_batch: "50"
},
});
if (options.input === null) {
console.error("%cOptionError: CSV file name not supplied", "color: red");
return;
Deno.exit(-1);
}
// parser options
const csvOptions = {
columnSeparator: options.delimiter,
lineSeparator: options.lf ? "\n" : "\r\n",
quote: options.quote,
};
let BATCH_SIZE = 50;
try {
BATCH_SIZE = parseInt(options.max_batch)
} catch (err) {
console.error("%cOptionError: invalid 'max_batch' value, should be an integer", "color: red");
Deno.exit(-1);
}
// parses CSV
const data = await parseCsv(options.input, csvOptions);
// empty file
// read the file
const data = await readCsv(options.input, options);
if (data === null) {
console.error(
`%c[Import] No data to import from ${options.input}`,
"color: red",
);
return;
Deno.exit(-1);
return
}
// sanitize the file name for collection name
const collectName = options.input.replace(".csv", "");
const collectionName = options.input.replace(".csv", "");
// connect to pocketbase
const pb = new PocketBase(pbUrl);
@ -80,59 +76,79 @@ async function importCsv() {
const _authResponse = await pb.admins.authWithPassword(adminName, adminPass);
// collection schema object
const schema: SchemaField[] = createSchema(data, options.id);
const fields = createSchema(data, options.id, "csv");
const creationDate = new Date().toISOString();
// the new collection
const collection = new Collection({
name: collectName,
const collection: Collection = {
name: collectionName,
type: "base",
system: false,
schema,
fields,
indexes: [],
listRule: null,
viewRule: null,
createRule: null,
updateRule: null,
deleteRule: null,
options: {},
created: creationDate,
updated: creationDate,
});
};
// show the submitted collection
console.log(collection);
console.log("Collection", collection);
// create the new collection
// import will fail if a collection with the same name exists
await pb.collections.import([collection]);
await pb.collections.import([collection], false);
console.log(
`%c[Import] Collection '${collectName}' created!`,
`%c[Import] Collection '${collectionName}' created!`,
"color: green",
);
// rows to be sent via PocketBase API
const rows = parseData(data, schema);
const rows = parseData(data, fields);
// number of successfully inserted rows
let insertCount = 0;
console.log(`[Import] Importing ${rows.length} rows...`);
for (insertCount; insertCount < rows.length; insertCount++) {
const chunks = Math.floor(rows.length / BATCH_SIZE);
const batches = chunks * BATCH_SIZE < rows.length ? chunks + 1 : chunks;
let createdCount = 0;
let chunk = 0;
while (chunk < batches) {
// create new request
console.log(`[Import] Batch request #${chunk+1}`);
const batch = pb.createBatch();
let chunkSize = chunk === batches - 1 ? rows.length % BATCH_SIZE : BATCH_SIZE;
if (chunkSize === 0)
chunkSize = BATCH_SIZE;
for (let rowCount = 0; rowCount < chunkSize; rowCount++)
batch.collection(collectionName).create(rows[chunk * BATCH_SIZE + rowCount])
// send the chunk
try {
await pb.collection(collectName).create(rows[insertCount], {
"$autoCancel": false,
});
} catch (e) {
// breaks on first error
console.error(e);
break;
const result = await batch.send();
// TODO: this should become a debug-level log
//console.log("Array<BatchRequestResult>", result);
let chunkCreatedCount = 0;
for (const reqRes of result) {
if (reqRes.status === 200)
chunkCreatedCount++;
}
const color = chunkCreatedCount === chunkSize ? "green" : "orange";
console.log(
`%c[Import] Batch request #${chunk+1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
`color: ${color}`,
);
createdCount += chunkCreatedCount;
} catch(err) {
console.error(err);
}
chunk++;
}
const color = insertCount === rows.length ? "green" : "orange";
const color = createdCount === data.length ? "green" : "orange";
console.log(
`%c[Import] Imported rows: ${insertCount}/${rows.length}`,
`%c[Import] Imported rows: ${createdCount}/${data.length}`,
`color: ${color}`,
);
}

View File

@ -1,5 +1,8 @@
{
"tasks": {
"dev": "deno run --watch main.ts"
},
"imports": {
"pocketbase": "npm:pocketbase@^0.22.0"
}
}

21
deno.lock generated
View File

@ -1,5 +1,13 @@
{
"version": "2",
"version": "4",
"specifiers": {
"npm:pocketbase@0.22": "0.22.0"
},
"npm": {
"pocketbase@0.22.0": {
"integrity": "sha512-jhP0Dcf2Z/4q+SNxqpgV+SJWLZeU0rOJA4TKMxwU7X2olFSBr0jhLu+G6Pc+RIQ40IYrC3WMGwbASPrK6rxQOw=="
}
},
"remote": {
"https://deno.land/std@0.164.0/_util/asserts.ts": "d0844e9b62510f89ce1f9878b046f6a57bf88f208a10304aab50efcb48365272",
"https://deno.land/std@0.164.0/bytes/bytes_list.ts": "aba5e2369e77d426b10af1de0dcc4531acecec27f9b9056f4f7bfbf8ac147ab4",
@ -8,7 +16,6 @@
"https://deno.land/std@0.164.0/fmt/colors.ts": "9e36a716611dcd2e4865adea9c4bec916b5c60caad4cdcdc630d4974e6bb8bd4",
"https://deno.land/std@0.164.0/fs/exists.ts": "6a447912e49eb79cc640adacfbf4b0baf8e17ede6d5bed057062ce33c4fa0d68",
"https://deno.land/std@0.164.0/io/buffer.ts": "245f1762a949082ddc0a6e9b15589d0be2d29c150266decd04320b8a8318f9f6",
"https://deno.land/std@0.164.0/io/types.d.ts": "107e1e64834c5ba917c783f446b407d33432c5d612c4b3430df64fc2b4ecf091",
"https://deno.land/std@0.164.0/log/handlers.ts": "61ab932822ba268ad42b25de6f0014892fe469df7627b14245ad32db6fd8b54d",
"https://deno.land/std@0.164.0/log/levels.ts": "82c965b90f763b5313e7595d4ba78d5095a13646d18430ebaf547526131604d1",
"https://deno.land/std@0.164.0/log/logger.ts": "b545159727b023825ee6814d9178fb2a7472f8d95d704c253b771b95c658cf8c",
@ -21,10 +28,12 @@
"https://deno.land/std@0.178.0/dotenv/load.ts": "0636983549b98f29ab75c9a22a42d9723f0a389ece5498fe971e7bb2556a12e2",
"https://deno.land/std@0.178.0/dotenv/mod.ts": "8dcbc8a40b896a0bf094582aaeadbfc76d3528872faf2efc0302beb1d2f6afd0",
"https://deno.land/x/csv@v0.8.0/deps.ts": "597e3d0c81eca1c519ce20f0a7ed573d31cfbb5625c5013763550d717ba69dfa",
"https://deno.land/x/csv@v0.8.0/mod.ts": "2a13285c8716257aa5d6b5021d45c74dcb275eaaa46776106ddf78b071cabd88",
"https://deno.land/x/csv@v0.8.0/reader.ts": "ad6543223f8d1c17c78b18dbaa464c3d81f7a0970b08aa5fbde9eb48e2a680d1",
"https://deno.land/x/csv@v0.8.0/utils.ts": "7f2467acb031244c150774b9d6caa5e8c40f9857031a947c6ad20765c5d7bd20",
"https://deno.land/x/csv@v0.8.0/writer.ts": "18a46b755cc215695862d3a896b3a7b24b33cb7ee626cbebeb7138193cb7edcf",
"https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs": "bcc669b0d3844523cd8121a75c2ead5e14f1dc9426bcf8904621fc26a75b44c0"
"https://deno.land/x/csv@v0.8.0/utils.ts": "7f2467acb031244c150774b9d6caa5e8c40f9857031a947c6ad20765c5d7bd20"
},
"workspace": {
"dependencies": [
"npm:pocketbase@0.22"
]
}
}

View File

@ -1,3 +1,3 @@
id,name,is_good,score,email,json,date
1,john,1,0.8412384213497,john.doe@example.com,[],2023-03-05T00:35:21.104Z
2,fire,0,-80347329472,firebase@google.com,{"xd": "nice meme"},
id,name,is_good,score,email,json,date,url
1,john,1,0.8412384213497,john.doe@example.com,[],2023-03-05T00:35:21.104Z,https://example.com
2,fire,0,-80347329472,firebase@google.com,{"xd": "nice meme"},,sftp://fire@8.8.8.8/base.txt
Can't render this file because it contains an unexpected character in line 3 and column 44.

24
input/example.json Normal file
View File

@ -0,0 +1,24 @@
[
{
"id": 1,
"name": "john",
"is_good": true,
"score": 0.8412384213497,
"email": "john.doe@example.com",
"json": [],
"date": "2023-03-05T00:35:21.104Z",
"url": "https://example.com"
},
{
"id": 2,
"name": "fire",
"is_good": false,
"score": -80347329472,
"email": "firebase@google.com",
"json": {
"xd": "nice meme"
},
"date": null,
"url": "sftp://fire@8.8.8.8/base.txt"
}
]

138
json.ts Normal file
View File

@ -0,0 +1,138 @@
import PocketBase from 'pocketbase';
import "https://deno.land/std@0.178.0/dotenv/load.ts";
import { parse } from "https://deno.land/std@0.175.0/flags/mod.ts";
import { readJson, resolveConflicts } from "./utils/json.ts";
import { createSchema } from "./utils/pocketbase.ts";
import { Collection } from './types/pocketbase.ts'
/**
 * Structures and populates a new collection from a JSON file.
 *
 * Reads the `--input`, `--id` and `--max_batch` CLI options, creates a
 * collection named after the input file and uploads the rows in batch
 * requests of at most `max_batch` rows each.
 */
async function importJson() {
  // config data
  const pbUrl = Deno.env.get("POCKETBASE_URL") ?? "http://localhost:8090";
  const adminName = Deno.env.get("ADMIN_EMAIL") ?? "";
  const adminPass = Deno.env.get("ADMIN_PASSWORD") ?? "";

  // parse CLI args
  const options = parse(Deno.args, {
    string: ["input", "max_batch"],
    boolean: ["id"],
    default: {
      /**
       * Name of the JSON file to import (with extension).
       */
      input: null,
      /**
       * Flag to always set `_id` column type to Plain text (detected by default).
       */
      id: false,
      /**
       * Default max batch request size (configurable in PB dashboard).
       */
      max_batch: "50"
    },
  });

  if (options.input === null) {
    console.error("%cOptionError: JSON file name not supplied", "color: red");
    Deno.exit(-1);
  }

  // Number parsing never throws: invalid input yields NaN, and a size below 1
  // would make the chunking below meaningless, so validate explicitly.
  const batchSize = Number(options.max_batch);
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    console.error("%cOptionError: invalid 'max_batch' value, should be a positive integer", "color: red");
    Deno.exit(-1);
  }

  // read the file
  const data = await readJson(options.input);

  // sanitize the file name for collection name
  const collectionName = options.input.replace(".json", "");

  // connect to pocketbase
  const pb = new PocketBase(pbUrl);

  // authenticate as super admin
  await pb.admins.authWithPassword(adminName, adminPass);

  // collection schema object
  const fields = createSchema(data, options.id, "json");

  // the new collection
  const collection: Collection = {
    name: collectionName,
    type: "base",
    system: false,
    fields,
    indexes: [],
    listRule: null,
    viewRule: null,
    createRule: null,
    updateRule: null,
    deleteRule: null,
  };

  // show the submitted collection
  console.log("Collection", collection);

  // create the new collection
  // import will fail if a collection with the same name exists
  await pb.collections.import([collection], false);

  console.log(
    `%c[Import] Collection '${collectionName}' created!`,
    "color: green",
  );

  // prefix conflicting column names
  const rows = resolveConflicts(data);

  console.log(`[Import] Importing ${rows.length} rows...`);

  const batchCount = Math.ceil(rows.length / batchSize);
  let createdCount = 0;

  for (let chunk = 0; chunk < batchCount; chunk++) {
    // create new request
    console.log(`[Import] Batch request #${chunk+1}`);
    const batch = pb.createBatch();

    // the last chunk may be shorter than `batchSize`
    const chunkRows = rows.slice(chunk * batchSize, (chunk + 1) * batchSize);
    const chunkSize = chunkRows.length;
    for (const row of chunkRows)
      batch.collection(collectionName).create(row);

    // send the chunk; a failed request is logged and the import moves on
    try {
      const result = await batch.send();
      // TODO: this should become a debug-level log
      //console.log("Array<BatchRequestResult>", result);

      let chunkCreatedCount = 0;
      for (const reqRes of result) {
        if (reqRes.status === 200)
          chunkCreatedCount++;
      }

      const color = chunkCreatedCount === chunkSize ? "green" : "orange";
      console.log(
        `%c[Import] Batch request #${chunk+1} - imported rows: ${chunkCreatedCount}/${chunkSize}`,
        `color: ${color}`,
      );
      createdCount += chunkCreatedCount;
    } catch(err) {
      console.error(err);
    }
  }

  const color = createdCount === data.length ? "green" : "orange";
  console.log(
    `%c[Import] Imported rows: ${createdCount}/${data.length}`,
    `color: ${color}`,
  );
}
importJson();

View File

@ -6,9 +6,9 @@ import { CommonCSVReaderOptions } from "https://deno.land/x/csv@v0.8.0/reader.ts
export type ParserOptions = Partial<CommonCSVReaderOptions>;
/**
* Raw row object with string properties returned by `csv.readCSVObjects`.
* Raw CSV row returned by `csv.readCSVObjects`.
*/
export type RawRow = {
export type RawCsvRow = {
[key: string]: string;
};
@ -19,3 +19,9 @@ export type ParsedRow = {
// deno-lint-ignore no-explicit-any
[key: string]: any;
};
/**
 * CSV-related import options handed to `readCsv`.
 */
export type CsvOptions = {
  /** Passed to the CSV reader as `columnSeparator`. */
  delimiter: string;
  /** Passed to the CSV reader as `quote`. */
  quote: string;
  /** When `true` the line separator is `\n`, otherwise `\r\n`. */
  lf: boolean;
};

7
types/json.ts Normal file
View File

@ -0,0 +1,7 @@
/**
 * A single row object as produced by `JSON.parse`: string keys mapped to
 * arbitrary JSON values.
 */
// deno-lint-ignore no-explicit-any
export type RawJsonRow = Record<string, any>;

View File

@ -8,6 +8,7 @@ export const POCKETBASE_TYPE = {
EMAIL: "email",
JSON: "json",
DATETIME: "date",
URL: "url",
} as const;
type ObjectValues<T> = T[keyof T];
@ -32,3 +33,70 @@ export const POCKETBASE_SYSFIELD = [
"created",
"updated",
];
/**
 * Properties shared by every collection field definition.
 */
export interface SchemaField {
  id?: string;
  name: string;
  type: PocketbaseType;
  system: boolean;
  hidden: boolean;
  presentable: boolean;
  required: boolean;
}
/**
 * Field definition with `type: "bool"`; adds nothing to `SchemaField`.
 */
export interface BoolField extends SchemaField {
  type: "bool";
}
/**
 * Field definition with `type: "number"`: optional `min`/`max` plus the
 * required `onlyInt` flag.
 */
export interface NumberField extends SchemaField {
  type: "number";
  min?: number;
  max?: number;
  onlyInt: boolean;
}
/**
 * Field definition with `type: "text"`: optional `min`/`max`, the `pattern`
 * and `autogeneratePattern` strings, and the `primaryKey` flag.
 */
export interface TextField extends SchemaField {
  type: "text";
  min?: number;
  max?: number;
  pattern: string;
  autogeneratePattern: string;
  primaryKey: boolean;
}
/**
 * Field definition with `type: "email"` and optional domain lists.
 */
export interface EmailField extends SchemaField {
  type: "email";
  onlyDomains?: string[];
  exceptDomains?: string[];
}
/**
 * Field definition with `type: "date"`; `min` and `max` are strings.
 */
export interface DateField extends SchemaField {
  type: "date";
  min: string;
  max: string;
}
/**
 * Field definition with `type: "json"` and a numeric `maxSize`.
 */
export interface JsonField extends SchemaField {
  type: "json";
  maxSize: number;
}
/**
 * Field definition with `type: "url"` and optional domain lists.
 */
export interface UrlField extends SchemaField {
  type: "url";
  onlyDomains?: string[];
  exceptDomains?: string[];
}
/**
 * Shape of the collection object built by the import scripts and submitted
 * to PocketBase.
 */
export interface Collection {
  id?: string;
  name: string;
  type: string;
  system: boolean;

  /** Column definitions. */
  fields: SchemaField[];
  indexes: string[];

  // API rules
  listRule: string | null;
  viewRule: string | null;
  createRule: string | null;
  updateRule: string | null;
  deleteRule: string | null;
}

View File

@ -1,5 +1,50 @@
import { readCSVObjects } from "https://deno.land/x/csv@v0.8.0/reader.ts";
import { ParserOptions, RawRow } from "../types/csv.ts";
import {
CommonCSVReaderOptions,
readCSVObjects,
} from "https://deno.land/x/csv@v0.8.0/reader.ts";
import {
CsvOptions,
ParsedRow,
ParserOptions,
RawCsvRow,
} from "../types/csv.ts";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
SchemaField
} from "../types/pocketbase.ts";
import { createSchemaField, generateRowSchema } from "./pocketbase.ts";
import { isBool, isDate, isEmail, isJson, isNumber, isUrl } from "./regex.ts";
/**
 * Reads raw rows from a CSV file in the `input` directory.
 *
 * Translates the CLI-level options into reader options, delegates to
 * `parseCsv`, and exits the process with code -2 when no rows come back.
 * @param filename Name of the CSV file (with extension).
 * @param options Delimiter, quote character and EOL flag.
 * @returns The raw rows.
 */
export async function readCsv(filename: string, options: CsvOptions) {
  const { delimiter, lf, quote } = options;

  // translate to the parser's option names
  const readerOptions = {
    columnSeparator: delimiter,
    lineSeparator: lf ? "\n" : "\r\n",
    quote,
  } satisfies Partial<CommonCSVReaderOptions>;

  const rows = await parseCsv(filename, readerOptions);

  // `parseCsv` returns null when it collected no rows
  if (rows === null) {
    console.error(
      `%c[Import] No data to import from ${filename}`,
      "color: red",
    );
    Deno.exit(-2);
  }

  return rows;
}
/**
* Parse a file to string-based object array.
@ -7,31 +52,31 @@ import { ParserOptions, RawRow } from "../types/csv.ts";
* @param csvOptions - Options for the parser
* @returns
*/
export async function parseCsv(
async function parseCsv(
filename: string | null,
csvOptions: ParserOptions,
): Promise<RawRow[] | null> {
const results: RawRow[] = [];
) {
const data: RawCsvRow[] = [];
try {
const f = await Deno.open(`./input/${filename}`);
for await (const obj of readCSVObjects(f, csvOptions)) {
results.push(obj);
data.push(obj);
}
f.close();
} catch (e) {
console.error(`%c${e}`, "color: red");
return null;
Deno.exit(-3);
}
// No columns
if (results.length === 0) {
if (data.length === 0) {
return null;
}
return results;
return data;
}
/**
@ -39,6 +84,137 @@ export async function parseCsv(
* @param value Raw string value
* @returns
*/
export function parseBool(value: string): boolean {
export function parseBool(value: string) {
return ["true", "1"].includes(value);
}
/**
 * Runs the column through the type detectors, in order, and returns a column
 * definition for the first type that matches (plain text when none does).
 * @param data - Raw parser output
 * @param prop - Column name
 * @returns `SchemaField`
 */
export function addSchemaField(data: RawCsvRow[], prop: string) {
  // Columns clashing with a system field name get an underscore prefix
  const isSystemName = POCKETBASE_SYSFIELD.includes(prop.toLowerCase());
  const targetProp = isSystemName ? `_${prop}` : prop;

  // Order matters: the more restrictive types are tried first
  const detectors = [
    { type: "bool", matches: isBool },
    { type: "number", matches: isNumber },
    { type: "email", matches: isEmail },
    { type: "json", matches: isJson },
    { type: "date", matches: isDate },
    { type: "url", matches: isUrl },
  ] as const;

  const detected = detectors.find(({ matches }) => matches(data, prop));

  // Plain text is the default type
  return createSchemaField(targetProp, detected ? detected.type : "text");
}
/**
 * Converts raw CSV rows into typed rows according to the collection schema.
 * @param data - Raw CSV parser output
 * @param schema - PocketBase collection schema
 * @returns One parsed row per raw row, in the same order
 */
export function parseData(
  data: RawCsvRow[],
  schema: SchemaField[],
) {
  // the row schema is built once and shared by every row
  const rowSchema = generateRowSchema(schema);

  const rows: ParsedRow[] = data.map((rawRow) => parseRow(rawRow, rowSchema));
  return rows;
}
/**
 * Builds a typed row object from a raw row using the row schema.
 *
 * Keys that clash with a system field name are emitted with an underscore
 * prefix; the value is always read from the original key.
 * @param rawRow - Raw row data
 * @param schema - Row type template
 * @returns
 */
function parseRow(rawRow: RawCsvRow, schema: PocketbaseRowSchema) {
  const entries = Object.keys(rawRow).map((sourceKey) => {
    // Handle conflicts with system names - add underscore
    const isSystemName = POCKETBASE_SYSFIELD.includes(sourceKey.toLowerCase());
    const targetKey = isSystemName ? `_${sourceKey}` : sourceKey;

    const parsed = parseValue(rawRow[sourceKey], schema[targetKey]);
    return [targetKey, parsed];
  });

  const parsedRow: ParsedRow = Object.fromEntries(entries);
  return parsedRow;
}
/**
 * Converts a raw string into the value for the given PocketBase type.
 *
 * An empty string becomes `null` for every supported type. An unsupported
 * type is reported and terminates the process with code -4.
 * @param value Raw string value
 * @param type Target PocketBase type
 * @returns
 */
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
  switch (type) {
    case POCKETBASE_TYPE.BOOL:
      return value == "" ? null : parseBool(value);
    case POCKETBASE_TYPE.NUMBER:
      return value == "" ? null : parseFloat(value);
    case POCKETBASE_TYPE.JSON:
      // this is safe as the values were try-parsed earlier for schema definition
      return value == "" ? null : JSON.parse(value);
    // string-backed types are passed through unchanged
    case POCKETBASE_TYPE.PLAIN_TEXT:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
    case POCKETBASE_TYPE.URL:
      return value === "" ? null : value;
    default:
      console.error(
        `%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
        "color: red",
      );
      Deno.exit(-4);
  }
}

130
utils/json.ts Normal file
View File

@ -0,0 +1,130 @@
import { RawJsonRow } from "../types/json.ts";
import { POCKETBASE_SYSFIELD } from "../types/pocketbase.ts";
import { createSchemaField } from "./pocketbase.ts";
import { isDate, isEmail, isUrl } from "./regex.ts";
/**
 * Loads the rows of a JSON input file.
 *
 * The parsed document must be a non-empty array. Otherwise an error is
 * printed and the process exits with -3 (null document), -4 (not an array)
 * or -5 (empty array).
 * @param filename The extension-inclusive name of input file.
 * @returns A new array holding the parsed row objects.
 */
export async function readJson(filename: string) {
  const parsed = await parseJson(filename);

  if (parsed === null) {
    console.error(`%cFileError: Could not read ${filename}`, "color: red");
    Deno.exit(-3);
  }

  if (!Array.isArray(parsed)) {
    console.error(`%cFileError: ${filename} is not an array`, "color: red");
    Deno.exit(-4);
  }

  if (parsed.length === 0) {
    console.error(`%cFileError: No data in ${filename}`, "color: red");
    Deno.exit(-5);
  }

  // shallow copy: new array, same row objects
  const rows: RawJsonRow[] = [...parsed];
  return rows;
}
/**
 * Reads `./input/<filename>` as text and parses it as JSON.
 *
 * Any read or parse error is printed and terminates the process with code -2.
 * @param filename Name of the .json file (with extension)
 * @returns The parsed document.
 */
async function parseJson(filename: string) {
  const path = `./input/${filename}`;
  try {
    const text = await Deno.readTextFile(path);
    return JSON.parse(text);
  } catch (error) {
    console.error(`%c${error}`, "color: red");
    Deno.exit(-2);
  }
}
/**
 * Deduces the PocketBase type of a column and returns its column definition.
 *
 * The type is taken from the first non-null value found in the column;
 * string values are further matched against the URL, e-mail and date
 * detectors. A column without any non-null value is typed as plain text.
 * @param data Raw input data.
 * @param prop Column name.
 * @returns `SchemaField`
 */
export function addSchemaField(data: RawJsonRow[], prop: string) {
  // The new column is prefixed with underscore if it conflicts with a system field
  const targetProp = POCKETBASE_SYSFIELD.includes(prop.toLowerCase())
    ? `_${prop}`
    : prop;

  // sample the first row that actually has a value for this column
  // (`!= null` also skips rows where the key is missing)
  let value: unknown = undefined;
  for (const row of data) {
    if (row[prop] != null) {
      value = row[prop];
      break;
    }
  }

  // all values are null
  if (value == null) {
    return createSchemaField(targetProp, "text");
  }

  switch (typeof value) {
    case "boolean":
      return createSchemaField(targetProp, "bool");
    case "number":
    case "bigint":
      return createSchemaField(targetProp, "number");
    case "string":
      // the detectors are given the original key (`prop`), as in the CSV
      // variant; the prefixed name is only used for the resulting column
      if (isUrl(data, prop)) {
        return createSchemaField(targetProp, "url");
      }
      if (isEmail(data, prop)) {
        return createSchemaField(targetProp, "email");
      }
      if (isDate(data, prop)) {
        return createSchemaField(targetProp, "date");
      }
      return createSchemaField(targetProp, "text");
    case "object":
      return createSchemaField(targetProp, "json");
    default:
      return createSchemaField(targetProp, "text");
  }
}
/**
 * Renames properties conflicting with system column names.
 *
 * Each conflicting key (case-insensitive match against
 * `POCKETBASE_SYSFIELD`) is emitted with a `_` prefix. The input rows are
 * left untouched; every returned row is a new object (shallow copy).
 * @param data Data rows.
 * @returns New rows with conflicting keys prefixed.
 */
export function resolveConflicts(data: RawJsonRow[]) {
  const rows: RawJsonRow[] = [];

  for (const source of data) {
    const kept: [string, unknown][] = [];
    const renamed: [string, unknown][] = [];

    for (const [key, value] of Object.entries(source)) {
      if (POCKETBASE_SYSFIELD.includes(key.toLowerCase())) {
        renamed.push([`_${key}`, value]);
      } else {
        kept.push([key, value]);
      }
    }

    // renamed entries go last so that, as before, a renamed key wins over an
    // existing key with the same prefixed name
    rows.push(Object.fromEntries([...kept, ...renamed]));
  }

  return rows;
}

View File

@ -1,56 +1,25 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import { ParsedRow, RawRow } from "../types/csv.ts";
import { RawCsvRow } from "../types/csv.ts";
import { RawJsonRow } from "../types/json.ts";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
SchemaField,
BoolField,
NumberField,
TextField,
EmailField,
JsonField,
DateField,
UrlField
} from "../types/pocketbase.ts";
import { parseBool } from "./csv.ts";
import { isBool, isDate, isEmail, isJson, isNumber } from "./regex.ts";
/**
* Matches column data against regular expressions to deduct the PocketBase type and returns a column definition.
* @param data - Raw parser output
* @param prop - Column name
* @returns `SchemaField`
*/
export function addSchemaField(data: RawRow[], prop: string): SchemaField {
// The new column is prefixed with underscore if it conflicts with a system field
const targetProp = POCKETBASE_SYSFIELD.includes(prop.toLowerCase())
? `_${prop}`
: prop;
// Precedence is important, more restricted types are matched on first
if (isBool(data, prop)) {
return createSchemaField(targetProp, "bool");
}
if (isNumber(data, prop)) {
return createSchemaField(targetProp, "number");
}
if (isEmail(data, prop)) {
return createSchemaField(targetProp, "email");
}
if (isJson(data, prop)) {
return createSchemaField(targetProp, "json");
}
if (isDate(data, prop)) {
return createSchemaField(targetProp, "date");
}
// Plain text is the default type
return createSchemaField(targetProp, "text");
}
import { addSchemaField as addCsvSchemaField } from "./csv.ts";
import { addSchemaField as addJsonSchemaField } from "./json.ts";
/**
* Finds column's type in the schema.
* @param column - Column name
* @param schema - PocketBase collection schema
* @param column Column name.
* @param schema PocketBase collection schema.
* @returns
*/
export function getSchemaType(
@ -66,120 +35,119 @@ export function getSchemaType(
"color: red",
);
Deno.exit(-1);
return "text";
}
switch (schemaField.type) {
case POCKETBASE_TYPE.BOOL:
return POCKETBASE_TYPE.BOOL;
case POCKETBASE_TYPE.NUMBER:
return POCKETBASE_TYPE.NUMBER;
case POCKETBASE_TYPE.PLAIN_TEXT:
return POCKETBASE_TYPE.PLAIN_TEXT;
case POCKETBASE_TYPE.EMAIL:
return POCKETBASE_TYPE.EMAIL;
case POCKETBASE_TYPE.JSON:
return POCKETBASE_TYPE.JSON;
case POCKETBASE_TYPE.DATETIME:
return POCKETBASE_TYPE.DATETIME;
default:
console.error(
`%cPbTypeError: Unsupported type '${schemaField.type}'`,
"color: red",
);
Deno.exit(-2);
if (schemaField.type == null) {
console.error(
`%cSchemaError: Column type missing for '${column}'`,
"color: red",
);
Deno.exit(-1);
return "text";
}
return schemaField.type;
}
/**
* Builds a `SchemaField` object based on data type.
* @param name - Column name
* @param type - PocketBase type
* @param name Column name.
* @param type PocketBase type.
* @returns
*/
function createSchemaField(name: string, type: PocketbaseType): SchemaField {
export function createSchemaField(
name: string,
type: PocketbaseType,
): SchemaField {
switch (type) {
case POCKETBASE_TYPE.BOOL:
return new SchemaField({
return {
hidden: false,
name,
type,
system: false,
presentable: false,
required: false,
unique: false,
options: {},
});
system: false,
type,
} as BoolField;
case POCKETBASE_TYPE.NUMBER:
return new SchemaField({
return {
hidden: false,
max: undefined,
min: undefined,
name,
type,
system: false,
onlyInt: false,
presentable: false,
required: false,
unique: false,
options: {
min: null,
max: null,
},
});
system: false,
type,
} as NumberField;
case POCKETBASE_TYPE.PLAIN_TEXT:
return new SchemaField({
return {
autogeneratePattern: "",
hidden: false,
max: 0,
min: 0,
name,
type,
system: false,
pattern: "",
presentable: false,
primaryKey: false,
required: false,
unique: false,
options: {
min: null,
max: null,
pattern: "",
},
});
system: false,
type,
} as TextField;
case POCKETBASE_TYPE.EMAIL:
return new SchemaField({
return {
exceptDomains: undefined,
hidden: false,
name,
type,
system: false,
onlyDomains: undefined,
presentable: false,
required: false,
unique: false,
options: {
min: null,
max: null,
},
});
system: false,
type,
} as EmailField;
case POCKETBASE_TYPE.JSON:
return new SchemaField({
return {
hidden: false,
maxSize: 0,
name,
type,
system: false,
presentable: false,
required: false,
unique: false,
options: {},
});
system: false,
type,
} as JsonField;
case POCKETBASE_TYPE.DATETIME:
return new SchemaField({
return {
hidden: false,
max: "",
min: "",
name,
type,
system: false,
presentable: false,
required: false,
unique: false,
options: {
min: null,
max: null,
},
});
system: false,
type,
} as DateField;
case POCKETBASE_TYPE.URL:
return {
hidden: false,
exceptDomains: undefined,
name,
onlyDomains: undefined,
presentable: false,
required: false,
system: false,
type,
} as UrlField;
}
}
/**
* Creates a row object schema from PocketBase collection schema.
* @param schema - PocketBase collection schema
* @param schema PocketBase collection schema.
* @returns
*/
export function generateRowSchema(schema: SchemaField[]) {
export function generateRowSchema(schema: SchemaField[]): PocketbaseRowSchema {
let instance: PocketbaseRowSchema = {};
let fieldType: PocketbaseType;
@ -193,13 +161,15 @@ export function generateRowSchema(schema: SchemaField[]) {
/**
* Parses raw objects into PocketBase collection schema fields.
* @param data - Raw parser output
* @param data Raw input data.
* @returns
*/
// deno-lint-ignore no-explicit-any
export function createSchema(
data: RawRow[],
data: { [key: string]: any },
stringifyId: boolean,
): SchemaField[] {
inputFormat: "csv" | "json",
) {
const schema: SchemaField[] = [];
// Seeks patterns in up to 1k records to avoid poor performance on large datasets
@ -213,95 +183,13 @@ export function createSchema(
if (stringifyId && prop.toLowerCase() === "id") {
schema.push(createSchemaField(`_${prop}`, "text"));
} else {
schema.push(addSchemaField(data, prop));
schema.push(
inputFormat === "csv"
? addCsvSchemaField(data as RawCsvRow[], prop)
: addJsonSchemaField(data as RawJsonRow[], prop),
);
}
}
return schema;
}
/**
 * Converts raw parser rows into typed rows for a PocketBase collection.
 *
 * The row schema is generated once from the collection schema, logged to the
 * console, and then applied to every raw row via `parseRow`.
 * @param data - Raw CSV parser output
 * @param schema - PocketBase collection schema
 * @returns One parsed row per raw input row, in the original order
 */
export function parseData(data: RawRow[], schema: SchemaField[]): ParsedRow[] {
  // Row schema is derived once and shared by all rows
  const rowSchema = generateRowSchema(schema);

  console.log("RowSchema", rowSchema);

  return data.map((rawRow) => parseRow(rawRow, rowSchema));
}
/**
 * Creates a typed row object from raw data using row schema.
 *
 * Column names that collide with PocketBase system fields (case-insensitive
 * lookup in `POCKETBASE_SYSFIELD`) are written to the result with a `_`
 * prefix; the value itself is still read from the original column name.
 * The type used for parsing is looked up in `schema` by the target
 * (possibly prefixed) name.
 * @param rawRow - Raw row data
 * @param schema - Row type template
 * @returns Row object whose values were converted by `parseValue`
 */
function parseRow(rawRow: RawRow, schema: PocketbaseRowSchema): ParsedRow {
  // Build all [key, value] pairs first and materialize the object once,
  // instead of re-spreading the accumulated row for every column.
  const entries = Object.keys(rawRow).map((sourceProp) => {
    // Handle conflicts with system names - add underscore
    const targetProp = POCKETBASE_SYSFIELD.includes(sourceProp.toLowerCase())
      ? `_${sourceProp}`
      : sourceProp;

    return [targetProp, parseValue(rawRow[sourceProp], schema[targetProp])];
  });

  // Later duplicates of the same target key overwrite earlier ones,
  // matching the behavior of repeated computed-key assignment.
  return Object.fromEntries(entries);
}
/**
 * Parses a string to a value compliant with the corresponding PocketBase type.
 *
 * For every supported type an empty string is mapped to `null`. An
 * unsupported type logs an error and terminates the process with code -3.
 * @param value - Raw string value
 * @param type - Target PocketBase type
 * @returns Parsed value, or `null` for an empty string
 */
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
  switch (type) {
    case POCKETBASE_TYPE.BOOL:
      return value == "" ? null : parseBool(value);

    case POCKETBASE_TYPE.NUMBER:
      return value == "" ? null : parseFloat(value);

    case POCKETBASE_TYPE.JSON:
      // Values are expected to have been try-parsed during schema detection.
      // NOTE(review): detection may only look at a sample of rows - confirm
      // that later rows cannot throw here.
      return value == "" ? null : JSON.parse(value);

    // Text-like types are passed through unchanged
    case POCKETBASE_TYPE.PLAIN_TEXT:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return value !== "" ? value : null;

    default:
      console.error(
        `%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
        "color: red",
      );
      Deno.exit(-3);
  }
}

View File

@ -1,12 +1,12 @@
import { RawRow } from "../types/csv.ts";
import { RawCsvRow } from "../types/csv.ts";
/**
* Checks if the column type could be `Bool`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isBool(data: RawRow[], prop: string): boolean {
export function isBool(data: RawCsvRow[], prop: string): boolean {
const zeroOrOne = /^(0|1)$/;
const trueOrFalse = /^(true|false)$/;
@ -36,11 +36,11 @@ export function isBool(data: RawRow[], prop: string): boolean {
/**
* Checks if the column type could be `Number` (integer or floating point).
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isNumber(data: RawRow[], prop: string): boolean {
export function isNumber(data: RawCsvRow[], prop: string): boolean {
const integer = /^-?[0-9]+$/;
const float = /^-?[0-9]+\.[0-9]*$/;
@ -64,13 +64,41 @@ export function isNumber(data: RawRow[], prop: string): boolean {
return matched === values && matched > 0;
}
/**
 * Checks if the column type could be `Url`.
 *
 * Empty strings and `null` values are ignored. Every remaining value must be
 * accepted by the `URL` constructor, and at least one such value must exist.
 * @param data Sample data.
 * @param prop Validated property.
 * @returns `true` when all non-empty values of the column parse as URLs.
 */
export function isUrl(data: RawCsvRow[], prop: string): boolean {
  // could be nullable: skip empty strings and null values
  const candidates = data
    .map((row) => row[prop])
    .filter((cell) => cell !== "" && cell !== null);

  // an empty column will return false
  if (candidates.length === 0) {
    return false;
  }

  return candidates.every((cell) => {
    try {
      new URL(cell);
      return true;
    } catch {
      return false;
    }
  });
}
/**
* Checks if the column type could be `Email`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isEmail(data: RawRow[], prop: string): boolean {
export function isEmail(
data: { [key: string]: string }[],
prop: string,
): boolean {
const pattern = /^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$/;
let values = 0;
@ -78,7 +106,9 @@ export function isEmail(data: RawRow[], prop: string): boolean {
data.forEach((obj) => {
// could be nullable
if (obj[prop] !== "") {
// - empty strings for CSV
// - null values for JSON
if (obj[prop] !== "" && obj[prop] !== null) {
values++;
if (obj[prop].match(pattern) !== null) {
matched++;
@ -92,11 +122,11 @@ export function isEmail(data: RawRow[], prop: string): boolean {
/**
* Parses the column values as JSON.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isJson(data: RawRow[], prop: string): boolean {
export function isJson(data: RawCsvRow[], prop: string): boolean {
let values = 0;
let parsed = 0;
@ -119,17 +149,22 @@ export function isJson(data: RawRow[], prop: string): boolean {
/**
* Parses the column values using `Date.parse()`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isDate(data: RawRow[], prop: string): boolean {
export function isDate(
data: { [key: string]: string }[],
prop: string,
): boolean {
let values = 0;
let parsed = 0;
data.forEach((obj) => {
// could be nullable
if (obj[prop] !== "") {
// - empty strings for CSV
// - null values for JSON
if (obj[prop] !== "" && obj[prop] !== null) {
values++;
const timestamp = Date.parse(obj[prop]);
if (!isNaN(timestamp)) {