basic csv support

+ csv imports
+ import options
+ type detection
+ bool, number, date, text, email and json types supported
This commit is contained in:
michal-kapala 2023-03-06 01:39:18 +01:00
commit ac857af125
12 changed files with 800 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.env

5
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,5 @@
{
"deno.enable": true,
"deno.lint": true,
"editor.defaultFormatter": "denoland.vscode-deno"
}

67
README.md Normal file
View File

@ -0,0 +1,67 @@
# Overview
PocketBase data import tools for CSV and JSON files made using
[PocketBase JS SDK](https://github.com/pocketbase/js-sdk).
Automatically creates a typed PocketBase collection and populates it with data.
Columns conflicting with PocketBase's autogenerated system fields (`id`,
`created`, `updated`; case-insensitive check, target column name's case is not
affected) are prefixed with `_`.
## Types
`pocketbase-import` detects types using regular expressions. Currently supported
PocketBase types are:
- `Bool`
- `Number`
- `Plain text`
- `Email`
- `DateTime`
- `JSON`
# Configuration
In the root directory create `.env` file with the following environment
variables:
- `ADMIN_EMAIL` (required) - superadmin email
- `ADMIN_PASSWORD` (required) - superadmin password
- `POCKETBASE_URL` (optional) - PocketBase app URL, defaults to local instance
Place your import files inside of `input` directory.
# Options
You can change the default import options to your needs:
| Name | Files | Required | Description | Example use |
| --------- | -------- | -------- | ------------------------------------------------------------------------------------------ | ------------------- |
| input | CSV/JSON | Yes | The name of the input file (with extension) | --input=example.csv |
| id | CSV/JSON | No | Indicates that `_id` column should be typed as plain text, the type is detected by default | --id |
| lf | CSV | No | LF (`\n`) EOL character will be used instead of default CRLF (`\r\n`) | --lf |
| delimiter | CSV | No | Column value separator, defaults to `,` | --delimiter=; |
| quote | CSV | No | Value quote character, defaults to `'` | --quote=~ |
# CSV
## Examples
Basic import (root directory):
```
deno run csv.ts --input=example.csv
```
Import without permission prompts and with `_id` column as text:
```
deno run --allow-read --allow-env --allow-net csv.ts --input=example.csv --id
```
Import with custom parser options (you need to adjust `example.csv`):
```
deno run csv.ts --input=example.csv --delimiter=; --quote=~ --lf
```

140
csv.ts Normal file
View File

@ -0,0 +1,140 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import PocketBase, {
Collection,
SchemaField,
} from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import "https://deno.land/std@0.178.0/dotenv/load.ts";
import { parse } from "https://deno.land/std@0.175.0/flags/mod.ts";
import { parseCsv } from "./utils/csv.ts";
import { createSchema, parseData } from "./utils/pocketbase.ts";
/**
 * Structures and populates a new collection from a CSV file.
 *
 * Reads the PocketBase URL and admin credentials from environment variables,
 * parses the CLI options, loads the input file through `parseCsv`, creates a
 * collection named after the file (without its `.csv` extension) and then
 * inserts the parsed rows one request at a time. Insertion stops at the first
 * failed request and the number of imported rows is reported.
 * @returns
 */
async function importCsv() {
  // config data
  const pbUrl = Deno.env.get("POCKETBASE_URL") ?? "http://localhost:8090";
  const adminName = Deno.env.get("ADMIN_EMAIL") ?? "";
  const adminPass = Deno.env.get("ADMIN_PASSWORD") ?? "";

  // parse CLI args
  const options = parse(Deno.args, {
    string: ["input", "delimiter", "quote"],
    boolean: ["id", "lf"],
    default: {
      /**
       * Name of the CSV file to import.
       */
      input: null,
      /**
       * Value separator (defaults to `,`).
       */
      delimiter: ",",
      /**
       * Quote character (defaults to `'`).
       */
      quote: "'",
      /**
       * Flag to always set `_id` column type to Plain text (detected by default).
       */
      id: false,
      /**
       * Whether LF end-of-line should be used (defaults to CRLF).
       */
      lf: false,
    },
  });

  if (options.input === null) {
    console.error("%cOptionError: CSV file name not supplied", "color: red");
    return;
  }

  // parser options
  const csvOptions = {
    columnSeparator: options.delimiter,
    lineSeparator: options.lf ? "\n" : "\r\n",
    quote: options.quote,
  };

  // parses CSV
  const data = await parseCsv(options.input, csvOptions);

  // empty file (or a file that could not be read)
  if (data === null) {
    console.error(
      `%c[Import] No data to import from ${options.input}`,
      "color: red",
    );
    return;
  }

  // sanitize the file name for collection name:
  // strip only the trailing extension (case-insensitive) so that a ".csv"
  // occurring earlier in the name is left untouched
  const collectName = options.input.replace(/\.csv$/i, "");

  // connect to pocketbase
  const pb = new PocketBase(pbUrl);

  // authenticate as super admin
  await pb.admins.authWithPassword(adminName, adminPass);

  // collection schema object
  const schema: SchemaField[] = createSchema(data, options.id);

  const creationDate = new Date().toISOString();

  // the new collection
  const collection = new Collection({
    name: collectName,
    type: "base",
    system: false,
    schema,
    listRule: null,
    viewRule: null,
    createRule: null,
    updateRule: null,
    deleteRule: null,
    options: {},
    created: creationDate,
    updated: creationDate,
  });

  // show the submitted collection
  console.log(collection);

  // create the new collection
  // import will fail if a collection with the same name exists
  await pb.collections.import([collection]);

  console.log(
    `%c[Import] Collection '${collectName}' created!`,
    "color: green",
  );

  // rows to be sent via PocketBase API
  const rows = parseData(data, schema);

  // number of successfully inserted rows
  let insertCount = 0;

  for (insertCount; insertCount < rows.length; insertCount++) {
    try {
      await pb.collection(collectName).create(rows[insertCount], {
        "$autoCancel": false,
      });
    } catch (e) {
      // breaks on first error
      console.error(e);
      break;
    }
  }

  // green only when every row made it in
  const color = insertCount === rows.length ? "green" : "orange";
  console.log(
    `%c[Import] Imported rows: ${insertCount}/${rows.length}`,
    `color: ${color}`,
  );
}
// Script entry point: starts the import when this file is executed.
importCsv();

5
deno.jsonc Normal file
View File

@ -0,0 +1,5 @@
{
"tasks": {
"dev": "deno run --watch main.ts"
}
}

30
deno.lock generated Normal file
View File

@ -0,0 +1,30 @@
{
"version": "2",
"remote": {
"https://deno.land/std@0.164.0/_util/asserts.ts": "d0844e9b62510f89ce1f9878b046f6a57bf88f208a10304aab50efcb48365272",
"https://deno.land/std@0.164.0/bytes/bytes_list.ts": "aba5e2369e77d426b10af1de0dcc4531acecec27f9b9056f4f7bfbf8ac147ab4",
"https://deno.land/std@0.164.0/bytes/equals.ts": "3c3558c3ae85526f84510aa2b48ab2ad7bdd899e2e0f5b7a8ffc85acb3a6043a",
"https://deno.land/std@0.164.0/bytes/mod.ts": "b2e342fd3669176a27a4e15061e9d588b89c1aaf5008ab71766e23669565d179",
"https://deno.land/std@0.164.0/fmt/colors.ts": "9e36a716611dcd2e4865adea9c4bec916b5c60caad4cdcdc630d4974e6bb8bd4",
"https://deno.land/std@0.164.0/fs/exists.ts": "6a447912e49eb79cc640adacfbf4b0baf8e17ede6d5bed057062ce33c4fa0d68",
"https://deno.land/std@0.164.0/io/buffer.ts": "245f1762a949082ddc0a6e9b15589d0be2d29c150266decd04320b8a8318f9f6",
"https://deno.land/std@0.164.0/io/types.d.ts": "107e1e64834c5ba917c783f446b407d33432c5d612c4b3430df64fc2b4ecf091",
"https://deno.land/std@0.164.0/log/handlers.ts": "61ab932822ba268ad42b25de6f0014892fe469df7627b14245ad32db6fd8b54d",
"https://deno.land/std@0.164.0/log/levels.ts": "82c965b90f763b5313e7595d4ba78d5095a13646d18430ebaf547526131604d1",
"https://deno.land/std@0.164.0/log/logger.ts": "b545159727b023825ee6814d9178fb2a7472f8d95d704c253b771b95c658cf8c",
"https://deno.land/std@0.164.0/log/mod.ts": "90618a3fdbb4520ed4e7d960665322a3dbfc107eebb3345fe0f36c8c8a7b7962",
"https://deno.land/std@0.164.0/streams/conversion.ts": "555c6c249f3acf85655f2d0af52d1cb3168e40b1c1fa26beefea501b333abe28",
"https://deno.land/std@0.175.0/_util/asserts.ts": "178dfc49a464aee693a7e285567b3d0b555dc805ff490505a8aae34f9cfb1462",
"https://deno.land/std@0.175.0/flags/mod.ts": "d1cdefa18472ef69858a17df5cf7c98445ed27ac10e1460183081303b0ebc270",
"https://deno.land/std@0.178.0/collections/filter_values.ts": "5b9feaf17b9a6e5ffccdd36cf6f38fa4ffa94cff2602d381c2ad0c2a97929652",
"https://deno.land/std@0.178.0/collections/without_all.ts": "a89f5da0b5830defed4f59666e188df411d8fece35a5f6ca69be6ca71a95c185",
"https://deno.land/std@0.178.0/dotenv/load.ts": "0636983549b98f29ab75c9a22a42d9723f0a389ece5498fe971e7bb2556a12e2",
"https://deno.land/std@0.178.0/dotenv/mod.ts": "8dcbc8a40b896a0bf094582aaeadbfc76d3528872faf2efc0302beb1d2f6afd0",
"https://deno.land/x/csv@v0.8.0/deps.ts": "597e3d0c81eca1c519ce20f0a7ed573d31cfbb5625c5013763550d717ba69dfa",
"https://deno.land/x/csv@v0.8.0/mod.ts": "2a13285c8716257aa5d6b5021d45c74dcb275eaaa46776106ddf78b071cabd88",
"https://deno.land/x/csv@v0.8.0/reader.ts": "ad6543223f8d1c17c78b18dbaa464c3d81f7a0970b08aa5fbde9eb48e2a680d1",
"https://deno.land/x/csv@v0.8.0/utils.ts": "7f2467acb031244c150774b9d6caa5e8c40f9857031a947c6ad20765c5d7bd20",
"https://deno.land/x/csv@v0.8.0/writer.ts": "18a46b755cc215695862d3a896b3a7b24b33cb7ee626cbebeb7138193cb7edcf",
"https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs": "bcc669b0d3844523cd8121a75c2ead5e14f1dc9426bcf8904621fc26a75b44c0"
}
}

3
input/example.csv Normal file
View File

@ -0,0 +1,3 @@
id,name,is_good,score,email,json,date
1,john,1,0.8412384213497,john.doe@example.com,[],2023-03-05T00:35:21.104Z
2,fire,0,-80347329472,firebase@google.com,{"xd": "nice meme"},
Can't render this file because it contains an unexpected character in line 3 and column 44.

21
types/csv.ts Normal file
View File

@ -0,0 +1,21 @@
import { CommonCSVReaderOptions } from "https://deno.land/x/csv@v0.8.0/reader.ts";
/**
 * Options object of `csv.readCSVObjects`.
 *
 * Every property of `CommonCSVReaderOptions` is made optional through `Partial`,
 * so any subset of the reader options may be passed.
 */
export type ParserOptions = Partial<CommonCSVReaderOptions>;
/**
 * A single unparsed CSV row as produced by `csv.readCSVObjects`:
 * column name mapped to the cell's raw string value.
 */
export type RawRow = Record<string, string>;
/**
 * A row whose cell values have been converted according to the collection
 * schema; the value type depends on the column's PocketBase type.
 */
// deno-lint-ignore no-explicit-any
export type ParsedRow = Record<string, any>;

34
types/pocketbase.ts Normal file
View File

@ -0,0 +1,34 @@
/**
 * Lookup of every PocketBase field type this tool can produce,
 * keyed by a readable constant name.
 */
export const POCKETBASE_TYPE = {
  BOOL: "bool",
  NUMBER: "number",
  PLAIN_TEXT: "text",
  EMAIL: "email",
  JSON: "json",
  DATETIME: "date",
} as const;

/**
 * Union of the type strings listed in `POCKETBASE_TYPE`.
 */
export type PocketbaseType =
  (typeof POCKETBASE_TYPE)[keyof typeof POCKETBASE_TYPE];
/**
 * Maps each column name to the PocketBase type its values are parsed as.
 */
export type PocketbaseRowSchema = Record<string, PocketbaseType>;
/**
 * PocketBase system fields with autogenerated values that cannot be overridden (`base` collection type).
 *
 * Names are lowercase; callers compare against the lowercased column name.
 * Typed as `readonly` so importing modules cannot mutate the shared list.
 */
export const POCKETBASE_SYSFIELD: readonly string[] = [
  "id",
  "created",
  "updated",
];

44
utils/csv.ts Normal file
View File

@ -0,0 +1,44 @@
import { readCSVObjects } from "https://deno.land/x/csv@v0.8.0/reader.ts";
import { ParserOptions, RawRow } from "../types/csv.ts";
/**
 * Parse a file to string-based object array.
 *
 * The file is read from the `./input` directory. Any error raised while
 * opening or parsing the file is logged and reported as `null`.
 * @param filename - Name of the .csv file (with extension)
 * @param csvOptions - Options for the parser
 * @returns The parsed rows, or `null` when no file name was given, the file
 * could not be read, or it produced no rows
 */
export async function parseCsv(
  filename: string | null,
  csvOptions: ParserOptions,
): Promise<RawRow[] | null> {
  // Without this guard `null` would be interpolated into the path as "null"
  if (filename === null) {
    console.error("%cNo input file name supplied", "color: red");
    return null;
  }

  const results: RawRow[] = [];

  try {
    const f = await Deno.open(`./input/${filename}`);
    try {
      for await (const obj of readCSVObjects(f, csvOptions)) {
        results.push(obj);
      }
    } finally {
      // release the file handle even when the parser throws mid-file
      f.close();
    }
  } catch (e) {
    console.error(`%c${e}`, "color: red");
    return null;
  }

  // No rows
  if (results.length === 0) {
    return null;
  }

  return results;
}
/**
 * Converts a raw cell to a boolean.
 * Only the exact strings `'true'` and `'1'` yield `true`; anything else is `false`.
 * @param value Raw string value
 * @returns
 */
export function parseBool(value: string): boolean {
  return value === "true" || value === "1";
}

307
utils/pocketbase.ts Normal file
View File

@ -0,0 +1,307 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import { ParsedRow, RawRow } from "../types/csv.ts";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
} from "../types/pocketbase.ts";
import { parseBool } from "./csv.ts";
import { isBool, isDate, isEmail, isJson, isNumber } from "./regex.ts";
/**
 * Deduces the PocketBase type of a column by running the type detectors over
 * its values and returns the matching column definition.
 * @param data - Raw parser output
 * @param prop - Column name
 * @returns `SchemaField`
 */
export function addSchemaField(data: RawRow[], prop: string): SchemaField {
  // Columns clashing with a system field get an underscore prefix
  const fieldName = POCKETBASE_SYSFIELD.includes(prop.toLowerCase())
    ? `_${prop}`
    : prop;

  // Order matters: the more restrictive detectors must run first
  const detectors: [
    PocketbaseType,
    (rows: RawRow[], column: string) => boolean,
  ][] = [
    ["bool", isBool],
    ["number", isNumber],
    ["email", isEmail],
    ["json", isJson],
    ["date", isDate],
  ];

  for (const [type, matches] of detectors) {
    if (matches(data, prop)) {
      return createSchemaField(fieldName, type);
    }
  }

  // Nothing matched, fall back to plain text
  return createSchemaField(fieldName, "text");
}
/**
 * Looks up a column in the collection schema and returns its type, provided
 * the type is one of the supported `POCKETBASE_TYPE` values.
 * Terminates the process when the column is missing or its type is unsupported.
 * @param column - Column name
 * @param schema - PocketBase collection schema
 * @returns
 */
export function getSchemaType(
  column: string,
  schema: SchemaField[],
): PocketbaseType {
  const match = schema.find(({ name }) => name === column);

  // the column should always exist unless the data got structured wrong
  if (match === undefined) {
    console.error(
      `%cSchemaError: Supplied column '${column}' not found in collection schema`,
      "color: red",
    );
    Deno.exit(-1);
  }

  const declaredType = match.type;
  const supported = Object.values(POCKETBASE_TYPE).find(
    (known) => known === declaredType,
  );

  if (supported === undefined) {
    console.error(
      `%cPbTypeError: Unsupported type '${declaredType}'`,
      "color: red",
    );
    Deno.exit(-2);
  }

  return supported;
}
/**
 * Creates the `SchemaField` definition for a column of the given type.
 * All fields are non-system, optional and non-unique; only the `options`
 * object differs between types.
 * @param name - Column name
 * @param type - PocketBase type
 * @returns
 */
function createSchemaField(name: string, type: PocketbaseType): SchemaField {
  // properties shared by every field definition
  const common = {
    name,
    type,
    system: false,
    required: false,
    unique: false,
  };

  switch (type) {
    // types without any options
    case POCKETBASE_TYPE.BOOL:
    case POCKETBASE_TYPE.JSON:
      return new SchemaField({ ...common, options: {} });
    // types with an optional min/max bound
    case POCKETBASE_TYPE.NUMBER:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return new SchemaField({
        ...common,
        options: { min: null, max: null },
      });
    // text additionally carries a validation pattern
    case POCKETBASE_TYPE.PLAIN_TEXT:
      return new SchemaField({
        ...common,
        options: { min: null, max: null, pattern: "" },
      });
  }
}
/**
 * Derives a column-name to type mapping from a PocketBase collection schema.
 * @param schema - PocketBase collection schema
 * @returns
 */
export function generateRowSchema(schema: SchemaField[]) {
  const rowSchema: PocketbaseRowSchema = Object.fromEntries(
    schema.map((field): [string, PocketbaseType] => [
      field.name,
      getSchemaType(field.name, schema),
    ]),
  );
  return rowSchema;
}
/**
 * Builds the collection schema by deducing a field type for every column
 * found in the first row of the raw data.
 * @param data - Raw parser output
 * @param stringifyId - When `true`, a column named `id` (case-insensitive) is typed as plain text instead of being detected
 * @returns
 */
export function createSchema(
  data: RawRow[],
  stringifyId: boolean,
): SchemaField[] {
  // Type detection looks at no more than 1k records to stay fast on large datasets
  const sample = data.length > 1000 ? data.slice(0, 1000) : data;
  const fields: SchemaField[] = [];

  // One schema field per column of the first record
  for (const column in sample[0]) {
    // --id option: skip detection for the id column
    const forceText = stringifyId && column.toLowerCase() === "id";
    fields.push(
      forceText
        ? createSchemaField(`_${column}`, "text")
        : addSchemaField(sample, column),
    );
  }

  return fields;
}
/**
 * Converts every raw CSV row into a typed row according to the collection schema.
 * @param data - Raw CSV parser output
 * @param schema - PocketBase collection schema
 * @returns
 */
export function parseData(data: RawRow[], schema: SchemaField[]): ParsedRow[] {
  // column name -> type template shared by all rows
  const rowSchema = generateRowSchema(schema);
  console.log("RowSchema", rowSchema);

  return data.map((rawRow) => parseRow(rawRow, rowSchema));
}
/**
 * Builds a typed row from a raw row, renaming columns that clash with
 * PocketBase system fields and converting each value by its schema type.
 * @param rawRow - Raw row data
 * @param schema - Row type template
 * @returns
 */
function parseRow(rawRow: RawRow, schema: PocketbaseRowSchema): ParsedRow {
  const entries = Object.keys(rawRow).map((column): [string, unknown] => {
    // System field names are stored with an underscore prefix
    const target = POCKETBASE_SYSFIELD.includes(column.toLowerCase())
      ? `_${column}`
      : column;
    // the value is read under the original name, the type under the target name
    return [target, parseValue(rawRow[column], schema[target])];
  });

  return Object.fromEntries(entries);
}
/**
 * Converts a raw string into the value representation used for the given
 * PocketBase type. An empty string becomes `null` for every supported type.
 * Terminates the process for a type without a parser.
 * @param value
 * @param type
 * @returns
 */
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
  const isEmpty = value === "";

  switch (type) {
    case POCKETBASE_TYPE.BOOL:
      return isEmpty ? null : parseBool(value);
    case POCKETBASE_TYPE.NUMBER:
      return isEmpty ? null : parseFloat(value);
    case POCKETBASE_TYPE.JSON:
      // values sampled for schema detection were already try-parsed
      return isEmpty ? null : JSON.parse(value);
    // these types are sent as the original string
    case POCKETBASE_TYPE.PLAIN_TEXT:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return isEmpty ? null : value;
    default:
      console.error(
        `%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
        "color: red",
      );
      Deno.exit(-3);
  }
}

143
utils/regex.ts Normal file
View File

@ -0,0 +1,143 @@
import { RawRow } from "../types/csv.ts";
/**
 * Tells whether every non-empty value of a column looks like a boolean
 * (`0`, `1`, `true` or `false`).
 * @param data - Sample data
 * @param prop - Validated property
 * @returns
 */
export function isBool(data: RawRow[], prop: string): boolean {
  const patterns = [/^(0|1)$/, /^(true|false)$/];
  let nonEmpty = 0;
  let boolLike = 0;

  try {
    for (const row of data) {
      const cell = row[prop];
      // empty cells are treated as nullable and ignored
      if (cell === "") {
        continue;
      }
      nonEmpty += 1;
      if (patterns.some((pattern) => cell.match(pattern) !== null)) {
        boolLike += 1;
      }
    }
  } catch (err) {
    console.error(err);
  }

  // a column with no values at all is not a bool column
  return boolLike > 0 && boolLike === nonEmpty;
}
/**
 * Tells whether every non-empty value of a column is an integer or a
 * floating point number in plain decimal notation.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns
 */
export function isNumber(data: RawRow[], prop: string): boolean {
  const patterns = [/^-?[0-9]+$/, /^-?[0-9]+\.[0-9]*$/];

  // empty cells are treated as nullable and ignored
  const filled = data.filter((row) => row[prop] !== "");
  const numeric = filled.filter((row) =>
    patterns.some((pattern) => row[prop].match(pattern) !== null)
  );

  // a column with no values at all is not a number column
  return numeric.length === filled.length && numeric.length > 0;
}
/**
 * Tells whether every non-empty value of a column matches the email pattern.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns
 */
export function isEmail(data: RawRow[], prop: string): boolean {
  const pattern = /^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$/;

  // empty cells are treated as nullable and ignored
  const filled = data.filter((row) => row[prop] !== "");
  const emails = filled.filter((row) => row[prop].match(pattern) !== null);

  // a column with no values at all is not an email column
  return emails.length === filled.length && emails.length > 0;
}
/**
 * Tells whether every non-empty value of a column can be parsed with `JSON.parse`.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns
 */
export function isJson(data: RawRow[], prop: string): boolean {
  // a value counts as JSON when parsing it does not throw
  const parses = (text: string): boolean => {
    try {
      JSON.parse(text);
      return true;
    } catch {
      return false;
    }
  };

  // empty cells are treated as nullable and ignored
  const filled = data.filter((row) => row[prop] !== "");
  const valid = filled.filter((row) => parses(row[prop]));

  // a column with no values at all is not a JSON column
  return valid.length === filled.length && valid.length > 0;
}
/**
 * Tells whether every non-empty value of a column is accepted by `Date.parse()`.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns
 */
export function isDate(data: RawRow[], prop: string): boolean {
  // empty cells are treated as nullable and ignored
  const filled = data.filter((row) => row[prop] !== "");
  // `Date.parse()` yields NaN for anything it cannot interpret
  const dates = filled.filter((row) => !Number.isNaN(Date.parse(row[prop])));

  // a column with no values at all is not a date column
  return dates.length === filled.length && dates.length > 0;
}