JSON import support

+ JSON import added
+ code structure refactored
This commit is contained in:
michal-kapala
2023-03-10 20:32:57 +01:00
parent c92fd20fbc
commit 63bf053c36
10 changed files with 556 additions and 194 deletions

View File

@ -1,5 +1,51 @@
import { readCSVObjects } from "https://deno.land/x/csv@v0.8.0/reader.ts";
import { ParserOptions, RawRow } from "../types/csv.ts";
import {
CommonCSVReaderOptions,
readCSVObjects,
} from "https://deno.land/x/csv@v0.8.0/reader.ts";
import {
CsvOptions,
ParsedRow,
ParserOptions,
RawCsvRow,
} from "../types/csv.ts";
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
} from "../types/pocketbase.ts";
import { createSchemaField, generateRowSchema } from "./pocketbase.ts";
import { isBool, isDate, isEmail, isJson, isNumber } from "./regex.ts";
/**
 * Loads the rows of a CSV file through `parseCsv`.
 *
 * The reader is configured from the user-facing options: the column
 * delimiter, the quote character and the line ending (`\n` when `options.lf`
 * is set, `\r\n` otherwise).
 * If `parseCsv` yields `null`, an error is logged and the process exits
 * with code `-2`.
 * @param filename Name of the CSV file to read.
 * @param options CSV formatting options.
 * @returns The raw rows produced by the parser.
 */
export async function readCsv(filename: string, options: CsvOptions) {
  // Translate the CLI-level options into the reader's option names.
  const readerOptions = {
    columnSeparator: options.delimiter,
    lineSeparator: options.lf ? "\n" : "\r\n",
    quote: options.quote,
  } satisfies Partial<CommonCSVReaderOptions>;

  const rows = await parseCsv(filename, readerOptions);
  if (rows !== null) {
    return rows;
  }

  // Nothing was parsed - report and abort.
  console.error(
    `%c[Import] No data to import from ${filename}`,
    "color: red",
  );
  Deno.exit(-2);
}
/**
* Parse a file to string-based object array.
@ -7,31 +53,31 @@ import { ParserOptions, RawRow } from "../types/csv.ts";
* @param csvOptions - Options for the parser
* @returns
*/
export async function parseCsv(
async function parseCsv(
filename: string | null,
csvOptions: ParserOptions,
): Promise<RawRow[] | null> {
const results: RawRow[] = [];
): Promise<RawCsvRow[] | null> {
const data: RawCsvRow[] = [];
try {
const f = await Deno.open(`./input/${filename}`);
for await (const obj of readCSVObjects(f, csvOptions)) {
results.push(obj);
data.push(obj);
}
f.close();
} catch (e) {
console.error(`%c${e}`, "color: red");
return null;
Deno.exit(-3);
}
// No columns
if (results.length === 0) {
if (data.length === 0) {
return null;
}
return results;
return data;
}
/**
@ -42,3 +88,129 @@ export async function parseCsv(
export function parseBool(value: string): boolean {
  // Only the exact strings "true" and "1" are truthy; everything else is false.
  return value === "true" || value === "1";
}
/**
 * Deduces the PocketBase type of a CSV column by running the type checks
 * over the column data and returns the matching column definition.
 *
 * A column whose lower-cased name appears in `POCKETBASE_SYSFIELD` is
 * created with an underscore prefix.
 * @param data Raw parser output.
 * @param prop Column name as it appears in the raw data.
 * @returns `SchemaField`
 */
export function addSchemaField(data: RawCsvRow[], prop: string): SchemaField {
  const clashesWithSystemField = POCKETBASE_SYSFIELD.includes(
    prop.toLowerCase(),
  );
  const targetProp = clashesWithSystemField ? `_${prop}` : prop;

  // Order matters: the more restrictive checks run first and the first
  // match wins; plain text is the fallback.
  const detectedType = isBool(data, prop)
    ? "bool"
    : isNumber(data, prop)
    ? "number"
    : isEmail(data, prop)
    ? "email"
    : isJson(data, prop)
    ? "json"
    : isDate(data, prop)
    ? "date"
    : "text";

  return createSchemaField(targetProp, detectedType);
}
/**
 * Converts raw CSV rows into typed rows according to a PocketBase
 * collection schema.
 *
 * The row schema derived from `schema` is logged to the console before the
 * rows are converted.
 * @param data Raw CSV parser output.
 * @param schema PocketBase collection schema.
 * @returns One parsed row per raw row, in the same order.
 */
export function parseData(
  data: RawCsvRow[],
  schema: SchemaField[],
): ParsedRow[] {
  // Build the row template once and reuse it for every row.
  const rowSchema = generateRowSchema(schema);

  console.log("RowSchema", rowSchema);

  return data.map((rawRow) => parseRow(rawRow, rowSchema));
}
/**
 * Builds a typed row object from a raw row using the row schema.
 *
 * Columns whose lower-cased name appears in `POCKETBASE_SYSFIELD` are stored
 * under an underscore-prefixed key; the value is still read from the
 * original column name.
 * @param rawRow Raw row data.
 * @param schema Row type template (column name to PocketBase type).
 * @returns The row with every value converted by `parseValue`.
 */
function parseRow(rawRow: RawCsvRow, schema: PocketbaseRowSchema): ParsedRow {
  return Object.keys(rawRow).reduce<ParsedRow>((parsedRow, column) => {
    // Resolve conflicts with system names by adding an underscore.
    const targetColumn = POCKETBASE_SYSFIELD.includes(column.toLowerCase())
      ? `_${column}`
      : column;

    // The schema is keyed by the target name, the raw row by the original.
    const type = schema[targetColumn];
    const value = parseValue(rawRow[column], type);

    return { ...parsedRow, [targetColumn]: value };
  }, {});
}
/**
 * Converts a raw string into the value for the corresponding PocketBase type.
 *
 * For every supported type an empty string becomes `null`. An unsupported
 * type logs an error and exits the process with code `-4`.
 * @param value Raw string value.
 * @param type Target PocketBase type.
 * @returns The converted value, or `null` for an empty string.
 */
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
  const isEmpty = value === "";

  switch (type) {
    case POCKETBASE_TYPE.BOOL:
      return isEmpty ? null : parseBool(value);

    case POCKETBASE_TYPE.NUMBER:
      return isEmpty ? null : parseFloat(value);

    case POCKETBASE_TYPE.JSON:
      // this is safe as the values were try-parsed earlier for schema definition
      return isEmpty ? null : JSON.parse(value);

    // Text-like types are passed through unchanged.
    case POCKETBASE_TYPE.PLAIN_TEXT:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return isEmpty ? null : value;

    default:
      console.error(
        `%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
        "color: red",
      );
      Deno.exit(-4);
  }
}

129
utils/json.ts Normal file
View File

@ -0,0 +1,129 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import { RawJsonRow } from "../types/json.ts";
import { POCKETBASE_SYSFIELD } from "../types/pocketbase.ts";
import { createSchemaField } from "./pocketbase.ts";
import { isDate, isEmail } from "./regex.ts";
/**
 * Reads an array of rows from a JSON file.
 *
 * The process exits with an error message when the parsed content is `null`
 * (code `-3`), is not an array (code `-4`) or is an empty array (code `-5`).
 * @param filename The extension-inclusive name of input file.
 * @returns The array elements as raw JSON rows.
 */
export async function readJson(filename: string) {
  const parsed = await parseJson(filename);

  if (parsed === null) {
    console.error(`%cFileError: Could not read ${filename}`, "color: red");
    Deno.exit(-3);
  }

  if (!Array.isArray(parsed)) {
    console.error(`%cFileError: ${filename} is not an array`, "color: red");
    Deno.exit(-4);
  }

  if (parsed.length === 0) {
    console.error(`%cFileError: No data in ${filename}`, "color: red");
    Deno.exit(-5);
  }

  // Copy the elements into a typed row array, index by index.
  const rows: RawJsonRow[] = Array.from(
    parsed,
    (element) => element as RawJsonRow,
  );
  return rows;
}
/**
 * Reads a file from the `./input` directory and parses its content as JSON.
 *
 * Any read or parse error is logged and the process exits with code `-2`.
 * @param filename Name of the .json file (with extension)
 * @returns The parsed JSON value.
 */
async function parseJson(filename: string) {
  try {
    const path = `./input/${filename}`;
    const text = await Deno.readTextFile(path);
    return JSON.parse(text);
  } catch (err) {
    console.error(`%c${err}`, "color: red");
    Deno.exit(-2);
  }
}
/**
 * Deduces the PocketBase type of a JSON property from its values and returns
 * a column definition.
 *
 * The type is decided from the first row whose value for `prop` is neither
 * `null` nor `undefined`. String values are additionally checked with
 * `isEmail` and `isDate`. If no row holds a value (including empty `data`),
 * the column defaults to plain text.
 *
 * A property whose lower-cased name appears in `POCKETBASE_SYSFIELD` is
 * created with an underscore prefix; the data itself is always read through
 * the original `prop` key.
 * @param data Raw input data.
 * @param prop Column name as it appears in the data rows.
 * @returns `SchemaField`
 */
export function addSchemaField(data: RawJsonRow[], prop: string): SchemaField {
  // The new column is prefixed with underscore if it conflicts with a system field
  const targetProp = POCKETBASE_SYSFIELD.includes(prop.toLowerCase())
    ? `_${prop}`
    : prop;

  // Scan ALL rows for the first usable sample. `!= null` also skips rows
  // where the key is missing entirely (undefined), not only explicit nulls.
  const sample = data.find((row) => row[prop] != null)?.[prop];

  // all values are null (or there is no data at all)
  if (sample == null) {
    return createSchemaField(targetProp, "text");
  }

  switch (typeof sample) {
    case "boolean":
      return createSchemaField(targetProp, "bool");
    case "number":
    case "bigint":
      return createSchemaField(targetProp, "number");
    case "string":
      // The rows are keyed by `prop`; `targetProp` is only the name of the
      // column to create, so the value checks must use `prop`.
      if (isEmail(data, prop)) {
        return createSchemaField(targetProp, "email");
      }
      if (isDate(data, prop)) {
        return createSchemaField(targetProp, "date");
      }
      return createSchemaField(targetProp, "text");
    case "object":
      // arrays and nested objects are stored as JSON
      return createSchemaField(targetProp, "json");
    default:
      return createSchemaField(targetProp, "text");
  }
}
/**
 * Renames properties conflicting with system column names.
 *
 * Every key whose lower-cased form appears in `POCKETBASE_SYSFIELD` is
 * replaced by the same key prefixed with an underscore. The row objects are
 * modified in place; the returned array is new but holds the same objects.
 * @param data Data rows.
 * @returns The rows with conflicting properties renamed.
 */
export function resolveConflicts(data: RawJsonRow[]): RawJsonRow[] {
  const resolved: RawJsonRow[] = [];

  for (const row of data) {
    // Collect the conflicting keys up front so renaming does not affect
    // the set of keys being visited.
    const conflicting = Object.keys(row).filter((key) =>
      POCKETBASE_SYSFIELD.includes(key.toLowerCase())
    );

    for (const key of conflicting) {
      const value = row[key];
      delete row[key];
      row[`_${key}`] = value;
    }

    resolved.push(row);
  }

  return resolved;
}

View File

@ -1,56 +1,19 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import { ParsedRow, RawRow } from "../types/csv.ts";
import { RawCsvRow } from "../types/csv.ts";
import { RawJsonRow } from "../types/json.ts";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
} from "../types/pocketbase.ts";
import { parseBool } from "./csv.ts";
import { isBool, isDate, isEmail, isJson, isNumber } from "./regex.ts";
/**
* Matches column data against regular expressions to deduct the PocketBase type and returns a column definition.
* @param data - Raw parser output
* @param prop - Column name
* @returns `SchemaField`
*/
export function addSchemaField(data: RawRow[], prop: string): SchemaField {
// The new column is prefixed with underscore if it conflicts with a system field
const targetProp = POCKETBASE_SYSFIELD.includes(prop.toLowerCase())
? `_${prop}`
: prop;
// Precedence is important, more restricted types are matched on first
if (isBool(data, prop)) {
return createSchemaField(targetProp, "bool");
}
if (isNumber(data, prop)) {
return createSchemaField(targetProp, "number");
}
if (isEmail(data, prop)) {
return createSchemaField(targetProp, "email");
}
if (isJson(data, prop)) {
return createSchemaField(targetProp, "json");
}
if (isDate(data, prop)) {
return createSchemaField(targetProp, "date");
}
// Plain text is the default type
return createSchemaField(targetProp, "text");
}
import { addSchemaField as addCsvSchemaField } from "./csv.ts";
import { addSchemaField as addJsonSchemaField } from "./json.ts";
/**
* Finds column's type in the schema.
* @param column - Column name
* @param schema - PocketBase collection schema
* @param column Column name.
* @param schema PocketBase collection schema.
* @returns
*/
export function getSchemaType(
@ -98,11 +61,14 @@ export function getSchemaType(
/**
* Builds a `SchemaField` object based on data type.
* @param name - Column name
* @param type - PocketBase type
* @param name Column name.
* @param type PocketBase type.
* @returns
*/
function createSchemaField(name: string, type: PocketbaseType): SchemaField {
export function createSchemaField(
name: string,
type: PocketbaseType,
): SchemaField {
switch (type) {
case POCKETBASE_TYPE.BOOL:
return new SchemaField({
@ -176,10 +142,10 @@ function createSchemaField(name: string, type: PocketbaseType): SchemaField {
/**
* Creates a row object schema from PocketBase collection schema.
* @param schema - PocketBase collection schema
* @param schema PocketBase collection schema.
* @returns
*/
export function generateRowSchema(schema: SchemaField[]) {
export function generateRowSchema(schema: SchemaField[]): PocketbaseRowSchema {
let instance: PocketbaseRowSchema = {};
let fieldType: PocketbaseType;
@ -193,12 +159,14 @@ export function generateRowSchema(schema: SchemaField[]) {
/**
* Parses raw objects into PocketBase collection schema fields.
* @param data - Raw parser output
* @param data Raw input data.
* @returns
*/
// deno-lint-ignore no-explicit-any
export function createSchema(
data: RawRow[],
data: { [key: string]: any },
stringifyId: boolean,
inputFormat: "csv" | "json",
): SchemaField[] {
const schema: SchemaField[] = [];
@ -213,95 +181,13 @@ export function createSchema(
if (stringifyId && prop.toLowerCase() === "id") {
schema.push(createSchemaField(`_${prop}`, "text"));
} else {
schema.push(addSchemaField(data, prop));
schema.push(
inputFormat === "csv"
? addCsvSchemaField(data as RawCsvRow[], prop)
: addJsonSchemaField(data as RawJsonRow[], prop),
);
}
}
return schema;
}
/**
* Parses typed rows using Pocketbase collection schema.
* @param data - Raw CSV parser output
* @param schema - PocketBase collection schema
* @returns
*/
export function parseData(data: RawRow[], schema: SchemaField[]): ParsedRow[] {
const rows: ParsedRow[] = [];
// create a row schema for the collection
const rowSchema = generateRowSchema(schema);
console.log("RowSchema", rowSchema);
data.forEach((rawRow) => {
rows.push(parseRow(rawRow, rowSchema));
});
return rows;
}
/**
* Creates a typed row object from raw data using row schema.
* @param rawRow - Raw row data
* @param schema - Row type template
* @returns
*/
function parseRow(rawRow: RawRow, schema: PocketbaseRowSchema): ParsedRow {
let parsedRow: ParsedRow = {};
const keys = Object.keys(rawRow);
keys.forEach((prop) => {
// Handle conflicts with system names - add underscore
const orgProp = prop;
if (POCKETBASE_SYSFIELD.includes(prop.toLowerCase())) {
prop = `_${prop}`;
}
const type = schema[prop];
const value = parseValue(rawRow[orgProp], type);
parsedRow = { ...parsedRow, [prop]: value };
});
return parsedRow;
}
/**
* Parses a string to a value compliant with correspending PocketBase type.
* @param value
* @param type
* @returns
*/
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
switch (type) {
case POCKETBASE_TYPE.BOOL:
if (value == "") {
return null;
}
return parseBool(value);
case POCKETBASE_TYPE.NUMBER:
if (value == "") {
return null;
}
return parseFloat(value);
case POCKETBASE_TYPE.JSON:
if (value == "") {
return null;
}
// this is safe as the values were try-parsed earlier for schema definition
return JSON.parse(value);
case POCKETBASE_TYPE.PLAIN_TEXT:
return value !== "" ? value : null;
case POCKETBASE_TYPE.EMAIL:
return value !== "" ? value : null;
case POCKETBASE_TYPE.DATETIME:
return value !== "" ? value : null;
default:
console.error(
`%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
"color: red",
);
Deno.exit(-3);
}
}

View File

@ -1,12 +1,12 @@
import { RawRow } from "../types/csv.ts";
import { RawCsvRow } from "../types/csv.ts";
/**
* Checks if the column type could be `Bool`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isBool(data: RawRow[], prop: string): boolean {
export function isBool(data: RawCsvRow[], prop: string): boolean {
const zeroOrOne = /^(0|1)$/;
const trueOrFalse = /^(true|false)$/;
@ -36,11 +36,11 @@ export function isBool(data: RawRow[], prop: string): boolean {
/**
* Checks if the column type could be `Number` (integer or floating point).
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isNumber(data: RawRow[], prop: string): boolean {
export function isNumber(data: RawCsvRow[], prop: string): boolean {
const integer = /^-?[0-9]+$/;
const float = /^-?[0-9]+\.[0-9]*$/;
@ -66,11 +66,14 @@ export function isNumber(data: RawRow[], prop: string): boolean {
/**
* Checks if the column type could be `Email`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isEmail(data: RawRow[], prop: string): boolean {
export function isEmail(
data: { [key: string]: string }[],
prop: string,
): boolean {
const pattern = /^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$/;
let values = 0;
@ -78,7 +81,9 @@ export function isEmail(data: RawRow[], prop: string): boolean {
data.forEach((obj) => {
// could be nullable
if (obj[prop] !== "") {
// - empty strings for CSV
// - null values for JSON
if (obj[prop] !== "" && obj[prop] !== null) {
values++;
if (obj[prop].match(pattern) !== null) {
matched++;
@ -92,11 +97,11 @@ export function isEmail(data: RawRow[], prop: string): boolean {
/**
* Parses the column values as JSON.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isJson(data: RawRow[], prop: string): boolean {
export function isJson(data: RawCsvRow[], prop: string): boolean {
let values = 0;
let parsed = 0;
@ -119,17 +124,22 @@ export function isJson(data: RawRow[], prop: string): boolean {
/**
* Parses the column values using `Date.parse()`.
* @param data - Sample data
* @param prop - Validated property
* @param data Sample data.
* @param prop Validated property.
* @returns
*/
export function isDate(data: RawRow[], prop: string): boolean {
export function isDate(
data: { [key: string]: string }[],
prop: string,
): boolean {
let values = 0;
let parsed = 0;
data.forEach((obj) => {
// could be nullable
if (obj[prop] !== "") {
// - empty strings for CSV
// - null values for JSON
if (obj[prop] !== "" && obj[prop] !== null) {
values++;
const timestamp = Date.parse(obj[prop]);
if (!isNaN(timestamp)) {