basic csv support

+ csv imports
+ import options
+ type detection
+ bool, number, date, text, email and json types supported
This commit is contained in:
michal-kapala
2023-03-06 01:39:18 +01:00
commit ac857af125
12 changed files with 800 additions and 0 deletions

44
utils/csv.ts Normal file
View File

@ -0,0 +1,44 @@
import { readCSVObjects } from "https://deno.land/x/csv@v0.8.0/reader.ts";
import { ParserOptions, RawRow } from "../types/csv.ts";
/**
 * Reads a CSV file from the `./input` directory and parses it into
 * string-based row objects.
 *
 * The file handle is released in a `finally` block, so it is closed even when
 * the CSV reader throws in the middle of the file.
 *
 * @param filename - Name of the .csv file (with extension), relative to `./input`
 * @param csvOptions - Options forwarded to the CSV reader
 * @returns The parsed rows, or `null` when no file name was supplied, the file
 * could not be opened or parsed, or it yielded no rows
 */
export async function parseCsv(
  filename: string | null,
  csvOptions: ParserOptions,
): Promise<RawRow[] | null> {
  // Without a name there is nothing to open; avoid probing `./input/null`
  if (filename === null) {
    console.error("%cNo input file name was provided", "color: red");
    return null;
  }

  const results: RawRow[] = [];

  try {
    const f = await Deno.open(`./input/${filename}`);
    try {
      for await (const obj of readCSVObjects(f, csvOptions)) {
        results.push(obj);
      }
    } finally {
      // Always release the handle, also when the reader fails midway
      f.close();
    }
  } catch (e) {
    console.error(`%c${e}`, "color: red");
    return null;
  }

  // No rows
  return results.length > 0 ? results : null;
}
/**
 * Converts a raw string to a boolean.
 * Only the exact strings `'true'` and `'1'` are truthy; anything else is `false`.
 * @param value - Raw string value
 * @returns `true` for `'true'` or `'1'`, otherwise `false`
 */
export function parseBool(value: string): boolean {
  return value === "true" || value === "1";
}

307
utils/pocketbase.ts Normal file
View File

@ -0,0 +1,307 @@
// @deno-types="https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.d.mts"
import { SchemaField } from "https://unpkg.com/pocketbase@0.12.0/dist/pocketbase.es.mjs";
import { ParsedRow, RawRow } from "../types/csv.ts";
import {
POCKETBASE_SYSFIELD,
POCKETBASE_TYPE,
PocketbaseRowSchema,
PocketbaseType,
} from "../types/pocketbase.ts";
import { parseBool } from "./csv.ts";
import { isBool, isDate, isEmail, isJson, isNumber } from "./regex.ts";
/**
 * Deduces the PocketBase type of a column by running the type detectors over
 * its data and returns the matching column definition.
 * @param data - Raw parser output
 * @param prop - Column name
 * @returns `SchemaField` for the column
 */
export function addSchemaField(data: RawRow[], prop: string): SchemaField {
  // A name that collides with a system field gets an underscore prefix
  let targetProp = prop;
  if (POCKETBASE_SYSFIELD.includes(prop.toLowerCase())) {
    targetProp = `_${prop}`;
  }

  // Order matters: the more restrictive detectors have to be tried first
  const detectors: [
    (rows: RawRow[], column: string) => boolean,
    PocketbaseType,
  ][] = [
    [isBool, "bool"],
    [isNumber, "number"],
    [isEmail, "email"],
    [isJson, "json"],
    [isDate, "date"],
  ];

  for (const [matches, detectedType] of detectors) {
    if (matches(data, prop)) {
      return createSchemaField(targetProp, detectedType);
    }
  }

  // Nothing matched - fall back to plain text
  return createSchemaField(targetProp, "text");
}
/**
 * Looks up the type of a column in the collection schema.
 * Logs an error and exits the process when the column is missing from the
 * schema (exit code -1) or its type is not supported (exit code -2).
 * @param column - Column name
 * @param schema - PocketBase collection schema
 * @returns The supported PocketBase type of the column
 */
export function getSchemaType(
  column: string,
  schema: SchemaField[],
): PocketbaseType {
  const schemaField = schema.find(({ name }) => name === column);

  // The data and the schema are out of sync
  if (schemaField === undefined) {
    console.error(
      `%cSchemaError: Supplied column '${column}' not found in collection schema`,
      "color: red",
    );
    Deno.exit(-1);
  }

  const supportedTypes: PocketbaseType[] = [
    POCKETBASE_TYPE.BOOL,
    POCKETBASE_TYPE.NUMBER,
    POCKETBASE_TYPE.PLAIN_TEXT,
    POCKETBASE_TYPE.EMAIL,
    POCKETBASE_TYPE.JSON,
    POCKETBASE_TYPE.DATETIME,
  ];
  const resolved = supportedTypes.find(
    (candidate) => candidate === schemaField.type,
  );

  if (resolved === undefined) {
    console.error(
      `%cPbTypeError: Unsupported type '${schemaField.type}'`,
      "color: red",
    );
    Deno.exit(-2);
  }

  return resolved;
}
/**
 * Creates a `SchemaField` for a column of the given type.
 * Every field is created as non-system, optional and non-unique; only the
 * `options` object differs between types.
 * @param name - Column name
 * @param type - PocketBase type
 * @returns The schema field definition
 */
function createSchemaField(name: string, type: PocketbaseType): SchemaField {
  // Attributes shared by all field types
  const common = {
    name,
    type,
    system: false,
    required: false,
    unique: false,
  };

  switch (type) {
    // Types created without any options
    case POCKETBASE_TYPE.BOOL:
    case POCKETBASE_TYPE.JSON:
      return new SchemaField({ ...common, options: {} });

    // Types created with unset `min`/`max` options
    case POCKETBASE_TYPE.NUMBER:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return new SchemaField({
        ...common,
        options: { min: null, max: null },
      });

    // Plain text additionally carries an empty `pattern`
    case POCKETBASE_TYPE.PLAIN_TEXT:
      return new SchemaField({
        ...common,
        options: { min: null, max: null, pattern: "" },
      });
  }
}
/**
 * Builds a row schema - a mapping of column name to PocketBase type - from
 * the collection schema.
 * @param schema - PocketBase collection schema
 * @returns Object keyed by field name holding each field's type
 */
export function generateRowSchema(schema: SchemaField[]) {
  const typedColumns = schema.map(
    ({ name }): [string, PocketbaseType] => [name, getSchemaType(name, schema)],
  );
  const rowSchema: PocketbaseRowSchema = Object.fromEntries(typedColumns);
  return rowSchema;
}
/**
 * Derives PocketBase collection schema fields from raw parser output.
 * The columns are taken from the first row; the types are deduced from a
 * sample of at most 1000 leading rows.
 * @param data - Raw parser output
 * @param stringifyId - When `true`, a column named `id` (case-insensitive) is
 * created as a text field prefixed with an underscore instead of being type-detected
 * @returns Schema fields, one per column
 */
export function createSchema(
  data: RawRow[],
  stringifyId: boolean,
): SchemaField[] {
  // Cap the analysed sample to avoid poor performance on large datasets
  const sampleLimit = 1000;
  const sample = data.length > sampleLimit ? data.slice(0, sampleLimit) : data;

  const fields: SchemaField[] = [];

  for (const column in sample[0]) {
    // respect --id option
    const keepIdAsText = stringifyId && column.toLowerCase() === "id";
    fields.push(
      keepIdAsText
        ? createSchemaField(`_${column}`, "text")
        : addSchemaField(sample, column),
    );
  }

  return fields;
}
/**
 * Converts raw CSV rows into typed rows according to the collection schema.
 * The derived row schema is printed to the console before conversion.
 * @param data - Raw CSV parser output
 * @param schema - PocketBase collection schema
 * @returns Typed rows in the same order as the input
 */
export function parseData(data: RawRow[], schema: SchemaField[]): ParsedRow[] {
  // the row schema is computed once and shared by all rows
  const rowSchema = generateRowSchema(schema);
  console.log("RowSchema", rowSchema);

  return data.map((rawRow) => parseRow(rawRow, rowSchema));
}
/**
 * Builds a typed row object from a raw row using the row schema.
 * Columns whose names clash with system fields are stored under the
 * underscore-prefixed name.
 * @param rawRow - Raw row data
 * @param schema - Row type template
 * @returns Typed row
 */
function parseRow(rawRow: RawRow, schema: PocketbaseRowSchema): ParsedRow {
  return Object.keys(rawRow).reduce<ParsedRow>((typedRow, column) => {
    // Name conflicts with a system field are resolved with an underscore
    const isSystemName = POCKETBASE_SYSFIELD.includes(column.toLowerCase());
    const targetColumn = isSystemName ? `_${column}` : column;

    // The type is looked up by the target name, the value by the original one
    const parsed = parseValue(rawRow[column], schema[targetColumn]);
    return { ...typedRow, [targetColumn]: parsed };
  }, {});
}
/**
 * Parses a string to a value compliant with the corresponding PocketBase type.
 * An empty string is converted to `null` for every supported type.
 * Logs an error and exits the process (exit code -3) for an unsupported type.
 * @param value - Raw string value
 * @param type - Target PocketBase type
 * @returns `boolean` for bool, `number` for number, the parsed value for json,
 * the unchanged string for text, email and date, or `null` for an empty string
 * @throws SyntaxError when a non-empty json value is not valid JSON
 */
// deno-lint-ignore no-explicit-any
function parseValue(value: string, type: PocketbaseType): any {
  switch (type) {
    case POCKETBASE_TYPE.BOOL:
      return value === "" ? null : parseBool(value);

    case POCKETBASE_TYPE.NUMBER:
      return value === "" ? null : parseFloat(value);

    case POCKETBASE_TYPE.JSON:
      // NOTE: the schema is deduced from a sample of at most 1000 rows
      // (see `createSchema`), so only the sampled values are known to parse;
      // a malformed value in a later row makes `JSON.parse` throw here.
      return value === "" ? null : JSON.parse(value);

    // These types are passed through as strings
    case POCKETBASE_TYPE.PLAIN_TEXT:
    case POCKETBASE_TYPE.EMAIL:
    case POCKETBASE_TYPE.DATETIME:
      return value === "" ? null : value;

    default:
      console.error(
        `%cPbTypeError: value parser for type '${type}' is not yet implemented.`,
        "color: red",
      );
      Deno.exit(-3);
  }
}

143
utils/regex.ts Normal file
View File

@ -0,0 +1,143 @@
import { RawRow } from "../types/csv.ts";
/**
 * Tells whether a column could be of type `Bool`.
 * Every non-empty value has to be exactly `0`, `1`, `true` or `false`.
 * An error raised while scanning is logged and the scan stops.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns `true` when the column has at least one non-empty value and all of
 * them are boolean literals
 */
export function isBool(data: RawRow[], prop: string): boolean {
  const boolLiteral = /^(0|1|true|false)$/;
  let nonEmpty = 0;
  let hits = 0;

  try {
    for (const row of data) {
      const cell = row[prop];
      // empty cells are treated as nulls and skipped
      if (cell === "") {
        continue;
      }
      nonEmpty += 1;
      if (cell.match(boolLiteral) !== null) {
        hits += 1;
      }
    }
  } catch (e) {
    console.error(e);
  }

  // a column without any values is not a bool column
  return hits > 0 && hits === nonEmpty;
}
/**
 * Tells whether a column could be of type `Number` (integer or floating point).
 * Accepted forms are an optional minus sign, digits and an optional decimal
 * point followed by optional digits.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns `true` when the column has at least one non-empty value and all of
 * them are numeric
 */
export function isNumber(data: RawRow[], prop: string): boolean {
  // integer, or float with a mandatory integer part
  const numeric = /^-?[0-9]+(\.[0-9]*)?$/;

  // empty cells are treated as nulls and left out
  const cells = data.map((row) => row[prop]).filter((cell) => cell !== "");
  const numericCells = cells.filter((cell) => cell.match(numeric) !== null);

  // a column without any values is not a number column
  return numericCells.length > 0 && numericCells.length === cells.length;
}
/**
 * Checks if the column type could be `Email`.
 * Every non-empty value has to look like `local@domain.tld`. The top-level
 * domain needs at least two characters and has no upper length limit, so
 * addresses such as `user@example.technology` are recognised.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns `true` when the column has at least one non-empty value and all of
 * them match the e-mail pattern
 */
export function isEmail(data: RawRow[], prop: string): boolean {
  const pattern = /^[\w.-]+@([\w-]+\.)+[\w-]{2,}$/;
  let values = 0;
  let matched = 0;

  for (const obj of data) {
    const cell = obj[prop];
    // could be nullable
    if (cell === "") {
      continue;
    }
    values++;
    if (cell.match(pattern) !== null) {
      matched++;
    }
  }

  // an empty column will return false
  return matched === values && matched > 0;
}
/**
 * Tells whether a column could be of type `Json` by attempting to parse each
 * non-empty value with `JSON.parse()`.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns `true` when the column has at least one non-empty value and all of
 * them parse as JSON
 */
export function isJson(data: RawRow[], prop: string): boolean {
  // a value is valid when parsing it raises no exception
  const parses = (text: string): boolean => {
    try {
      JSON.parse(text);
      return true;
    } catch {
      return false;
    }
  };

  // empty cells are treated as nulls and left out
  const cells = data.map((row) => row[prop]).filter((cell) => cell !== "");
  const validCount = cells.filter((cell) => parses(cell)).length;

  // a column without any values is not a json column
  return validCount > 0 && validCount === cells.length;
}
/**
 * Tells whether a column could be of type `Date` by running each non-empty
 * value through `Date.parse()`.
 * @param data - Sample data
 * @param prop - Validated property
 * @returns `true` when the column has at least one non-empty value and
 * `Date.parse()` yields a timestamp for all of them
 */
export function isDate(data: RawRow[], prop: string): boolean {
  // empty cells are treated as nulls and left out
  const candidates = data
    .map((row) => row[prop])
    .filter((cell) => cell !== "");

  // `Date.parse()` signals an unparsable value with NaN
  const dates = candidates.filter((cell) => !Number.isNaN(Date.parse(cell)));

  // a column without any values is not a date column
  return dates.length > 0 && dates.length === candidates.length;
}