CsvProxy

A proxy that allows converting CSV to a table in another data format.

Members

Functions

serialize
void serialize(S serializer)
Undocumented in source. Be warned that the author may not have intended to support it.

Static functions

defaultIsSymbolHandler
bool defaultIsSymbolHandler(const(char)[] symbol, bool quoted)

Structs

Conversion
struct Conversion

A number of conversion conventions.

Variables

comment
char comment;

Skips the first consecutive lines that start with this character.

conversionFinalizer
CsvAlgebraic delegate(return scope const(char)[] unquotedString, CsvAlgebraic scalar, bool quoted, size_t columnIndex) @(safe) pure @(nogc) conversionFinalizer;

Conversion callback to finish conversion resolution

conversions
const(Conversion)[] conversions;

The conversion map represented as array of from->to pairs.

falseStrings
const(string)[] falseStrings;
Undocumented in source.
fileName
string fileName;

File name for better error messages.

fill
bool fill;
hasHeader
bool hasHeader;

If true the elements in the first row are symbolised.

isSymbolHandler
bool function(scope const(char)[] symbol, bool quoted) @(safe) pure @(nogc) isSymbolHandler;

A function used to determine whether a string should be passed to a serializer as a symbol instead of a string. That may help to reduce memory allocation for data with a huge amount of equal cell values. The default pattern follows the regular expression [a-zA-Z_]+ and requires the symbol to be presented without double quotes.

naStrings
const(string)[] naStrings;

N/A and NULL patterns are converted to Ion null when exposed to arrays and skipped when exposed to objects

parseNumbers
bool parseNumbers;

If true the parser tries to recognise and parse numbers.

parseTimestamps
bool parseTimestamps;

If true the parser tries to recognise and parse timestamps.

quote
char quote;

Symbol to quote scalars

separator
char separator;

Scalar separator

skipEmptyLines
bool skipEmptyLines;
skipRows
ubyte skipRows;

Skips a number of rows

text
const(char)[] text;

An input CSV text. BOM isn't supported.

trueStrings
const(string)[] trueStrings;
Undocumented in source.

Examples

Matrix

import mir.test: should;
import mir.ndslice.slice: Slice;
import mir.ion.conv: serde;
import mir.ser.text;

alias Matrix = Slice!(double*, 2);

auto text = "1,2\n3,4\r\n5,6\n";

auto matrix = text.CsvProxy.serde!Matrix;
matrix.should == [[1, 2], [3, 4], [5, 6]];

Type resolution is performed for types defined in mir.algebraic_alias.csv:

  • typeof(null) - used for N/A values
  • bool
  • long
  • double
  • string
  • Timestamp (see mir.timestamp)
1     import mir.ion.conv: serde;
2     import mir.ndslice.slice: Slice;
3     import mir.ser.text: serializeTextPretty;
4     import mir.test: should;
5     import std.string: join;
6 
7     // alias Matrix = Slice!(CsvAlgebraic*, 2);
8 
9     CsvProxy csv = {
10         conversionFinalizer : (
11             unquotedString,
12             scalar,
13             wasQuoted,
14             columnIndex)
15         {
16             // Do we want to symbolize the data?
17             return !wasQuoted && unquotedString == `Billion` ?
18                 1000000000.CsvAlgebraic :
19                 scalar;
20         },
21         text : join([
22             // User-defined conversion
23             `Billion`
24             // `long` patterns
25             , `100`, `+200`, `-200`
26             // `double` pattern
27             , `+1.0`, `-.2`, `3.`, `3e-10`, `3d20`
28             // also `double` pattern
29             , `inf`, `+Inf`, `-INF`, `+NaN`, `-nan`, `NAN`
30             // `bool` patterns
31             , `TRUE`, `FALSE`
32             // `Timestamp` patterns
33             , `2021-02-03` // iso8601 extended
34             , `2001-12-15T02:59:43.1Z` //canonical
35             // Default NA patterns are converted to Ion `null` when exposed to arrays
36             // and skipped when exposed to objects
37             , ``
38             // strings
39             , `100_000`
40             , `_ab0`
41             , `_abc`
42             , `Str`
43             , `Value100`
44             , `iNF`
45             , `Infinity`
46             , `+Infinity`
47             , `.Infinity`
48             , `""`
49             , ` `
50         ], `,`)
51     };
52 
53     // Serializing CsvProxy to Amazon Ion (text version)
54     csv.serializeTextPretty!"    ".should ==
55 `[
56     [
57         1000000000,
58         100,
59         200,
60         -200,
61         1.0,
62         -0.2,
63         3.0,
64         3e-10,
65         3e+20,
66         +inf,
67         +inf,
68         -inf,
69         nan,
70         nan,
71         nan,
72         true,
73         false,
74         2021-02-03,
75         2001-12-15T02:59:43.1Z,
76         null,
77         "100_000",
78         "_ab0",
79         "_abc",
80         "Str",
81         "Value100",
82         "iNF",
83         "Infinity",
84         "+Infinity",
85         ".Infinity",
86         "",
87         " "
88     ]
89 ]`;

Transposed Matrix & Tuple support

import mir.ion.conv: serde;
import mir.date: Date; // also works with mir.timestamp and std.datetime
import mir.functional: Tuple;
import mir.ser.text: serializeText;
import mir.test: should;
import mir.ndslice.dynamic: transposed;

auto text = "str,2022-10-12,3.4\nb,2022-10-13,2\n";

auto matrix = text.CsvProxy.serde!(Slice!(CsvAlgebraic*, 2));
matrix.transposed.serializeText.should
    == q{[["str","b"],[2022-10-12,2022-10-13],[3.4,2]]};

alias T = Tuple!(string[], Date[], double[]);

matrix.transposed.serde!T.should == T(
        [`str`, `b`],
        [Date(2022, 10, 12), Date(2022, 10, 13)],
        [3.4, 2],
);

Converting NA to NaN

import mir.csv;
import mir.algebraic: Nullable, visit;
import mir.ion.conv: serde;
import mir.ndslice: Slice, map, slice;
import mir.ser.text: serializeText;
import mir.test: should;

auto text = "1,2\n3,4\n5,\n";
auto matrix = text
    .CsvProxy
    .serde!(Slice!(Nullable!double*, 2))
    .map!(visit!((double x) => x, (_) => double.nan))
    .slice;

matrix.serializeText.should == q{[[1.0,2.0],[3.0,4.0],[5.0,nan]]};

Meta