Skip to content

Commit

Permalink
Wrapped all cells and updated json csv generators to use updated
Browse files Browse the repository at this point in the history
escaping.
  • Loading branch information
bradfordben committed Apr 16, 2020
1 parent 1fa768c commit 9e09185
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 112 deletions.
141 changes: 65 additions & 76 deletions core/kazoo_csv/src/kz_csv.erl
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@

,jobjs_to_file/1, jobjs_to_file/2
,write_header_to_file/1, write_header_to_file/2
]).
-export([from_jobjs/1
,from_jobjs/2

,from_jobjs/1 ,from_jobjs/2
]).

-include_lib("kazoo_stdlib/include/kz_types.hrl").
Expand All @@ -49,6 +48,7 @@
-ifdef(TEST).
-export([take_line/1]).
-export([parse_row/1]).
-export([cell_to_binary/1]).
-endif.

%%%=============================================================================
Expand Down Expand Up @@ -200,16 +200,25 @@ verify_mapped_row(Pred, MappedRow) when is_function(Pred, 2),
maps:fold(F, [], MappedRow).

%%------------------------------------------------------------------------------
%% @doc
%% @doc Convert a list of cells into a row.
%% Escape all double quotes with a leading double quote.
%% Surround all cells in double quotes.
%% Add a comma between all cells.
%% Add a new line to the end of the row.
%% This should be used to format all rows correctly and ensure all cells are
%% correctly escaped.
%% @end
%%------------------------------------------------------------------------------
-spec row_to_iolist(row()) -> iodata().
row_to_iolist([Cell]) -> [cell_to_binary(Cell), $\n];
row_to_iolist(Row=[_|_]) ->
lists:join($,, [cell_to_binary(Cell) || Cell <- Row]) ++ [$\n].
row_to_iolist(Cells=[_|_]) ->
[lists:join($,, [cell_to_binary(Cell) || Cell <- Cells]), $\n].

%%------------------------------------------------------------------------------
%% @doc
%% @doc Convert a mapped row into a row.
%% Escape all double quotes with a leading double quote.
%% Surround all cells in double quotes.
%% Add a comma between all cells.
%% Add a new line to the end of the row.
%% @end
%%------------------------------------------------------------------------------
-spec mapped_row_to_iolist(row(), mapped_row()) -> iodata().
Expand All @@ -230,7 +239,7 @@ json_to_iolist(Records, Fields)
when is_list(Records),
is_list(Fields) ->
Tmp = <<"/tmp/json_", (kz_binary:rand_hex(11))/binary, ".csv">>,
'ok' = file:write_file(Tmp, [kz_term:iolist_join($,, Fields), $\n]),
'ok' = file:write_file(Tmp, row_to_iolist(Fields)),
lists:foreach(fun (Record) ->
Row = [kz_json:get_ne_binary_value(Field, Record, ?ZILCH) || Field <- Fields],
_ = file:write_file(Tmp, [row_to_iolist(Row)], ['append'])
Expand All @@ -250,15 +259,14 @@ write_header_to_file({File, CellOrdering}) ->
write_header_to_file({File, CellOrdering}, HeaderMap) ->
HeaderFile = <<File/binary, ".header">>,

Headings = [begin
Heading = kz_binary:join(Cells, <<"_">>),
props:get_value(Heading, HeaderMap, Heading)
end
|| Cells <- CellOrdering
],
Headers = [begin
Heading = kz_binary:join(Cells, <<"_">>),
props:get_value(Heading, HeaderMap, Heading)
end
|| Cells <- CellOrdering
],

Header = [csv_ize(Headings), $\n],
'ok' = file:write_file(HeaderFile, Header),
'ok' = file:write_file(HeaderFile, row_to_iolist(Headers)),

{'ok', _} = kz_os:cmd(<<"cat ", File/binary, " >> ", HeaderFile/binary>>),
{'ok', _} = file:copy(HeaderFile, File),
Expand Down Expand Up @@ -286,20 +294,12 @@ jobjs_to_file(JObjs, CellOrdering) ->
csv_filename() ->
    %% Path is "/tmp/json_" ++ random hex from kz_binary:rand_hex(11) ++ ".csv".
    RandomHex = kz_binary:rand_hex(11),
    <<"/tmp/json_", RandomHex/binary, ".csv">>.

-spec maybe_convert_cell_to_binary(kz_json:get_key(), kz_json:object()) -> binary().
maybe_convert_cell_to_binary(Path, JObj) ->
case kz_json:get_value(Path, JObj, ?ZILCH) of
List when is_list(List) -> list_to_binary(lists:join(",", List));
Value -> cell_to_binary(Value)
end.

-spec jobj_to_file(kz_json:object(), file_return()) -> file_return().
jobj_to_file(JObj, {File, CellOrdering}) ->
FlatJObj = kz_json:flatten(JObj),
NewOrdering = maybe_update_ordering(CellOrdering, FlatJObj),

Row = [maybe_convert_cell_to_binary(Path, JObj) || Path <- NewOrdering],
_ = file:write_file(File, [csv_ize(Row), $\n], ['append']),
Row = [kz_json:get_value(Path, JObj) || Path <- NewOrdering],
_ = file:write_file(File, row_to_iolist(Row), ['append']),
{File, NewOrdering}.

maybe_update_ordering(CellOrdering, FlatJObj) ->
Expand Down Expand Up @@ -333,10 +333,6 @@ from_jobjs(JObjs, Options) ->
],
lists:foldl(fun(F, J) -> F(J, Options) end, JObjs, Routines).

%%%=============================================================================
%%% Internal functions
%%%=============================================================================

%%------------------------------------------------------------------------------
%% @doc
%% @end
Expand Down Expand Up @@ -446,22 +442,39 @@ map_io_indices(Header, CSVHeader) ->

%%------------------------------------------------------------------------------
%% @doc Convert cell data to binary representation of a cell for writing to CSV
%% file, escaping double quotation marks and commas.
%% file, escaping double quotation marks with a leading double quotation mark
%% and leaving commas as all cells are wrapped in double quotation marks.
%% All cells should pass through this function to be correctly formatted.
%%
%% If a cell is a list, add commas to separate the list items and try to convert
%% the list to a binary.
%% If the cell is non-binary data, try to convert the cell to a binary.
%% If the conversion fails then use `?ZILCH' as the cell's value.
%% @end
%%------------------------------------------------------------------------------
-spec cell_to_binary(cell()) -> binary().
cell_to_binary(?ZILCH) -> <<>>;
cell_to_binary(<<>>) -> <<"\"\"">>;
cell_to_binary(Cell=?NE_BINARY) ->
EscapedCell = binary:replace(Cell, <<"\"">>, <<"\"\"">>, ['global']),
case Cell =/= EscapedCell
orelse binary:match(Cell, <<$,>>) =/= 'nomatch'
of
'true' -> <<"\"", EscapedCell/binary, "\"">>;
'false' -> Cell
end;
<<"\"", (binary:replace(Cell, <<"\"">>, <<"\"\"">>, ['global']))/binary, "\"">>;
cell_to_binary(Cell) when is_list(Cell) ->
cell_to_binary(try_to_binary(lists:join(",", Cell), ?ZILCH));
cell_to_binary(Cell) ->
cell_to_binary(kz_term:to_binary(Cell)).
cell_to_binary(try_to_binary(Cell, ?ZILCH)).

%%------------------------------------------------------------------------------
%% @doc Try to convert `Value' into a binary.
%% `?ZILCH' is passed through untouched, whatever `Default' is.
%% Any other value is handed to `kz_term:to_binary/1'; if that call raises,
%% `Default' is returned instead of propagating the exception.
%% @end
%%------------------------------------------------------------------------------
%% The success type is the built-in `binary()': built-in types cannot be
%% redefined by a module, so a remote `kz_term:binary()' type cannot exist.
%% NOTE(review): the first clause returns `?ZILCH' even when `Default' is a
%% different term; the spec assumes callers pass `?ZILCH' as `Default' - confirm.
-spec try_to_binary(any(), Default) -> binary() | Default.
try_to_binary(?ZILCH, _) -> ?ZILCH;
try_to_binary(Value, Default) ->
    %% Deliberate best-effort: every exception class and reason maps to `Default'.
    try kz_term:to_binary(Value)
    catch
        _E:_R -> Default
    end.

-spec maybe_transform(kz_json:objects(), kz_term:proplist()) -> kz_json:objects().
maybe_transform(JObjs, Options) ->
Expand Down Expand Up @@ -510,14 +523,15 @@ fold_over_keys(Key, Hs) ->

-spec create_csv_header(kz_json:objects(), kz_term:proplist()) -> iolist().
create_csv_header(JObjs, Options) ->
Headers = case props:get_value('header_map', Options) of
'undefined' -> get_headers(JObjs);
HeaderMap ->
lists:map(fun(JObjHeader) -> header_map(JObjHeader, HeaderMap) end
,get_headers(JObjs)
)
end,
[csv_ize(lists:reverse(Headers)), $\n].
HeadersReversed = case props:get_value('header_map', Options) of
'undefined' -> get_headers(JObjs);
HeaderMap ->
lists:map(fun(JObjHeader) -> header_map(JObjHeader, HeaderMap) end
,get_headers(JObjs)
)
end,
Headers = lists:reverse(HeadersReversed),
row_to_iolist(Headers).

-spec header_map(kz_term:ne_binary(), kz_term:proplist()) -> kz_term:ne_binary().
header_map(JObjHeader, HeaderMap) ->
Expand All @@ -530,35 +544,10 @@ header_map(JObjHeader, HeaderMap) ->
json_objs_to_csv([], _) -> [];
json_objs_to_csv(JObjs, Options) ->
case props:is_true('build_headers', Options, 'true') of
'true' -> [create_csv_header(JObjs, Options), [[json_to_csv(JObj), $\n] || JObj <- JObjs]];
'false' -> [[json_to_csv(JObj), $\n] || JObj <- JObjs]
end.

%% wrap cells in quotes
-spec csv_ize(kz_json:path()) -> iolist().
csv_ize([F|Rest]) ->
[wrap_first_cell(try_to_binary(F))
,[wrap_next_cell(try_to_binary(V)) || V <- Rest]
].

wrap_first_cell(?ZILCH) ->
[];
wrap_first_cell(V) ->
[<<"\"">>, kz_term:to_binary(V), <<"\"">>].

wrap_next_cell(?ZILCH) ->
[<<",">>];
wrap_next_cell(V) ->
[<<",\"">>, V, <<"\"">>].

-spec try_to_binary(any()) -> kz_term:api_binary().
try_to_binary('undefined') -> 'undefined';
try_to_binary(Value) ->
try kz_term:to_binary(Value)
catch
_E:_R -> <<>>
'true' -> [create_csv_header(JObjs, Options), [json_to_csv(JObj) || JObj <- JObjs]];
'false' -> [json_to_csv(JObj) || JObj <- JObjs]
end.

-spec json_to_csv(kz_json:object()) -> iolist().
json_to_csv(JObj) ->
csv_ize(kz_json:values(JObj)).
row_to_iolist(kz_json:values(JObj)).
74 changes: 38 additions & 36 deletions core/kazoo_csv/test/kz_csv_tests.erl
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,15 @@ row_to_iolist_test_() ->
[?_assertException('error', 'function_clause', kz_csv:row_to_iolist([]))
]
++ [?_assertEqual(Expected, iolist_to_binary(kz_csv:row_to_iolist(Input)))
|| {Expected, Input} <- [{<<"a,b\n">>, [<<"a">>, <<"b">>]}
,{<<"a,,b\n">>, [<<"a">>, ?ZILCH, <<"b">>]}
,{<<",,b\n">>, [?ZILCH, ?ZILCH, <<"b">>]}
,{<<"a,b,\n">>, [<<"a">>, <<"b">>, ?ZILCH]}
,{<<"a,b,,,c\n">>, [<<"a">>, <<"b">>, ?ZILCH, ?ZILCH, <<"c">>]}
,{<<"a,\"comma, test\"\n">>, [<<"a">>, <<"comma, test">>]}
,{<<"a,\"double quote \"\"test\"\"\"\n">>, [<<"a">>, <<"double quote \"test\"">>]}
,{<<"a,", JSONCSV/binary>>, [<<"a">>, JSONRaw]}
|| {Expected, Input} <- [{<<"\"a\",\"b\"\n">>, [<<"a">>, <<"b">>]}
,{<<"\"a\",,\"b\"\n">>, [<<"a">>, ?ZILCH, <<"b">>]}
,{<<"\"a\",\"\",\"b\"\n">>, [<<"a">>, <<>>, <<"b">>]}
,{<<",,\"b\"\n">>, [?ZILCH, ?ZILCH, <<"b">>]}
,{<<"\"a\",\"b\",\n">>, [<<"a">>, <<"b">>, ?ZILCH]}
,{<<"\"a\",\"b\",,,\"c\"\n">>, [<<"a">>, <<"b">>, ?ZILCH, ?ZILCH, <<"c">>]}
,{<<"\"a\",\"comma, test\"\n">>, [<<"a">>, <<"comma, test">>]}
,{<<"\"a\",\"double quote \"\"test\"\"\"\n">>, [<<"a">>, <<"double quote \"test\"">>]}
,{<<"\"a\",", JSONCSV/binary>>, [<<"a">>, JSONRaw]}
]
].

Expand All @@ -218,26 +219,26 @@ mapped_row_to_iolist_test_() ->

mapped_row_data() ->
[{5, <<",,,,\n">>, #{}}
,{5, <<"a,b,,,\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>}}
,{2, <<"a,b\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>}}
,{3, <<"a,,b\n">>, #{<<"1">> => <<"a">>, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{2, <<"a,\n">>, #{<<"1">> => <<"a">>, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{3, <<",,b\n">>, #{<<"1">> => ?ZILCH, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{5, <<"\"a\",\"b\",,,\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>}}
,{2, <<"\"a\",\"b\"\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>}}
,{3, <<"\"a\",,\"b\"\n">>, #{<<"1">> => <<"a">>, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{2, <<"\"a\",\n">>, #{<<"1">> => <<"a">>, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{3, <<",,\"b\"\n">>, #{<<"1">> => ?ZILCH, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{2, <<",\n">>, #{<<"1">> => ?ZILCH, <<"2">> => ?ZILCH, <<"3">> => <<"b">>}}
,{3, <<"a,b,\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>, <<"3">> => ?ZILCH}}
,{2, <<"a,b\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>, <<"3">> => ?ZILCH}}
,{5, <<"a,b,,,c\n">>, #{<<"1">> => <<"a">>
,<<"2">> => <<"b">>
,<<"3">> => ?ZILCH
,<<"4">> => ?ZILCH
,<<"5">> => <<"c">>
}}
,{4, <<"a,b,,\n">>, #{<<"1">> => <<"a">>
,<<"2">> => <<"b">>
,<<"3">> => ?ZILCH
,<<"4">> => ?ZILCH
,<<"5">> => <<"c">>
}}
,{3, <<"\"a\",\"b\",\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>, <<"3">> => ?ZILCH}}
,{2, <<"\"a\",\"b\"\n">>, #{<<"1">> => <<"a">>, <<"2">> => <<"b">>, <<"3">> => ?ZILCH}}
,{5, <<"\"a\",\"b\",,,\"c\"\n">>, #{<<"1">> => <<"a">>
,<<"2">> => <<"b">>
,<<"3">> => ?ZILCH
,<<"4">> => ?ZILCH
,<<"5">> => <<"c">>
}}
,{4, <<"\"a\",\"b\",,\n">>, #{<<"1">> => <<"a">>
,<<"2">> => <<"b">>
,<<"3">> => ?ZILCH
,<<"4">> => ?ZILCH
,<<"5">> => <<"c">>
}}
].

json_to_iolist_test_() ->
Expand All @@ -251,11 +252,11 @@ json_to_iolist_test_() ->
Records3 = [kz_json:from_list([{<<"account_id">>,<<"account0000000000000000000000002">>}, {<<"e164">>,<<"+14157215234">>}, {<<"cnam.outbound">>,<<"me">>}])
,kz_json:from_list([{<<"account_id">>,<<>>}, {<<"e164">>,<<"+14157215235">>}, {<<"cnam.outbound">>,<<>>}])
],
[?_assertEqual(<<"A\na1\n42\n">>, kz_csv:json_to_iolist(Records1))
,?_assertEqual(<<"field1,field deux\n,QUUX\n,\nr'bla.+\\n',\n">>
[?_assertEqual(<<"\"A\"\n\"a1\"\n\"42\"\n">>, kz_csv:json_to_iolist(Records1))
,?_assertEqual(<<"\"field1\",\"field deux\"\n,\"QUUX\"\n,\n\"r'bla.+\\n'\",\n">>
,kz_csv:json_to_iolist(Records2, [<<"field1">>,<<"field deux">>])
)
,?_assertEqual(<<"account_id,e164,cnam.outbound\naccount0000000000000000000000002,+14157215234,me\n,+14157215235,\n">>, kz_csv:json_to_iolist(Records3))
,?_assertEqual(<<"\"account_id\",\"e164\",\"cnam.outbound\"\n\"account0000000000000000000000002\",\"+14157215234\",\"me\"\n,\"+14157215235\",\n">>, kz_csv:json_to_iolist(Records3))
].

parse_test_() ->
Expand All @@ -276,10 +277,10 @@ parse_test_() ->
)
,?_assertEqual([?ZILCH, ?ZILCH], kz_csv:parse_row(<<",">>))
,?_assertEqual([<<"test">>,?ZILCH], kz_csv:parse_row(<<"test,">>))
,?_assertEqual([<<"test,">>,<<"foo">>], kz_csv:parse_row(<<"\"test,\",foo">>))
,?_assertEqual([<<"\"test\"">>,<<"foo">>], kz_csv:parse_row(<<"\"\"\"test\"\"\",foo">>))
,?_assertEqual([<<"This is a \"test\"">>,<<"foo">>], kz_csv:parse_row(<<"\"This is a \"\"test\"\"\",foo">>))
,?_assertEqual([<<"test ,">>,<<" foo ">>, <<"bar ">>], kz_csv:parse_row(<<" \"test ,\" , foo ,bar ">>))
,?_assertEqual([<<"test,">>,<<"foo">>], kz_csv:parse_row(<<"\"test,\",\"foo\"">>))
,?_assertEqual([<<"\"test\"">>,<<"foo">>], kz_csv:parse_row(<<"\"\"\"test\"\"\",\"foo\"">>))
,?_assertEqual([<<"This is a \"test\"">>,<<"foo">>], kz_csv:parse_row(<<"\"This is a \"\"test\"\"\",\"foo\"">>))
,?_assertEqual([<<"test ,">>,<<" foo ">>, <<"bar ">>], kz_csv:parse_row(<<" \"test ,\" ,\" foo \",\"bar \"">>))
,?_assertEqual([<<"test">>,?ZILCH,?ZILCH], kz_csv:parse_row(<<"test,,">>))
,?_assertEqual([<<"test">>,?ZILCH,<<"foo bar">>], kz_csv:parse_row(<<"test,,foo bar">>))
,?_assertEqual([?ZILCH,<<"test">>,<<"''">>,<<"foo bar">>], kz_csv:parse_row(<<",test,'',foo bar">>))
Expand All @@ -301,6 +302,7 @@ variable_json_test() ->
,<<"{\"a\":2, \"b\":3}">>
,<<"{\"b\":3, \"a\":4}">>
,<<"{\"c\":3, \"a\":4}">>
,<<"{\"c\":\"c-value, test\", \"a\":\"\"}">>
],
JObjs = [kz_json:decode(JSON) || JSON <- JSONs],
{File, CellOrdering} = kz_csv:jobjs_to_file(JObjs),
Expand All @@ -310,7 +312,7 @@ variable_json_test() ->
?assertEqual([[<<"a">>], [<<"b">>], [<<"c">>]], CellOrdering),

{'ok', CSV} = file:read_file(File),
Expected = <<"\"a\",\"b\",\"c\"\n\"1\"\n\"2\",\"3\"\n\"4\",\"3\"\n\"4\",\"\",\"3\"\n">>,
Expected = <<"\"a\",\"b\",\"c\"\n\"1\"\n\"2\",\"3\"\n\"4\",\"3\"\n\"4\",,\"3\"\n\"\",,\"c-value, test\"\n">>,

?assertEqual(Expected, CSV),

Expand All @@ -329,7 +331,7 @@ comma_list_json_test() ->
?assertEqual([[<<"a">>], [<<"b">>]], CellOrdering),

{'ok', CSV} = file:read_file(File),
Expected = <<"\"a\",\"b\"\n\"x,y\"\n\"\",\"x,y,z\"\n\"\",\"\"\n">>,
Expected = <<"\"a\",\"b\"\n\"x,y\"\n\"\",\"x,y,z\"\n,\n">>,

?assertEqual(Expected, CSV),

Expand Down

0 comments on commit 9e09185

Please sign in to comment.