Read an NDJSON file with anonymous PL/SQL
21 Jan 2022
Table of Contents
I keep having to go back to old scripts and remember how I read from an NDJSON-formatted file into Oracle PL/SQL memory. (NDJSON is hacky JSON that works great as input to programming languages that are accustomed to reading or writing one line of text at a time when working with files.) This post contains a baseline I can start with from now on.
Anonymous PL/SQL
DECLARE
v_filedir VARCHAR2(80) := '&p_filedir'; -- file directory for data input and output
v_in_filename VARCHAR2(60) := '&p_in_filename'; -- filename for data input file including extension
PROCEDURE p_parse_json_obj ( p_in_obj JSON_OBJECT_T ) IS
BEGIN
DBMS_OUTPUT.PUT_LINE(p_in_obj.get_String('column_name_here'));
END p_parse_json_obj;
PROCEDURE p_read_ndjson_from_file() IS
l_file_handle UTL_FILE.FILE_TYPE;
l_file_line VARCHAR2(32767);
l_obj JSON_OBJECT_T;
l_counter pls_integer := 0; -- DEBUG LINE ONLY. Helps with short-circuiting.
BEGIN
UTL_FILE.FCLOSE_ALL;
l_file_handle := UTL_FILE.FOPEN(v_filedir, v_in_filename, 'R', 32767);
BEGIN
<< file_lines_loop >>
LOOP
UTL_FILE.GET_LINE(l_file_handle, l_file_line);
l_obj := JSON_OBJECT_T.PARSE(l_file_line);
--DBMS_OUTPUT.PUT_LINE(l_file_line); -- DEBUG LINE ONLY.
p_parse_json_obj(l_obj);
l_counter := l_counter + 1; -- DEBUG LINE ONLY. Helps with short-circuiting.
EXIT WHEN l_counter = 5; -- DEBUG LINE ONLY. Short-circuit.
END LOOP file_lines_loop;
EXCEPTION WHEN NO_DATA_FOUND THEN NULL;
END;
UTL_FILE.FCLOSE_ALL;
DBMS_OUTPUT.PUT_LINE( 'Data rows read: ' || l_counter ); -- DEBUG LINE ONLY.
END p_hello;
--===================================================
-- MAIN
--===================================================
BEGIN
DBMS_OUTPUT.PUT_LINE ('read_from_ndjson.sql started at ' || TO_CHAR (SYSDATE, 'DD-MON-YYYY HH24:MI:SS'));
p_read_ndjson_from_file();
DBMS_OUTPUT.PUT_LINE ('read_from_ndjson.sql ended at ' || TO_CHAR (SYSDATE, 'DD-MON-YYYY HH24:MI:SS'));
END;
/
Python CSV to NDJSON bonus
And here’s some Python to get a nuisance CSV file into NDJSON format when you don’t have Pandas handy. Run it from the command line like this:
python3 csv_to_json.py /this_is_an/input_directory my_input.csv /this_is_an/output_directory my_output.csv Y
(The code can be a lot simpler if you don’t need to run it from a command line, like with a cron scheduled job.)
import sys
import json
import csv
import os
# #########################################################################
# BEGIN: "Private" methods that should not be called from the command line
# #########################################################################
def __translate_file(input_csv_file_folder, input_csv_file_name, output_json_file_folder, output_json_file_name, do_nd_json):
# Please don't call this method from the command line. It's meant to be used internally only.
print('Reading from ' + input_csv_file_name)
with open(os.path.join(input_csv_file_folder, input_csv_file_name), mode='r') as fr:
data_list = [row for row in csv.DictReader(fr)]
print('Writing to ' + output_json_file_name)
with open(os.path.join(output_json_file_folder, output_json_file_name), mode='w') as fw:
if do_nd_json == 'Y':
print('NDJson')
fw.writelines([json.dumps(record) + '\n' for record in data_list])
else:
print('Standard "prettified" JSON')
json.dump(data_list, fw, indent=4)
# ###################################
# BEGIN: "MAIN" for the command line
# ###################################
if __name__ == "__main__":
print('Starting csv_to_json.py')
# make sure we get the right number of arguments (5)
if(len(sys.argv) != 6):
# First "sys.argv" argument (discarded) is always the path to this ".py" file and is required/non-editable.
# Second "sys.argv" argument (.get(0)) should be the input folder
# Third "sys.argv" argument (.get(1)) should be the input filename
# Fourth "sys.argv" argument (.get(2)) should be the output folder
# Fifth "sys.argv" argument (.get(3)) should be the output filename
# Sixth "sys.argv" argument (.get(4)) should be "Y" or "N" -- whether to format the output as newline-delimited JSON ("Y") or ordinary "pretty" JSON ("N")
print('Invalid number of arguments provided.')
print('Usage:')
print(' python csv_to_json.py $input_csv_file_folder $input_csv_file_name $output_json_file_folder $output_json_file_name')
exit(1)
# Parse the command-line interface arguments (throwing away this file's name)
args_dict = dict(enumerate(sys.argv[1:]))
# Execute the appropriate method
__translate_file(args_dict.get(0), args_dict.get(1), args_dict.get(2), args_dict.get(3), args_dict.get(4))
print('Ending csv_to_json.py')