From 2a746a3ec2b923ffa63941d3301345930d73ca30 Mon Sep 17 00:00:00 2001 From: SophiaFrancis1 <112432333+SophiaFrancis1@users.noreply.github.com> Date: Fri, 8 Sep 2023 09:35:17 -0500 Subject: [PATCH 1/3] PEP8ComplianceUpdate --- .vscode/settings.json | 6 + .../audio_processor_mainfunc.py | 377 +++++--- .../audio_processor_parameters.py | 144 +++- .../audio_processor_supportfuncs.py | 429 +++++---- .../audio_processor/corefuncs.py | 115 +-- AV/Film/dpx2ffv1/dpx2ffv1/corefuncs.py | 131 +-- AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1.py | 248 ++++-- .../dpx2ffv1/dpx2ffv1/dpx2ffv1parameters.py | 108 ++- .../dpx2ffv1/dpx2ffv1/dpx2ffv1supportfuncs.py | 158 +++- .../aja_mov2ffv1/aja_mov2ffv1/aja_mov2ffv1.py | 6 +- .../aja_mov2ffv1/aja_mov2ffv1/corefuncs.py | 102 ++- .../aja_mov2ffv1/equipment_dict.py | 58 +- .../aja_mov2ffv1/mov2ffv1mainfunc.py | 345 +++++--- .../aja_mov2ffv1/mov2ffv1parameters.py | 140 ++- .../aja_mov2ffv1/mov2ffv1passfail_checks.py | 29 +- .../aja_mov2ffv1/mov2ffv1supportfuncs.py | 813 ++++++++++++------ AV/Video/aja_mov2ffv1/run.py | 7 +- AV/Video/aja_mov2ffv1/setup.py | 29 +- Image/Inventory/inventory_script.py | 776 +++++++++++------ Image/iqc/iqc/iqc.py | 669 +++++++++----- Image/iqc/iqc/iqcparameters.py | 103 ++- Image/iqc/run.py | 4 + Image/iqc/setup.py | 41 +- .../image_csv_script/image_csv_script.py | 3 +- .../image_csv_script/parameters.py | 28 +- Meadow/mig_av/mig_av/mig_av.py | 563 +++++++----- Meadow/mig_av/mig_av/mig_av_parameters.py | 76 +- Meadow/mig_av/mig_av/mig_av_parser_funcs.py | 114 ++- Meadow/mig_av/run.py | 4 + 29 files changed, 3778 insertions(+), 1848 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d99f2f3 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/AV/Audio/audio_processor/audio_processor/audio_processor_mainfunc.py b/AV/Audio/audio_processor/audio_processor/audio_processor_mainfunc.py index 8d5da8f..348896b 100755 --- a/AV/Audio/audio_processor/audio_processor/audio_processor_mainfunc.py +++ b/AV/Audio/audio_processor/audio_processor/audio_processor_mainfunc.py @@ -11,35 +11,44 @@ from audio_processor import audio_processor_supportfuncs from audio_processor import corefuncs + def audio_processor_main(): - pm_identifier = 'p' - ac_identifier = 'a' - metadata_identifier = 'meta' - preservation_extension = '.wav' - access_extension = '.wav' - inventoryName = 'transcode_inventory.csv' + pm_identifier = "p" + ac_identifier = "a" + metadata_identifier = "meta" + preservation_extension = ".wav" + access_extension = ".wav" + inventoryName = "transcode_inventory.csv" - #assign mediaconch policies to use + # assign mediaconch policies to use if not args.input_policy: - p_wav_policy = os.path.join(os.path.dirname(__file__), 'data/mediaconch_policies/preservation_wav-96k24-tech.xml') + p_wav_policy = os.path.join( + os.path.dirname(__file__), + "data/mediaconch_policies/preservation_wav-96k24-tech.xml", + ) else: p_wav_policy = args.input_policy if not args.output_policy: - a_wav_policy = os.path.join(os.path.dirname(__file__), 'data/mediaconch_policies/access_wav-44k16-tech.xml') + a_wav_policy = os.path.join( + os.path.dirname(__file__), + "data/mediaconch_policies/access_wav-44k16-tech.xml", + ) else: a_wav_policy = args.output_policy - bwf_policy = os.path.join(os.path.dirname(__file__), 
'data/mediaconch_policies/wav-bwf.xml') + bwf_policy = os.path.join( + os.path.dirname(__file__), "data/mediaconch_policies/wav-bwf.xml" + ) - #assign input and output + # assign input and output indir = corefuncs.input_check() if args.output_path: qc_csv_file = args.output_path else: base_folder_name = os.path.basename(indir) - qc_csv_file = os.path.join(indir, base_folder_name + '-qc_log.csv') + qc_csv_file = os.path.join(indir, base_folder_name + "-qc_log.csv") corefuncs.output_check(qc_csv_file) - #check that required programs are present + # check that required programs are present corefuncs.mediaconch_check() corefuncs.ffprobe_check() if args.transcode: @@ -48,30 +57,36 @@ def audio_processor_main(): metaedit_version = corefuncs.get_bwf_metaedit_version() sox_version = corefuncs.get_sox_version() - reference_inventory_file = os.path.join(os.path.dirname(__file__), 'data/inventory_reference.csv') - reference_inventory_list = audio_processor_supportfuncs.load_reference_inventory(reference_inventory_file) - #verify that mediaconch policies are present + reference_inventory_file = os.path.join( + os.path.dirname(__file__), "data/inventory_reference.csv" + ) + reference_inventory_list = audio_processor_supportfuncs.load_reference_inventory( + reference_inventory_file + ) + # verify that mediaconch policies are present corefuncs.mediaconch_policy_exists(p_wav_policy) corefuncs.mediaconch_policy_exists(a_wav_policy) csvInventory = os.path.join(indir, inventoryName) - #TO DO: separate out csv and json related functions that are currently in supportfuncs into dedicated csv or json related py files - #csvDict = audio_processor_supportfuncs.import_csv(csvInventory) - #create the list of csv headers that will go in the qc log csv file + # TO DO: separate out csv and json related functions that are currently in supportfuncs into dedicated csv or json related py files + # csvDict = audio_processor_supportfuncs.import_csv(csvInventory) + # create the list of csv headers that will go in the qc log csv file - #importing inventories + # importing inventories if args.source_inventory: source_inventories = args.source_inventory - source_inventory_dict = audio_processor_supportfuncs.import_inventories(source_inventories, reference_inventory_list) + source_inventory_dict = audio_processor_supportfuncs.import_inventories( + source_inventories, reference_inventory_list + ) else: - print('\n*** Checking input directory for CSV files ***') + print("\n*** Checking input directory for CSV files ***") source_inventories = glob.glob(os.path.join(indir, "*.csv")) - source_inventories = [i for i in source_inventories if not 'qc_log.csv' in i] + source_inventories = [i for i in source_inventories if not "qc_log.csv" in i] if not source_inventories: print("\n+++ WARNING: Unable to CSV inventory file +++") print("CONTINUE? 
(y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: source_inventory_dict = {} @@ -80,34 +95,36 @@ def audio_processor_main(): else: sys.stdout.write("Please respond with 'yes' or 'no'") quit() - #rather than quitting - prompt user to choose whether or not to continue + # rather than quitting - prompt user to choose whether or not to continue else: print("Inventories found\n") - source_inventory_dict = audio_processor_supportfuncs.import_inventories(source_inventories, reference_inventory_list) + source_inventory_dict = audio_processor_supportfuncs.import_inventories( + source_inventories, reference_inventory_list + ) csvHeaderList = [ - "filename", - "Shot Sheet Check", - "Date", - "File Format & Metadata Verification", - "Date", - "File Inspection", - "Date", - "QC Notes", - "Runtime" + "filename", + "Shot Sheet Check", + "Date", + "File Format & Metadata Verification", + "Date", + "File Inspection", + "Date", + "QC Notes", + "Runtime", ] - print ("***STARTING PROCESS***") + print("***STARTING PROCESS***") object_list = audio_processor_supportfuncs.get_immediate_subdirectories(indir) - #load bwf metadata into dictionary + # load bwf metadata into dictionary if args.write_bwf_metadata: - #TODO check that bwf_metaedit is installed - bwf_file = os.path.join(os.path.dirname(__file__), 'data/bwf_metadata.json') + # TODO check that bwf_metaedit is installed + bwf_file = os.path.join(os.path.dirname(__file__), "data/bwf_metadata.json") with open(bwf_file) as standard_metadata: bwf_dict = json.load(standard_metadata) -#TODO add earlier failure to end process if all files do not have corresponding inventory entries + # TODO add earlier failure to end process if all files do not have corresponding inventory entries for object in object_list: object_folder_abspath = os.path.join(indir, object) @@ -116,145 +133,237 @@ def audio_processor_main(): for file in glob.glob1(pm_folder_abspath, "*" + preservation_extension): pm_file_abspath = os.path.join(pm_folder_abspath, file) if not file.endswith(pm_identifier + preservation_extension): - print('WARNING: Error processing preservation files') - print('Your input files do not end with the expected identifier or have a different extension than was expected') + print("WARNING: Error processing preservation files") + print( + "Your input files do not end with the expected identifier or have a different extension than was expected" + ) quit() else: - base_filename = file.replace(pm_identifier + preservation_extension, '') + base_filename = file.replace( + pm_identifier + preservation_extension, "" + ) ac_folder_abspath = os.path.join(object_folder_abspath, ac_identifier) - ac_file_abspath = os.path.join(ac_folder_abspath, base_filename + ac_identifier + access_extension) - meta_folder_abspath = os.path.join(object_folder_abspath, metadata_identifier) - pm_md5_abspath = pm_file_abspath.replace(preservation_extension, '.md5') - ac_md5_abspath = ac_file_abspath.replace(access_extension, '.md5') + ac_file_abspath = os.path.join( + ac_folder_abspath, base_filename + ac_identifier + access_extension + ) + meta_folder_abspath = os.path.join( + object_folder_abspath, metadata_identifier + ) + pm_md5_abspath = pm_file_abspath.replace(preservation_extension, ".md5") + ac_md5_abspath = ac_file_abspath.replace(access_extension, ".md5") print("Processing " + file) - #load inventory metadata related to the file - loaded_metadata = 
audio_processor_supportfuncs.load_item_metadata(file, source_inventory_dict) - #loading inventory metadata means the item was found in the inventory - inventory_check = 'PASS' + # load inventory metadata related to the file + loaded_metadata = audio_processor_supportfuncs.load_item_metadata( + file, source_inventory_dict + ) + # loading inventory metadata means the item was found in the inventory + inventory_check = "PASS" inventory_filename = [] for key in loaded_metadata: inventory_filename.append(key) - inventory_filename = ''.join(inventory_filename) + inventory_filename = "".join(inventory_filename) - #json filename should use the filename found in the inventory - json_file_abspath = os.path.join(meta_folder_abspath, inventory_filename + '-' + metadata_identifier + '.json') + # json filename should use the filename found in the inventory + json_file_abspath = os.path.join( + meta_folder_abspath, + inventory_filename + "-" + metadata_identifier + ".json", + ) - #generate ffprobe metadata from input - input_metadata = audio_processor_supportfuncs.ffprobe_report(file, pm_file_abspath) + # generate ffprobe metadata from input + input_metadata = audio_processor_supportfuncs.ffprobe_report( + file, pm_file_abspath + ) - #embed BWF metadata + # embed BWF metadata if args.write_bwf_metadata: print("*embedding BWF metadata*") - inventory_bwf_metadata = loaded_metadata[inventory_filename]['BWF Metadata'] - source_format = inventory_bwf_metadata['Format'].lower() - bwf_dict['ISRF']['write'] = source_format - #TODO coding history needs to be updated accordingly - coding_history = inventory_bwf_metadata['Coding History'] - if input_metadata['file metadata']['channels'] == 1: - file_sound_mode = 'mono' - elif input_metadata['file metadata']['channels'] == 2: - file_sound_mode = 'stereo' + inventory_bwf_metadata = loaded_metadata[inventory_filename][ + "BWF Metadata" + ] + source_format = inventory_bwf_metadata["Format"].lower() + bwf_dict["ISRF"]["write"] = source_format + # TODO coding history needs to be updated accordingly + coding_history = inventory_bwf_metadata["Coding History"] + if input_metadata["file metadata"]["channels"] == 1: + file_sound_mode = "mono" + elif input_metadata["file metadata"]["channels"] == 2: + file_sound_mode = "stereo" else: - #TODO prompt user to enter a sound mode for the file manually? + # TODO prompt user to enter a sound mode for the file manually? 
pass - coding_history_update = 'A=PCM,F=' + input_metadata['file metadata']['audio sample rate'] + ',W=' + input_metadata['file metadata']['audio bitrate'] + ',M=' + file_sound_mode + ',T=BWFMetaEdit ' + metaedit_version - coding_history = coding_history + '\r\n' + coding_history_update - bwf_dict['CodingHistory']['write'] = coding_history - bwf_command = [args.metaedit_path, pm_file_abspath, '--MD5-Embed', '--BextVersion=1'] + coding_history_update = ( + "A=PCM,F=" + + input_metadata["file metadata"]["audio sample rate"] + + ",W=" + + input_metadata["file metadata"]["audio bitrate"] + + ",M=" + + file_sound_mode + + ",T=BWFMetaEdit " + + metaedit_version + ) + coding_history = coding_history + "\r\n" + coding_history_update + bwf_dict["CodingHistory"]["write"] = coding_history + bwf_command = [ + args.metaedit_path, + pm_file_abspath, + "--MD5-Embed", + "--BextVersion=1", + ] for key in bwf_dict: - if bwf_dict[key]['write']: - bwf_command += [bwf_dict[key]['command'] + bwf_dict[key]['write']] - #if args.reset_timereference: + if bwf_dict[key]["write"]: + bwf_command += [ + bwf_dict[key]["command"] + bwf_dict[key]["write"] + ] + # if args.reset_timereference: # bwf_command += ['--Timereference=' + '0'] subprocess.run(bwf_command) - #print(bwf_command) + # print(bwf_command) - #create checksum sidecar file for preservation master - print ("*creating checksum for preservation file*") + # create checksum sidecar file for preservation master + print("*creating checksum for preservation file*") pm_hash = corefuncs.hashlib_md5(pm_file_abspath) - with open (pm_md5_abspath, 'w', newline='\n') as f: - print(pm_hash, '*' + file, file=f) + with open(pm_md5_abspath, "w", newline="\n") as f: + print(pm_hash, "*" + file, file=f) if args.transcode: print("*transcoding access file*") audio_processor_supportfuncs.create_output_folder(ac_folder_abspath) - ffmpeg_command = [args.ffmpeg_path, '-loglevel', 'error', '-i', pm_file_abspath] - ffmpeg_command += ['-af', 'aresample=resampler=soxr', '-ar', '44100', '-c:a', 'pcm_s16le', '-write_bext', '1', ac_file_abspath] - #sox_command = [args.sox_path, pm_file_abspath, '-b', '16', ac_file_abspath, 'rate', '44100'] + ffmpeg_command = [ + args.ffmpeg_path, + "-loglevel", + "error", + "-i", + pm_file_abspath, + ] + ffmpeg_command += [ + "-af", + "aresample=resampler=soxr", + "-ar", + "44100", + "-c:a", + "pcm_s16le", + "-write_bext", + "1", + ac_file_abspath, + ] + # sox_command = [args.sox_path, pm_file_abspath, '-b', '16', ac_file_abspath, 'rate', '44100'] subprocess.run(ffmpeg_command) - #generate md5 for access file + # generate md5 for access file print("*creating checksum for access file*") acHash = corefuncs.hashlib_md5(ac_file_abspath) - with open (os.path.join(ac_md5_abspath), 'w', newline='\n') as f: - print(acHash, '*' + base_filename + ac_identifier + access_extension, file=f) + with open(os.path.join(ac_md5_abspath), "w", newline="\n") as f: + print( + acHash, + "*" + base_filename + ac_identifier + access_extension, + file=f, + ) - #create folder for metadata if needed + # create folder for metadata if needed if args.spectrogram or args.write_json: - audio_processor_supportfuncs.create_output_folder(meta_folder_abspath) + audio_processor_supportfuncs.create_output_folder( + meta_folder_abspath + ) - #create spectrogram for pm audio channels + # create spectrogram for pm audio channels if args.spectrogram: - #TODO handle cases where spectrogram files already exist - print ("*generating QC spectrograms*") - sox_spectrogram_command = [args.sox_path, 
pm_file_abspath, '-n', 'spectrogram', '-Y', '1080', '-x', '1920', '-o', os.path.join(meta_folder_abspath, base_filename + 'spectrogram' + '.png')] + # TODO handle cases where spectrogram files already exist + print("*generating QC spectrograms*") + sox_spectrogram_command = [ + args.sox_path, + pm_file_abspath, + "-n", + "spectrogram", + "-Y", + "1080", + "-x", + "1920", + "-o", + os.path.join( + meta_folder_abspath, base_filename + "spectrogram" + ".png" + ), + ] subprocess.run(sox_spectrogram_command) - #channel_layout = input_metadata['file metadata']['channels'] - #audio_processor_supportfuncs.generate_spectrogram(pm_file_abspath, channel_layout, meta_folder_abspath, base_filename) + # channel_layout = input_metadata['file metadata']['channels'] + # audio_processor_supportfuncs.generate_spectrogram(pm_file_abspath, channel_layout, meta_folder_abspath, base_filename) - #TODO make this able to handle cases where there is no access file - #TODO split BWF metadata checks into separate policies? - #create a dictionary with the mediaconch results + # TODO make this able to handle cases where there is no access file + # TODO split BWF metadata checks into separate policies? + # create a dictionary with the mediaconch results print("*Running MediaConch on Preservation and Access files*") mediaconchResults_dict = { - 'Preservation Format Policy': audio_processor_supportfuncs.mediaconch_policy_check(pm_file_abspath, p_wav_policy), - 'Preservation BWF Policy' : audio_processor_supportfuncs.mediaconch_policy_check(pm_file_abspath, bwf_policy), - 'Access Format Policy': audio_processor_supportfuncs.mediaconch_policy_check(ac_file_abspath, a_wav_policy), - 'Access BWF Policy' : audio_processor_supportfuncs.mediaconch_policy_check(pm_file_abspath, bwf_policy) + "Preservation Format Policy": audio_processor_supportfuncs.mediaconch_policy_check( + pm_file_abspath, p_wav_policy + ), + "Preservation BWF Policy": audio_processor_supportfuncs.mediaconch_policy_check( + pm_file_abspath, bwf_policy + ), + "Access Format Policy": audio_processor_supportfuncs.mediaconch_policy_check( + ac_file_abspath, a_wav_policy + ), + "Access BWF Policy": audio_processor_supportfuncs.mediaconch_policy_check( + pm_file_abspath, bwf_policy + ), } - #PASS/FAIL - check if any mediaconch results failed and append failed policies to results - mediaconchResults = audio_processor_supportfuncs.parse_mediaconchResults(mediaconchResults_dict) + # PASS/FAIL - check if any mediaconch results failed and append failed policies to results + mediaconchResults = ( + audio_processor_supportfuncs.parse_mediaconchResults( + mediaconchResults_dict + ) + ) - #systemInfo = audio_processor_supportfuncs.generate_system_log() + # systemInfo = audio_processor_supportfuncs.generate_system_log() - #create a dictionary containing QC results - qcResults = audio_processor_supportfuncs.qc_results(inventory_check, mediaconchResults) + # create a dictionary containing QC results + qcResults = audio_processor_supportfuncs.qc_results( + inventory_check, mediaconchResults + ) - #TODO use bwfmetaedit --out-core and --out-tech to grab the BWF metadata, then translate csv data to dict + # TODO use bwfmetaedit --out-core and --out-tech to grab the BWF metadata, then translate csv data to dict if args.write_json: - #TODO consider using --out-tech to get technical metadata instead of ffmpeg? 
- bwf_meta_dict = audio_processor_supportfuncs.get_bwf_metadata(pm_file_abspath) - #input_metadata['file_metadata'].pop('Format') - file_dict = {file : {}} - file_dict[file].update({'Technical Metadata' : input_metadata['file metadata']}) - file_dict[file].update({'BWF Metadata' : bwf_meta_dict}) + # TODO consider using --out-tech to get technical metadata instead of ffmpeg? + bwf_meta_dict = audio_processor_supportfuncs.get_bwf_metadata( + pm_file_abspath + ) + # input_metadata['file_metadata'].pop('Format') + file_dict = {file: {}} + file_dict[file].update( + {"Technical Metadata": input_metadata["file metadata"]} + ) + file_dict[file].update({"BWF Metadata": bwf_meta_dict}) file_dict[file].update(qcResults) - output_metadata = loaded_metadata[inventory_filename]['Inventory Metadata'] - if 'Preservation Files' not in output_metadata: - output_metadata['Preservation Files'] = [file_dict] + output_metadata = loaded_metadata[inventory_filename][ + "Inventory Metadata" + ] + if "Preservation Files" not in output_metadata: + output_metadata["Preservation Files"] = [file_dict] else: - output_metadata['Preservation Files'].append(file_dict) - with open(json_file_abspath, 'w', newline='\n') as outfile: + output_metadata["Preservation Files"].append(file_dict) + with open(json_file_abspath, "w", newline="\n") as outfile: json.dump(output_metadata, outfile, indent=4) - #get current date for logging when QC happned - qcDate = str(datetime.datetime.today().strftime('%Y-%m-%d')) + # get current date for logging when QC happned + qcDate = str(datetime.datetime.today().strftime("%Y-%m-%d")) - #TODO multi-part/side files need cumulative runtime - #create the list that will go in the qc log csv file - #should correspond to the csvHeaderList earlier in the script + # TODO multi-part/side files need cumulative runtime + # create the list that will go in the qc log csv file + # should correspond to the csvHeaderList earlier in the script csvWriteList = [ - file, - qcResults['QC']['Inventory Check'], - qcDate, - qcResults['QC']['Mediaconch Results'], - qcDate, - None, - None, - None, - audio_processor_supportfuncs.convert_runtime(input_metadata['file metadata']['duration']) + file, + qcResults["QC"]["Inventory Check"], + qcDate, + qcResults["QC"]["Mediaconch Results"], + qcDate, + None, + None, + None, + audio_processor_supportfuncs.convert_runtime( + input_metadata["file metadata"]["duration"] + ), ] - #Add QC results to QC log csv file - audio_processor_supportfuncs.write_output_csv(qc_csv_file, csvHeaderList, csvWriteList, qcResults) \ No newline at end of file + # Add QC results to QC log csv file + audio_processor_supportfuncs.write_output_csv( + qc_csv_file, csvHeaderList, csvWriteList, qcResults + ) diff --git a/AV/Audio/audio_processor/audio_processor/audio_processor_parameters.py b/AV/Audio/audio_processor/audio_processor/audio_processor_parameters.py index a286a9c..414cd2e 100644 --- a/AV/Audio/audio_processor/audio_processor/audio_processor_parameters.py +++ b/AV/Audio/audio_processor/audio_processor/audio_processor_parameters.py @@ -1,32 +1,136 @@ #!/usr/bin/env python3 -''' +""" Argument parser for in-house AJA v210/mov to ffv1/mkv script -''' +""" import argparse import sys parser = argparse.ArgumentParser() -parser.add_argument('--input', '-i', action='store', dest='input_path', type=str, help='full path to input folder') -parser.add_argument('--output', '-o', action='store', dest='output_path', type=str, help='full path to output csv file for QC results. 
If not specified this will default to creating a file in the input directory') -parser.add_argument('--load_inventory', '-l', required=False, nargs='*', action='store', dest='source_inventory', help='Use to specify a CSV inventory. If not specified the script will look in the base folder of the input for CSV inventories. If no inventories are found the script will leave some fields blank.') -parser.add_argument('--sox', action='store', dest='sox_path', default='sox', type=str, help='For setting a custom sox path') -parser.add_argument('--bwfmetaedit', action='store', dest='metaedit_path', default='bwfmetaedit', type=str, help='For setting a custom BWF Metaedit path') -parser.add_argument('--ffmpeg', action='store', dest='ffmpeg_path', default='ffmpeg', type=str, help='For setting a custom ffmpeg path') -parser.add_argument('--ffprobe', action='store', dest='ffprobe_path', default='ffprobe', type=str, help='For setting a custom ffprobe path') -parser.add_argument('--mediaconch', action='store', dest='mediaconch_path', default='mediaconch', type=str, help='For setting a custom mediaconch path') -#parser.add_argument('--verbose', required=False, action='store_true', help='view ffmpeg output when transcoding') -parser.add_argument('--transcode', '-t', required=False, action='store_true', dest='transcode', help='Transcode access files') -parser.add_argument('--write_metadata', '-m', required=False, action='store_true', dest='write_bwf_metadata', help='Write Broadcast WAVE metadata to Preservation file') -#parser.add_argument('--reset_timereference', '-r', required=False, action='store_true', dest='reset_timereference', help='Reset the time reference of a BWF file to 00:00:00.000') -parser.add_argument('--write_json', '-j', required=False, action='store_true', dest='write_json', help='Write metadata to json file') -#parser.add_argument('--skipac', required=False, action='store_true', dest='skip_ac', help='skip access copy transcoding') -parser.add_argument('--spectrogram', '-s', required=False, action='store_true', dest='spectrogram', help='generate spectrograms') -parser.add_argument('--p_policy', required=False, action='store', dest='input_policy', help='Mediaconch policy for preservation files') -parser.add_argument('--a_policy', required=False, action='store', dest='output_policy', help='Mediaconch policy for access files') -parser.add_argument('--all', '-a', required=False, action='store_true', dest='all', help='This is equivalent to using -t -m -j -s') +parser.add_argument( + "--input", + "-i", + action="store", + dest="input_path", + type=str, + help="full path to input folder", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output csv file for QC results. If not specified this will default to creating a file in the input directory", +) +parser.add_argument( + "--load_inventory", + "-l", + required=False, + nargs="*", + action="store", + dest="source_inventory", + help="Use to specify a CSV inventory. If not specified the script will look in the base folder of the input for CSV inventories. 
If no inventories are found the script will leave some fields blank.", +) +parser.add_argument( + "--sox", + action="store", + dest="sox_path", + default="sox", + type=str, + help="For setting a custom sox path", +) +parser.add_argument( + "--bwfmetaedit", + action="store", + dest="metaedit_path", + default="bwfmetaedit", + type=str, + help="For setting a custom BWF Metaedit path", +) +parser.add_argument( + "--ffmpeg", + action="store", + dest="ffmpeg_path", + default="ffmpeg", + type=str, + help="For setting a custom ffmpeg path", +) +parser.add_argument( + "--ffprobe", + action="store", + dest="ffprobe_path", + default="ffprobe", + type=str, + help="For setting a custom ffprobe path", +) +parser.add_argument( + "--mediaconch", + action="store", + dest="mediaconch_path", + default="mediaconch", + type=str, + help="For setting a custom mediaconch path", +) +# parser.add_argument('--verbose', required=False, action='store_true', help='view ffmpeg output when transcoding') +parser.add_argument( + "--transcode", + "-t", + required=False, + action="store_true", + dest="transcode", + help="Transcode access files", +) +parser.add_argument( + "--write_metadata", + "-m", + required=False, + action="store_true", + dest="write_bwf_metadata", + help="Write Broadcast WAVE metadata to Preservation file", +) +# parser.add_argument('--reset_timereference', '-r', required=False, action='store_true', dest='reset_timereference', help='Reset the time reference of a BWF file to 00:00:00.000') +parser.add_argument( + "--write_json", + "-j", + required=False, + action="store_true", + dest="write_json", + help="Write metadata to json file", +) +# parser.add_argument('--skipac', required=False, action='store_true', dest='skip_ac', help='skip access copy transcoding') +parser.add_argument( + "--spectrogram", + "-s", + required=False, + action="store_true", + dest="spectrogram", + help="generate spectrograms", +) +parser.add_argument( + "--p_policy", + required=False, + action="store", + dest="input_policy", + help="Mediaconch policy for preservation files", +) +parser.add_argument( + "--a_policy", + required=False, + action="store", + dest="output_policy", + help="Mediaconch policy for access files", +) +parser.add_argument( + "--all", + "-a", + required=False, + action="store_true", + dest="all", + help="This is equivalent to using -t -m -j -s", +) args = parser.parse_args() diff --git a/AV/Audio/audio_processor/audio_processor/audio_processor_supportfuncs.py b/AV/Audio/audio_processor/audio_processor/audio_processor_supportfuncs.py index b4bc939..48643c7 100644 --- a/AV/Audio/audio_processor/audio_processor/audio_processor_supportfuncs.py +++ b/AV/Audio/audio_processor/audio_processor/audio_processor_supportfuncs.py @@ -11,127 +11,194 @@ import time from audio_processor.audio_processor_parameters import args + def get_immediate_subdirectories(folder): - ''' + """ get list of immediate subdirectories of input - ''' - return [name for name in os.listdir(folder) - if os.path.isdir(os.path.join(folder, name))] + """ + return [ + name for name in os.listdir(folder) if os.path.isdir(os.path.join(folder, name)) + ] + def create_output_folder(folder): if not os.path.isdir(folder): try: os.mkdir(folder) except: - print ("unable to create output folder:", folder) + print("unable to create output folder:", folder) quit() else: - print ("using existing folder", folder, "as output") + print("using existing folder", folder, "as output") + def delete_files(list): - ''' + """ Loops through a list of files and tries to delete 
them - ''' + """ for i in list: try: os.remove(i) except FileNotFoundError: - print ("unable to delete " + i) - print ("File not found") + print("unable to delete " + i) + print("File not found") + def load_reference_inventory(reference_inventory_file): reference_inventory_fieldnames = [] with open(reference_inventory_file, "r") as f: - reader = csv.DictReader(f, delimiter=',') + reader = csv.DictReader(f, delimiter=",") reference_inventory_fieldnames.extend(reader.fieldnames) - return(reference_inventory_fieldnames) + return reference_inventory_fieldnames + def load_item_metadata(file, source_inventory_dict): - #TODO error out if multiple matches are found + # TODO error out if multiple matches are found loaded_metadata = {} for item in source_inventory_dict: if item in file: - loaded_metadata = {item : source_inventory_dict[item]} + loaded_metadata = {item: source_inventory_dict[item]} if not loaded_metadata: print("ERROR: Unable to find matching file for " + file) quit() return loaded_metadata + def ffprobe_report(filename, input_file_abspath): - ''' + """ returns nested dictionary with ffprobe metadata - ''' - audio_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'a', '-show_entries', 'stream=codec_long_name,bits_per_raw_sample,sample_rate,channels', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - format_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-show_entries', 'format=duration,size,nb_streams', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) + """ + audio_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=codec_long_name,bits_per_raw_sample,sample_rate,channels", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + format_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-show_entries", + "format=duration,size,nb_streams", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) - audio_codec_name_list = [stream.get('codec_long_name') for stream in (audio_output['streams'])][0] - audio_bitrate = [stream.get('bits_per_raw_sample') for stream in (audio_output['streams'])][0] - audio_sample_rate = [stream.get('sample_rate') for stream in (audio_output['streams'])][0] - audio_channels = [stream.get('channels') for stream in (audio_output['streams'])][0] + audio_codec_name_list = [ + stream.get("codec_long_name") for stream in (audio_output["streams"]) + ][0] + audio_bitrate = [ + stream.get("bits_per_raw_sample") for stream in (audio_output["streams"]) + ][0] + audio_sample_rate = [ + stream.get("sample_rate") for stream in (audio_output["streams"]) + ][0] + audio_channels = [stream.get("channels") for stream in (audio_output["streams"])][0] file_metadata = { - #'filename' : filename, - 'file size' : format_output.get('format')['size'], - 'duration' : format_output.get('format')['duration'], - 'streams' : format_output.get('format')['nb_streams'], - 'channels' : audio_channels, - 'audio streams' : audio_codec_name_list, - 'audio sample rate' : audio_sample_rate, - 'audio bitrate' : audio_bitrate + #'filename' : filename, + "file size": format_output.get("format")["size"], + "duration": format_output.get("format")["duration"], + "streams": format_output.get("format")["nb_streams"], + "channels": audio_channels, + "audio streams": audio_codec_name_list, + "audio sample rate": 
audio_sample_rate, + "audio bitrate": audio_bitrate, } - ffprobe_metadata = {'file metadata' : file_metadata} + ffprobe_metadata = {"file metadata": file_metadata} return ffprobe_metadata + def generate_spectrogram(input, channel_layout, outputFolder, outputName): - ''' + """ Creates a spectrogram for each audio track in the input - ''' + """ spectrogram_resolution = "1928x1080" - output = os.path.join(outputFolder, outputName + '_0a0' + '-spectrogram' + '.png') + output = os.path.join(outputFolder, outputName + "_0a0" + "-spectrogram" + ".png") spectrogram_args = [args.ffmpeg_path] - spectrogram_args += ['-loglevel', 'error', '-y'] - spectrogram_args += ['-i', input, '-lavfi'] + spectrogram_args += ["-loglevel", "error", "-y"] + spectrogram_args += ["-i", input, "-lavfi"] if channel_layout > 1: - spectrogram_args += ['[0:a:%(a)s]showspectrumpic=mode=separate:s=%(b)s' % {"a" : '0', "b" : spectrogram_resolution}] + spectrogram_args += [ + "[0:a:%(a)s]showspectrumpic=mode=separate:s=%(b)s" + % {"a": "0", "b": spectrogram_resolution} + ] else: - spectrogram_args += ['[0:a:%(a)s]showspectrumpic=s=%(b)s' % {"a" : '0', "b" : spectrogram_resolution}] + spectrogram_args += [ + "[0:a:%(a)s]showspectrumpic=s=%(b)s" + % {"a": "0", "b": spectrogram_resolution} + ] spectrogram_args += [output] subprocess.run(spectrogram_args) + def mediaconch_policy_check(input, policy): - mediaconchResults = subprocess.check_output([args.mediaconch_path, '--policy=' + policy, input]).decode("ascii").rstrip().split()[0] + mediaconchResults = ( + subprocess.check_output([args.mediaconch_path, "--policy=" + policy, input]) + .decode("ascii") + .rstrip() + .split()[0] + ) if mediaconchResults == "pass!": mediaconchResults = "PASS" else: mediaconchResults = "FAIL" return mediaconchResults + def mediaconch_implementation_check(input): - mediaconchResults = subprocess.check_output([args.mediaconch_path, input]).decode("ascii").rstrip().split()[0] + mediaconchResults = ( + subprocess.check_output([args.mediaconch_path, input]) + .decode("ascii") + .rstrip() + .split()[0] + ) if mediaconchResults == "pass!": mediaconchResults = "PASS" else: mediaconchResults = "FAIL" return mediaconchResults + def generate_system_log(): - #gather system info for json output + # gather system info for json output osinfo = platform.platform() systemInfo = { - 'operating system': osinfo, + "operating system": osinfo, } return systemInfo + def qc_results(inventoryCheck, mediaconchResults): QC_results = {} - QC_results['QC'] = { - 'Inventory Check': inventoryCheck, - 'Mediaconch Results': mediaconchResults + QC_results["QC"] = { + "Inventory Check": inventoryCheck, + "Mediaconch Results": mediaconchResults, } return QC_results + def guess_date(string): for fmt in ["%m/%d/%Y", "%d-%m-%Y", "%m/%d/%y", "%Y-%m-%d"]: try: @@ -140,11 +207,12 @@ def guess_date(string): continue raise ValueError(string) + def verify_csv_exists(csv_file): - ''' + """ TODO add doctest - ''' - if csv_file.endswith('.csv'): + """ + if csv_file.endswith(".csv"): if not os.path.isfile(csv_file): print("ERROR: Unable to locate " + csv_file) quit() @@ -152,100 +220,129 @@ def verify_csv_exists(csv_file): print("ERROR: " + csv_file + " is not a CSV file") quit() + def group_lists(original_list): - ''' + """ groups list items by the number found in them - ''' + """ grouped_lists = [] for value in original_list: numeric_string = "".join(filter(str.isdigit, value)) - if grouped_lists and "".join(filter(str.isdigit, grouped_lists[-1][0])) == numeric_string: + if ( + grouped_lists + and 
"".join(filter(str.isdigit, grouped_lists[-1][0])) == numeric_string + ): grouped_lists[-1].append(value) else: grouped_lists.append([value]) return grouped_lists + def create_coding_history(row, encoding_chain_fields, append_list): - #separates out just the number from the encoding chain field - #then compares that to the previous entry in the list so that same numbers are grouped + # separates out just the number from the encoding chain field + # then compares that to the previous entry in the list so that same numbers are grouped grouped_field_list = group_lists(encoding_chain_fields) coding_history_dict = {} coding_history = [] for encoding_chain in grouped_field_list: coding_history_dict = { - 'primary fields' : { - 'coding algorithm' : None, - 'sample rate' : None, - 'word length' : None, - 'sound mode' : None, + "primary fields": { + "coding algorithm": None, + "sample rate": None, + "word length": None, + "sound mode": None, + }, + "freetext": { + "device": None, + "id": None, + "append fields": None, + "ad type": None, }, - 'freetext': { - 'device' : None, - 'id' : None, - 'append fields' : None, - 'ad type' : None - } } for i in encoding_chain: if i.lower().endswith("hardware"): - hardware_parser = row[i].split(';') + hardware_parser = row[i].split(";") hardware_parser = [i.lstrip() for i in hardware_parser] - if len(hardware_parser) !=3: - print("ERROR: Encoding chain hardware does not follow expected formatting") - coding_history_dict['primary fields']['coding algorithm'] = "A=" + hardware_parser[0] - #TODO change how T= is added so it is instead just placed before the first entry of the freetext section - coding_history_dict['freetext']['device'] = "T=" + hardware_parser[1] - coding_history_dict['freetext']['id'] = hardware_parser[2] + if len(hardware_parser) != 3: + print( + "ERROR: Encoding chain hardware does not follow expected formatting" + ) + coding_history_dict["primary fields"]["coding algorithm"] = ( + "A=" + hardware_parser[0] + ) + # TODO change how T= is added so it is instead just placed before the first entry of the freetext section + coding_history_dict["freetext"]["device"] = "T=" + hardware_parser[1] + coding_history_dict["freetext"]["id"] = hardware_parser[2] if i.lower().endswith("mode"): - coding_history_dict['primary fields']['sound mode'] = "M=" + row[i] + coding_history_dict["primary fields"]["sound mode"] = "M=" + row[i] if i.lower().endswith("digital characteristics"): - hardware_parser = row[i].split(';') + hardware_parser = row[i].split(";") hardware_parser = [i.lstrip() for i in hardware_parser] - if len(hardware_parser) !=2: - print("ERROR: Encoding chain digital characteristics does not follow expected formatting") - coding_history_dict['primary fields']['sample rate'] = "F=" + hardware_parser[0] - coding_history_dict['primary fields']['word length'] = "W=" + hardware_parser[1] - if i.lower().endswith("hardware type") and row[i].lower() == "playback deck": + if len(hardware_parser) != 2: + print( + "ERROR: Encoding chain digital characteristics does not follow expected formatting" + ) + coding_history_dict["primary fields"]["sample rate"] = ( + "F=" + hardware_parser[0] + ) + coding_history_dict["primary fields"]["word length"] = ( + "W=" + hardware_parser[1] + ) + if ( + i.lower().endswith("hardware type") + and row[i].lower() == "playback deck" + ): clean_list = [] for field in append_list: if field: clean_list.append(field) if clean_list: - append_fields = '; '.join(clean_list) - #convert append list to string - 
coding_history_dict['freetext']['append fields'] = append_fields + append_fields = "; ".join(clean_list) + # convert append list to string + coding_history_dict["freetext"]["append fields"] = append_fields elif i.lower().endswith("hardware type"): - coding_history_dict['freetext']['ad type'] = row[i] + coding_history_dict["freetext"]["ad type"] = row[i] primary_fields = [] freetext = [] - for key in coding_history_dict['primary fields']: - if coding_history_dict['primary fields'][key]: - primary_fields.append(coding_history_dict['primary fields'][key]) - for key in coding_history_dict['freetext']: - if coding_history_dict['freetext'][key]: - freetext.append(coding_history_dict['freetext'][key]) + for key in coding_history_dict["primary fields"]: + if coding_history_dict["primary fields"][key]: + primary_fields.append(coding_history_dict["primary fields"][key]) + for key in coding_history_dict["freetext"]: + if coding_history_dict["freetext"][key]: + freetext.append(coding_history_dict["freetext"][key]) if primary_fields and freetext: - coding_history_p = ','.join(primary_fields) - coding_history_t = '; '.join(freetext) - coding_history_component = coding_history_p + ',' + coding_history_t + coding_history_p = ",".join(primary_fields) + coding_history_t = "; ".join(freetext) + coding_history_component = coding_history_p + "," + coding_history_t coding_history.append(coding_history_component) - coding_history = '\r\n'.join(coding_history) - return(coding_history) + coding_history = "\r\n".join(coding_history) + return coding_history + def import_inventories(source_inventories, reference_inventory_list): csvDict = {} skip_coding_history = False for i in source_inventories: verify_csv_exists(i) - with open(i, encoding='utf-8')as f: - reader = csv.DictReader(f, delimiter=',') - cleaned_fieldnames = [a for a in reader.fieldnames if not "encoding chain" in a.lower()] - encoding_chain_fields = sorted([a for a in reader.fieldnames if "encoding chain" in a.lower()]) - missing_fieldnames = [i for i in reference_inventory_list if not i in cleaned_fieldnames] - extra_fieldnames = [i for i in cleaned_fieldnames if not i in reference_inventory_list] + with open(i, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter=",") + cleaned_fieldnames = [ + a for a in reader.fieldnames if not "encoding chain" in a.lower() + ] + encoding_chain_fields = sorted( + [a for a in reader.fieldnames if "encoding chain" in a.lower()] + ) + missing_fieldnames = [ + i for i in reference_inventory_list if not i in cleaned_fieldnames + ] + extra_fieldnames = [ + i for i in cleaned_fieldnames if not i in reference_inventory_list + ] if missing_fieldnames: - print("WARNING: Your inventory seems to be missing the following columns") + print( + "WARNING: Your inventory seems to be missing the following columns" + ) print(missing_fieldnames) quit() if extra_fieldnames: @@ -255,69 +352,95 @@ def import_inventories(source_inventories, reference_inventory_list): if not encoding_chain_fields: print("WARNING: Unable to find encoding chain fields in inventory") print("Continue without building Coding History? 
(y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: - skip_coding_history = True + skip_coding_history = True elif choice in no: - quit() + quit() else: - sys.stdout.write("Please respond with 'yes' or 'no'") - quit() + sys.stdout.write("Please respond with 'yes' or 'no'") + quit() for row in reader: - name = row['filename'] - record_date = row['Record Date/Time'] - container_markings = row['Housing/Container Markings'] - container_markings = container_markings.split('\n') - format = row['Format'].lower() - captureDate = row['Capture Date'] - #try to format date as yyyy-mm-dd if not formatted correctly + name = row["filename"] + record_date = row["Record Date/Time"] + container_markings = row["Housing/Container Markings"] + container_markings = container_markings.split("\n") + format = row["Format"].lower() + captureDate = row["Capture Date"] + # try to format date as yyyy-mm-dd if not formatted correctly if captureDate: captureDate = str(guess_date(captureDate)) - tapeBrand = row['Tape Brand'] - sound = row['Sound'] - type = row['Tape Type (Cassette)'] - nr = row['Noise Reduction'] - speed = row['Speed IPS'] + tapeBrand = row["Tape Brand"] + sound = row["Sound"] + type = row["Tape Type (Cassette)"] + nr = row["Noise Reduction"] + speed = row["Speed IPS"] if skip_coding_history is False: - coding_history = create_coding_history(row, encoding_chain_fields, [tapeBrand, type, speed, nr]) + coding_history = create_coding_history( + row, encoding_chain_fields, [tapeBrand, type, speed, nr] + ) else: coding_history = None - #TODO make a more generic expandable coding history builder - #TODO separate out metadata that is specifically needed for embedding vs json file metadata - csvData = {'Inventory Metadata' : { - 'Work Accession Number' : row['work_accession_number'], - 'Box/Folder/Alma Number' : row['Box/Folder\nAlma number'], - 'Barcode' : row['Barcode'], - 'Inventory Title' : row['inventory_title'], - 'Record Date' : record_date, - 'Container Markings' : container_markings, - 'Condition Notes' : row['Condition Notes'], - 'Digitization Operator' : row['Digitizer'], - 'Capture Date' : captureDate, - 'Sound Note' : sound, - 'Capture Notes' : row['Digitizer Notes'], - }, - 'BWF Metadata' : { - 'Format' : format, - 'Coding History' : coding_history - }} - csvDict.update({name : csvData}) + # TODO make a more generic expandable coding history builder + # TODO separate out metadata that is specifically needed for embedding vs json file metadata + csvData = { + "Inventory Metadata": { + "Work Accession Number": row["work_accession_number"], + "Box/Folder/Alma Number": row["Box/Folder\nAlma number"], + "Barcode": row["Barcode"], + "Inventory Title": row["inventory_title"], + "Record Date": record_date, + "Container Markings": container_markings, + "Condition Notes": row["Condition Notes"], + "Digitization Operator": row["Digitizer"], + "Capture Date": captureDate, + "Sound Note": sound, + "Capture Notes": row["Digitizer Notes"], + }, + "BWF Metadata": { + "Format": format, + "Coding History": coding_history, + }, + } + csvDict.update({name: csvData}) return csvDict + def get_bwf_metadata(pm_file_abspath): - #TODO use bwfmetaedit to get metadata instead - ffprobe_tags = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-show_entries', 'format_tags', pm_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - ffprobe_tags = ffprobe_tags['format']['tags'] - #core_bwf_command = 
[args.metaedit_path, '--out-core', pm_file_abspath] - tech_bwf_command = [args.metaedit_path, '--out-tech', pm_file_abspath] - #TODO fix - splitlines returns different results here depending on OS - tech_bwf_csv = subprocess.check_output(tech_bwf_command).decode("ascii").rstrip().splitlines()[-1] - embedded_md5 = {'MD5Stored' : tech_bwf_csv.split(',')[16]} + # TODO use bwfmetaedit to get metadata instead + ffprobe_tags = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-show_entries", + "format_tags", + pm_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + ffprobe_tags = ffprobe_tags["format"]["tags"] + # core_bwf_command = [args.metaedit_path, '--out-core', pm_file_abspath] + tech_bwf_command = [args.metaedit_path, "--out-tech", pm_file_abspath] + # TODO fix - splitlines returns different results here depending on OS + tech_bwf_csv = ( + subprocess.check_output(tech_bwf_command) + .decode("ascii") + .rstrip() + .splitlines()[-1] + ) + embedded_md5 = {"MD5Stored": tech_bwf_csv.split(",")[16]} ffprobe_tags.update(embedded_md5) - #core_bwf_csv = subprocess.check_output(core_bwf_command).decode("ascii").rstrip() - return(ffprobe_tags) + # core_bwf_csv = subprocess.check_output(core_bwf_command).decode("ascii").rstrip() + return ffprobe_tags + def parse_mediaconchResults(mediaconchResults_dict): if "FAIL" in mediaconchResults_dict.values(): @@ -326,19 +449,21 @@ def parse_mediaconchResults(mediaconchResults_dict): for key in mediaconchResults_dict.keys(): if "FAIL" in mediaconchResults_dict.get(key): failed_policies.append(key) - mediaconchResults = mediaconchResults + ': ' + str(failed_policies).strip('[]') + mediaconchResults = mediaconchResults + ": " + str(failed_policies).strip("[]") else: mediaconchResults = "PASS" return mediaconchResults + def convert_runtime(duration): runtime = time.strftime("%H:%M:%S", time.gmtime(float(duration))) return runtime + def write_output_csv(csv_file, csvHeaderList, csvWriteList, qcResults): csvOutFileExists = os.path.isfile(csv_file) - with open(csv_file, 'a') as f: - writer = csv.writer(f, delimiter=',', lineterminator='\n') + with open(csv_file, "a") as f: + writer = csv.writer(f, delimiter=",", lineterminator="\n") if not csvOutFileExists: writer.writerow(csvHeaderList) writer.writerow(csvWriteList) diff --git a/AV/Audio/audio_processor/audio_processor/corefuncs.py b/AV/Audio/audio_processor/audio_processor/corefuncs.py index ef451a5..7e848b5 100644 --- a/AV/Audio/audio_processor/audio_processor/corefuncs.py +++ b/AV/Audio/audio_processor/audio_processor/corefuncs.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -''' +""" Functions that will be in multiple scripts Handle things like: input, output, checksumming, checking that software exists, etc. 
-''' +""" import os import hashlib @@ -12,40 +12,43 @@ import subprocess from audio_processor.audio_processor_parameters import args + def input_check(): - ''' + """ Checks if input was provided and if it is a directory that exists - ''' + """ if args.input_path: indir = args.input_path else: - print ("No input provided") + print("No input provided") quit() if not os.path.isdir(indir): - print('input is not a directory') + print("input is not a directory") quit() return indir + def output_check(output): - '''Checks that output is valid''' - if not output.endswith('.csv'): + """Checks that output is valid""" + if not output.endswith(".csv"): print("\n--- ERROR: Output must be a CSV file ---\n") quit() - #TODO add check if csv will go in a valid directory + # TODO add check if csv will go in a valid directory + def hashlib_md5(filename): - ''' + """ Uses hashlib to return an MD5 checksum of an input filename Credit: IFI scripts - ''' + """ read_size = 0 last_percent_done = 0 chksm = hashlib.md5() total_size = os.path.getsize(filename) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: while True: - #2**20 is for reading the file in 1 MiB chunks + # 2**20 is for reading the file in 1 MiB chunks buf = f.read(2**20) if not buf: break @@ -53,87 +56,103 @@ def hashlib_md5(filename): chksm.update(buf) percent_done = 100 * read_size / total_size if percent_done > last_percent_done: - sys.stdout.write('[%d%%]\r' % percent_done) + sys.stdout.write("[%d%%]\r" % percent_done) sys.stdout.flush() last_percent_done = percent_done md5_output = chksm.hexdigest() return md5_output + def mediaconch_policy_exists(policy_path): - ''' + """ checks that the specified mediaconch policy exists - ''' + """ if not os.path.isfile(policy_path): print("unable to find mediaconch policy:", policy_path) print("Check if file exists before running") quit() + def ffprobe_check(): - ''' + """ checks that ffprobe exists by running its -version command - ''' + """ try: - subprocess.check_output([ - args.ffprobe_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + subprocess.check_output([args.ffprobe_path, "-version"]).decode( + "ascii" + ).rstrip().splitlines()[0].split()[2] except: print("Error locating ffprobe") quit() + def mediaconch_check(): - ''' + """ checks that mediaconch exists by running its -v command - ''' + """ try: - subprocess.check_output([ - args.mediaconch_path, '-v' - ]).decode("ascii").rstrip().splitlines()[0] + subprocess.check_output([args.mediaconch_path, "-v"]).decode( + "ascii" + ).rstrip().splitlines()[0] except: - print('Error locating mediaconch') + print("Error locating mediaconch") quit() + def get_ffmpeg_version(): - ''' + """ Returns the version of ffmpeg - ''' - ffmpeg_version = 'ffmpeg' + """ + ffmpeg_version = "ffmpeg" try: - ffmpeg_version = subprocess.check_output([ - args.ffmpeg_path, '-version' - ]).decode("ascii").rstrip() - if not '--enable-libsoxr' in ffmpeg_version: - print('WARNING: ffmpeg is not configured with libsoxr. Exiting') + ffmpeg_version = ( + subprocess.check_output([args.ffmpeg_path, "-version"]) + .decode("ascii") + .rstrip() + ) + if not "--enable-libsoxr" in ffmpeg_version: + print("WARNING: ffmpeg is not configured with libsoxr. 
Exiting") quit() else: ffmpeg_version = ffmpeg_version.splitlines()[0].split()[2] except: - print ("Error getting ffmpeg version") + print("Error getting ffmpeg version") quit() return ffmpeg_version + def get_bwf_metaedit_version(): - ''' + """ Returns the version of BWF Metaedit by running the --Version command - ''' - metaedit_version = 'bwf metaedit' + """ + metaedit_version = "bwf metaedit" try: - metaedit_version = subprocess.check_output([ - args.metaedit_path, '--Version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + metaedit_version = ( + subprocess.check_output([args.metaedit_path, "--Version"]) + .decode("ascii") + .rstrip() + .splitlines()[0] + .split()[2] + ) except: print("Error getting BWF Metaedit version") quit() return metaedit_version + def get_sox_version(): - ''' + """ Returns the version of sox - ''' + """ try: - sox_version = subprocess.check_output([ - args.sox_path, '--version' - ]).decode("ascii").rstrip().splitlines()[0].split()#[2] - sox_version = ''.join([i for i in sox_version if i.startswith('v')]) + sox_version = ( + subprocess.check_output([args.sox_path, "--version"]) + .decode("ascii") + .rstrip() + .splitlines()[0] + .split() + ) # [2] + sox_version = "".join([i for i in sox_version if i.startswith("v")]) except: print("Error getting sox version") quit() diff --git a/AV/Film/dpx2ffv1/dpx2ffv1/corefuncs.py b/AV/Film/dpx2ffv1/dpx2ffv1/corefuncs.py index c303249..ba8c76d 100644 --- a/AV/Film/dpx2ffv1/dpx2ffv1/corefuncs.py +++ b/AV/Film/dpx2ffv1/dpx2ffv1/corefuncs.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -''' +""" Functions that will be in multiple scripts Handle things like: input, output, checksumming, checking that software exists, etc. -''' +""" import os import hashlib @@ -12,49 +12,52 @@ import subprocess from dpx2ffv1parameters import args + def input_check(): - ''' + """ Checks if input was provided and if it is a directory that exists - ''' + """ if args.input_path: indir = args.input_path else: - print ("No input provided") + print("No input provided") quit() if not os.path.isdir(indir): - print('input is not a directory') + print("input is not a directory") quit() return indir + def output_check(): - ''' + """ Checks if output was provided and if it is a directory that exists If no output is provided, output folder will default to input - ''' + """ if args.output_path: outdir = args.output_path else: - print('Output not specified. Using input directory as Output directory') + print("Output not specified. 
Using input directory as Output directory") outdir = args.input_path if not os.path.isdir(outdir): - print('output is not a directory') + print("output is not a directory") quit() - return (outdir) + return outdir + def hashlib_md5(filename): - ''' + """ Uses hashlib to return an MD5 checksum of an input filename Credit: IFI scripts - ''' + """ read_size = 0 last_percent_done = 0 chksm = hashlib.md5() total_size = os.path.getsize(filename) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: while True: - #2**20 is for reading the file in 1 MiB chunks + # 2**20 is for reading the file in 1 MiB chunks buf = f.read(2**20) if not buf: break @@ -62,93 +65,111 @@ def hashlib_md5(filename): chksm.update(buf) percent_done = 100 * read_size / total_size if percent_done > last_percent_done: - sys.stdout.write('[%d%%]\r' % percent_done) + sys.stdout.write("[%d%%]\r" % percent_done) sys.stdout.flush() last_percent_done = percent_done md5_output = chksm.hexdigest() return md5_output + def mediaconch_policy_exists(policy_path): - ''' + """ checks that the specified mediaconch policy exists - ''' + """ if not os.path.isfile(policy_path): print("unable to find mediaconch policy:", policy_path) print("Check if file exists before running") quit() + def ffprobe_check(): - ''' + """ checks that ffprobe exists by running its -version command - ''' + """ try: - subprocess.check_output([ - args.ffprobe_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + subprocess.check_output([args.ffprobe_path, "-version"]).decode( + "ascii" + ).rstrip().splitlines()[0].split()[2] except: print("Error locating ffprobe") quit() + def mediaconch_check(): - ''' + """ checks that mediaconch exists by running its -v command - ''' + """ try: - subprocess.check_output([ - args.mediaconch_path, '-v' - ]).decode("ascii").rstrip().splitlines()[0] + subprocess.check_output([args.mediaconch_path, "-v"]).decode( + "ascii" + ).rstrip().splitlines()[0] except: - print('Error locating mediaconch') + print("Error locating mediaconch") quit() + def qcli_check(): - ''' + """ checks that qcli exists by running its -version command - ''' + """ try: - subprocess.check_output([ - args.qcli_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0] + subprocess.check_output([args.qcli_path, "-version"]).decode( + "ascii" + ).rstrip().splitlines()[0] except: - print('Error locating qcli') + print("Error locating qcli") quit() + def mediainfo_check(): - ''' + """ checks that qcli exists by running its -version command - ''' + """ try: - mediainfo_version = subprocess.check_output([ - args.mediainfo_path, '--version' - ]).decode("ascii").rstrip().splitlines()[1].split()[2] + mediainfo_version = ( + subprocess.check_output([args.mediainfo_path, "--version"]) + .decode("ascii") + .rstrip() + .splitlines()[1] + .split()[2] + ) except: - print('Error locating mediainfo') + print("Error locating mediainfo") quit() + def get_ffmpeg_version(): - ''' + """ Returns the version of ffmpeg - ''' - ffmpeg_version = 'ffmpeg' + """ + ffmpeg_version = "ffmpeg" try: - ffmpeg_version = subprocess.check_output([ - args.ffmpeg_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + ffmpeg_version = ( + subprocess.check_output([args.ffmpeg_path, "-version"]) + .decode("ascii") + .rstrip() + .splitlines()[0] + .split()[2] + ) except: - print ("Error getting ffmpeg version") + print("Error getting ffmpeg version") quit() return ffmpeg_version + def get_rawcooked_version(): - ''' + """ Returns the version of 
ffmpeg - ''' - rawcooked_version = 'rawcooked' + """ + rawcooked_version = "rawcooked" try: - rawcooked_version = subprocess.check_output([ - args.rawcooked_path, '--version' - ]).decode("ascii").rstrip().split()[1] + rawcooked_version = ( + subprocess.check_output([args.rawcooked_path, "--version"]) + .decode("ascii") + .rstrip() + .split()[1] + ) except: - print ("Error getting rawcooked version") + print("Error getting rawcooked version") quit() return rawcooked_version diff --git a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1.py b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1.py index 39d695c..e835bc0 100755 --- a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1.py +++ b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1.py @@ -12,26 +12,26 @@ import corefuncs import dpx2ffv1supportfuncs -#TO DO: general clean up (improve readability by separating out some functions); merge avfuncs scripts between different transcode scripts where overlapping functions exist and separate format specific functions better +# TO DO: general clean up (improve readability by separating out some functions); merge avfuncs scripts between different transcode scripts where overlapping functions exist and separate format specific functions better if sys.version_info[0] < 3: raise Exception("Python 3 or a more recent version is required.") -#set up the input, output, and other folder/file sctructure elements -#expected folder/file structure is input/title/subfolder_identifier/title_0000001.dpx +# set up the input, output, and other folder/file sctructure elements +# expected folder/file structure is input/title/subfolder_identifier/title_0000001.dpx indir = corefuncs.input_check() outdir = corefuncs.output_check() -#TO DO: allow running without subfolder identifier to support having the dpx files directly in the title folder? -#set the subfolder_identifier. Defaults to 'pm' if not specified +# TO DO: allow running without subfolder identifier to support having the dpx files directly in the title folder? +# set the subfolder_identifier. 
Defaults to 'pm' if not specified if not args.subfolder_identifier: - subfolder_identifier = 'p' + subfolder_identifier = "p" else: subfolder_identifier = args.subfolder_identifier -access_file_identifier = 'a' -access_file_extension = '.mov' +access_file_identifier = "a" +access_file_extension = ".mov" -#set a limit so that the command only runs on title folders containing the string specified +# set a limit so that the command only runs on title folders containing the string specified limit = dpx2ffv1supportfuncs.assign_limit() title_list = dpx2ffv1supportfuncs.get_immediate_subdirectories(indir) if args.filter_list: @@ -43,135 +43,199 @@ checklist = [] for title in title_list: if not limit or (limit) in title: - #currently the input folder name is fixed as input/title/pm for consistency in the structure of the RAWcooked data + # currently the input folder name is fixed as input/title/pm for consistency in the structure of the RAWcooked data if os.path.isdir(os.path.join(indir, title, subfolder_identifier)): - #TO DO: differentiate subfolder_identifier and dpx_subfolder_identifier + # TO DO: differentiate subfolder_identifier and dpx_subfolder_identifier title_abspath = os.path.join(indir, title) indirbase = os.path.join(title_abspath, subfolder_identifier) outpathbase = os.path.join(outdir, title) outpathfull = os.path.join(outpathbase, subfolder_identifier) - ffv1_name = os.path.join(title + '_' + subfolder_identifier + '.mkv') - framemd5_name = os.path.join(title + '_' + subfolder_identifier + '.framemd5') + ffv1_name = os.path.join(title + "_" + subfolder_identifier + ".mkv") + framemd5_name = os.path.join( + title + "_" + subfolder_identifier + ".framemd5" + ) mkv_abspath = os.path.join(outpathfull, ffv1_name) framemd5_abspath = os.path.join(outpathfull, framemd5_name) - #TO DO: it may be better to make the default behavior be to just run rawcooked on title - #then you could add a flag where you specify pm folders if they exist + # TO DO: it may be better to make the default behavior be to just run rawcooked on title + # then you could add a flag where you specify pm folders if they exist - #TO DO check for md5 file in dpx folders - #if not found, generate and output to input folder + # TO DO check for md5 file in dpx folders + # if not found, generate and output to input folder - print('\n'"***Processing", title + "***" + '\n') + print("\n" "***Processing", title + "***" + "\n") - #gather system metadata + # gather system metadata osinfo = platform.platform() ffvers = corefuncs.get_ffmpeg_version() rawcvers = corefuncs.get_rawcooked_version() - tstime = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S') + tstime = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S") - #build and execute rawcooked command - rawcooked_command = [args.rawcooked_path, '--all', '--framemd5', '--framemd5-name', framemd5_abspath] + # build and execute rawcooked command + rawcooked_command = [ + args.rawcooked_path, + "--all", + "--framemd5", + "--framemd5-name", + framemd5_abspath, + ] if args.framerate: - rawcooked_command += ['-framerate', args.framerate] - rawcooked_command += [indirbase, '-o', mkv_abspath] - #print(rawcooked_command) - rawcooked_results = subprocess.check_output(rawcooked_command).decode("ascii").rstrip() + rawcooked_command += ["-framerate", args.framerate] + rawcooked_command += [indirbase, "-o", mkv_abspath] + # print(rawcooked_command) + rawcooked_results = ( + subprocess.check_output(rawcooked_command).decode("ascii").rstrip() + ) - #log transcode finish time - tftime = 
datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S') + # log transcode finish time + tftime = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S") - #if output exists, create an md5checksum + # if output exists, create an md5checksum if os.path.isfile(mkv_abspath): mkvhash = dpx2ffv1supportfuncs.hashlib_md5(mkv_abspath) else: mkvhash = None - #write md5 checksum to md5 file - with open (os.path.join(outpathfull, title + '_' + subfolder_identifier + '.md5'), 'w', newline='\n') as f: - print(mkvhash, '*' + ffv1_name, file=f) + # write md5 checksum to md5 file + with open( + os.path.join(outpathfull, title + "_" + subfolder_identifier + ".md5"), + "w", + newline="\n", + ) as f: + print(mkvhash, "*" + ffv1_name, file=f) data = {} data[title] = [] - metadict = { 'system information': { - 'operating system': osinfo, - 'ffmpeg version': ffvers, - 'rawcooked version': rawcvers, - 'transcode start time': tstime, - 'transcode end time': tftime - }} - - if 'Reversability was checked, no issue detected.' in rawcooked_results: - lossless_pass_fail = 'PASS' + metadict = { + "system information": { + "operating system": osinfo, + "ffmpeg version": ffvers, + "rawcooked version": rawcvers, + "transcode start time": tstime, + "transcode end time": tftime, + } + } + + if "Reversability was checked, no issue detected." in rawcooked_results: + lossless_pass_fail = "PASS" else: - lossless_pass_fail = 'FAIL; ' + rawcooked_results + lossless_pass_fail = "FAIL; " + rawcooked_results attachments = dpx2ffv1supportfuncs.list_mkv_attachments(mkv_abspath) format_metadict = dpx2ffv1supportfuncs.get_mkv_format_metadata(mkv_abspath) video_metadict = dpx2ffv1supportfuncs.get_mkv_video_metadata(mkv_abspath) audio_metadict = dpx2ffv1supportfuncs.get_mkv_audio_metadata(mkv_abspath) - #note that the size is output in bytes. This can be converted to MiB by dividing by 1024**2 or GiB by dividing by 1024**3 - dpxsize = dpx2ffv1supportfuncs.get_folder_size(os.path.join(indir, title, subfolder_identifier)) - #log ffv1 (folder) size + # note that the size is output in bytes. 
This can be converted to MiB by dividing by 1024**2 or GiB by dividing by 1024**3 + dpxsize = dpx2ffv1supportfuncs.get_folder_size( + os.path.join(indir, title, subfolder_identifier) + ) + # log ffv1 (folder) size mkvsize = dpx2ffv1supportfuncs.get_folder_size(outpathfull) - pm_runtime = format_metadict.get('format')['duration'] + pm_runtime = format_metadict.get("format")["duration"] if args.check_runtime: - ac_runtime = dpx2ffv1supportfuncs.grab_runtime(title_abspath, access_file_identifier, access_file_extension) + ac_runtime = dpx2ffv1supportfuncs.grab_runtime( + title_abspath, access_file_identifier, access_file_extension + ) else: ac_runtime = None if ac_runtime: runtime_dif = abs(float(ac_runtime) - float(pm_runtime)) if runtime_dif < 0.2: - runtime_pass_fail = 'PASS' + runtime_pass_fail = "PASS" else: - runtime_pass_fail = 'FAIL, p runtime = ' + pm_runtime + "; a runtime = " + ac_runtime + runtime_pass_fail = ( + "FAIL, p runtime = " + + pm_runtime + + "; a runtime = " + + ac_runtime + ) else: - runtime_pass_fail = 'Not Checked' - post_transcode_dict = { 'post-transcode metadata': { - 'filename': ffv1_name, - 'md5 checksum': mkvhash, - 'duration' : pm_runtime, - 'streams' : format_metadict.get('format')['nb_streams'], - 'compressed size': mkvsize, - 'uncompressed size': dpxsize - }} - video_dict = {'video': { - 'video streams': [stream.get('codec_name') for stream in (video_metadict['streams'])], - 'framerate': [stream.get('r_frame_rate') for stream in (video_metadict['streams'])][0], - 'width': [stream.get('width') for stream in (video_metadict['streams'])][0], - 'height': [stream.get('height') for stream in (video_metadict['streams'])][0], - 'sample_aspect_ratio': [stream.get('sample_aspect_ratio') for stream in (video_metadict['streams'])][0], - 'display_aspect_ratio': [stream.get('display_aspect_ratio') for stream in (video_metadict['streams'])][0], - 'pixel format': [stream.get('pix_fmt') for stream in (video_metadict['streams'])][0] - }} - audio_dict = {'audio': { - 'audio codecs': [stream.get('codec_long_name') for stream in (audio_metadict['streams'])], - 'audio bitrate': [stream.get('bits_per_raw_sample') for stream in (audio_metadict['streams'])], - 'audio sample rate': [stream.get('sample_rate') for stream in (audio_metadict['streams'])], - 'audio channels': [stream.get('channels') for stream in (audio_metadict['streams'])] - }} - data_dict = {'data': { - 'attachments': attachments - }} - QC_dict = {'QC': { - 'Runtime Check' : runtime_pass_fail, - 'Lossless Check' : lossless_pass_fail - #'MediaConch Results' - }} - output_technical_metadata = {'technical metadata': [video_dict, audio_dict, data_dict]} + runtime_pass_fail = "Not Checked" + post_transcode_dict = { + "post-transcode metadata": { + "filename": ffv1_name, + "md5 checksum": mkvhash, + "duration": pm_runtime, + "streams": format_metadict.get("format")["nb_streams"], + "compressed size": mkvsize, + "uncompressed size": dpxsize, + } + } + video_dict = { + "video": { + "video streams": [ + stream.get("codec_name") + for stream in (video_metadict["streams"]) + ], + "framerate": [ + stream.get("r_frame_rate") + for stream in (video_metadict["streams"]) + ][0], + "width": [ + stream.get("width") for stream in (video_metadict["streams"]) + ][0], + "height": [ + stream.get("height") for stream in (video_metadict["streams"]) + ][0], + "sample_aspect_ratio": [ + stream.get("sample_aspect_ratio") + for stream in (video_metadict["streams"]) + ][0], + "display_aspect_ratio": [ + stream.get("display_aspect_ratio") + for 
stream in (video_metadict["streams"]) + ][0], + "pixel format": [ + stream.get("pix_fmt") for stream in (video_metadict["streams"]) + ][0], + } + } + audio_dict = { + "audio": { + "audio codecs": [ + stream.get("codec_long_name") + for stream in (audio_metadict["streams"]) + ], + "audio bitrate": [ + stream.get("bits_per_raw_sample") + for stream in (audio_metadict["streams"]) + ], + "audio sample rate": [ + stream.get("sample_rate") + for stream in (audio_metadict["streams"]) + ], + "audio channels": [ + stream.get("channels") for stream in (audio_metadict["streams"]) + ], + } + } + data_dict = {"data": {"attachments": attachments}} + QC_dict = { + "QC": { + "Runtime Check": runtime_pass_fail, + "Lossless Check": lossless_pass_fail + #'MediaConch Results' + } + } + output_technical_metadata = { + "technical metadata": [video_dict, audio_dict, data_dict] + } post_transcode_dict.update(output_technical_metadata) metadict.update(post_transcode_dict) metadict.update(QC_dict) data[title].append(metadict) - with open(os.path.join(outpathfull, title + '_p.json'), 'w', newline='\n') as outfile: + with open( + os.path.join(outpathfull, title + "_p.json"), "w", newline="\n" + ) as outfile: json.dump(data, outfile, indent=4) else: - print('no preservation folder in input directory') + print("no preservation folder in input directory") elif limit and not (limit) in title: - print(title, 'does not contain ', limit) + print(title, "does not contain ", limit) -#if args.verifylossless: - #for *.mkv run ffprobe to check if file contains a rawcooked reversibility file - #if so, funnel into rawcooked decode pathway +# if args.verifylossless: +# for *.mkv run ffprobe to check if file contains a rawcooked reversibility file +# if so, funnel into rawcooked decode pathway -#TODO - move this to supportfuncs and rewrite it for the new version of rawcooked -''' +# TODO - move this to supportfuncs and rewrite it for the new version of rawcooked +""" if args.decodeffv1: indir = corefuncs.input_check() outdir = corefuncs.output_check() @@ -203,4 +267,4 @@ print ("More than 1 mkv file found in", os.path.join(indir, object_folder)) elif limit and not (limit) in object_folder: print("Skipped", object_folder) -''' +""" diff --git a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1parameters.py b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1parameters.py index 9bce8be..d0fa9f6 100644 --- a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1parameters.py +++ b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1parameters.py @@ -5,28 +5,100 @@ parser = argparse.ArgumentParser() -parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.30') +parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.30") -parser.add_argument('--framerate', '-r', required=False, action='store', dest='framerate', help='Sets framerate for dpx2ffv1 conversion script. Defaults to 24 if not specified') +parser.add_argument( + "--framerate", + "-r", + required=False, + action="store", + dest="framerate", + help="Sets framerate for dpx2ffv1 conversion script. Defaults to 24 if not specified", +) -parser.add_argument('--input', '-i', action='store', dest='input_path', type=str, help='full path to input folder') -parser.add_argument('--output', '-o', action='store', dest='output_path', type=str, help='full path to output folder') -parser.add_argument('--subfolder_identifier', action='store', dest='subfolder_identifier', type=str, help='Specifies the folder identifier if files are located in a subfolder. 
For example, a pm folder within an object folder.') -parser.add_argument('--limit', action='store', dest='textlimit', type=str, help='Defines a string that limits which folders batch commands run on. If not used, commands will run on all immediate subdirectories of the input') -parser.add_argument('--filter_list', action='store', dest='filter_list', type=str, help='Pull list of files to process from a text file') +parser.add_argument( + "--input", + "-i", + action="store", + dest="input_path", + type=str, + help="full path to input folder", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output folder", +) +parser.add_argument( + "--subfolder_identifier", + action="store", + dest="subfolder_identifier", + type=str, + help="Specifies the folder identifier if files are located in a subfolder. For example, a pm folder within an object folder.", +) +parser.add_argument( + "--limit", + action="store", + dest="textlimit", + type=str, + help="Defines a string that limits which folders batch commands run on. If not used, commands will run on all immediate subdirectories of the input", +) +parser.add_argument( + "--filter_list", + action="store", + dest="filter_list", + type=str, + help="Pull list of files to process from a text file", +) -parser.add_argument('--dpxcheck', action='store', dest='dpxcheck_path', type=str, help='Full path to a location where DPX files will be decoded from FFV1 files. If left blank, this will default to the folder where the FFV1 file being decoded is located.') -#TO DO: implement ffv1 decode -#parser.add_argument('--decodeffv1', required=False, action='store_true', help='For each folder in input, generate a decode the FFV1 file back to a dpx sequence, check md5 checksums and compare file size of FFV1 file and DPX sequence') -#TO DO: implement mkv verification -#parser.add_argument('--verifymkv', required=False, action='store_true', help='') +parser.add_argument( + "--dpxcheck", + action="store", + dest="dpxcheck_path", + type=str, + help="Full path to a location where DPX files will be decoded from FFV1 files. If left blank, this will default to the folder where the FFV1 file being decoded is located.", +) +# TO DO: implement ffv1 decode +# parser.add_argument('--decodeffv1', required=False, action='store_true', help='For each folder in input, generate a decode the FFV1 file back to a dpx sequence, check md5 checksums and compare file size of FFV1 file and DPX sequence') +# TO DO: implement mkv verification +# parser.add_argument('--verifymkv', required=False, action='store_true', help='') -parser.add_argument('--check_runtime', '-c', action='store_true', required=False, help='checks ffv1 runtime against access copy runtime after transcode') -#parser.add_argument('--notparanoid', action='store_true', required=False, help='Include in dpx2ffv1 conversion to skip decoding ffv1 sequence back to dpx and generating verification logs') +parser.add_argument( + "--check_runtime", + "-c", + action="store_true", + required=False, + help="checks ffv1 runtime against access copy runtime after transcode", +) +# parser.add_argument('--notparanoid', action='store_true', required=False, help='Include in dpx2ffv1 conversion to skip decoding ffv1 sequence back to dpx and generating verification logs') -parser.add_argument('--rawcooked', action='store', dest='rawcooked_path', default='rawcooked', type=str, help='The full path to RAWcooked. 
Use if you need to specify a custom path to rawcooked.') -#TODO: fix - currently, specifying a custom path to ffmpeg does nothing because rawcooked handles running ffmpeg -parser.add_argument('--ffmpeg', action='store', dest='ffmpeg_path', default='ffmpeg', type=str, help='The full path to ffmpeg. Use if you need to specify a custom path to ffmpeg.') -parser.add_argument('--ffprobe', action='store', dest='ffprobe_path', default='ffprobe', type=str, help='The full path to ffprobe. Use if you need to specify a custom path to ffprobe.') +parser.add_argument( + "--rawcooked", + action="store", + dest="rawcooked_path", + default="rawcooked", + type=str, + help="The full path to RAWcooked. Use if you need to specify a custom path to rawcooked.", +) +# TODO: fix - currently, specifying a custom path to ffmpeg does nothing because rawcooked handles running ffmpeg +parser.add_argument( + "--ffmpeg", + action="store", + dest="ffmpeg_path", + default="ffmpeg", + type=str, + help="The full path to ffmpeg. Use if you need to specify a custom path to ffmpeg.", +) +parser.add_argument( + "--ffprobe", + action="store", + dest="ffprobe_path", + default="ffprobe", + type=str, + help="The full path to ffprobe. Use if you need to specify a custom path to ffprobe.", +) args = parser.parse_args() diff --git a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1supportfuncs.py b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1supportfuncs.py index 146e956..b0037e2 100644 --- a/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1supportfuncs.py +++ b/AV/Film/dpx2ffv1/dpx2ffv1/dpx2ffv1supportfuncs.py @@ -12,6 +12,7 @@ import posixpath from dpx2ffv1parameters import args + def assign_limit(): if args.textlimit: limit = args.textlimit @@ -19,24 +20,27 @@ def assign_limit(): limit = None return limit + def get_immediate_subdirectories(folder): - ''' + """ get list of immediate subdirectories of input - ''' - return [name for name in os.listdir(folder) - if os.path.isdir(os.path.join(folder, name))] + """ + return [ + name for name in os.listdir(folder) if os.path.isdir(os.path.join(folder, name)) + ] + def hashlib_md5(filename): - ''' + """ Uses hashlib to return an MD5 checksum of an input filename - ''' + """ read_size = 0 last_percent_done = 0 chksm = hashlib.md5() total_size = os.path.getsize(filename) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: while True: - #2**20 is for reading the file in 1 MiB chunks + # 2**20 is for reading the file in 1 MiB chunks buf = f.read(2**20) if not buf: break @@ -44,18 +48,21 @@ def hashlib_md5(filename): chksm.update(buf) percent_done = 100 * read_size / total_size if percent_done > last_percent_done: - sys.stdout.write('[%d%%]\r' % percent_done) + sys.stdout.write("[%d%%]\r" % percent_done) sys.stdout.flush() last_percent_done = percent_done md5_output = chksm.hexdigest() return md5_output -#this function is from the IFI's scripts with a minor change -#open(str(filename)), 'rb') as f has been changed to open(filename, 'rb') as f + + +# this function is from the IFI's scripts with a minor change +# open(str(filename)), 'rb') as f has been changed to open(filename, 'rb') as f + def get_folder_size(folder): - ''' + """ Calculate the folder size - ''' + """ total_size = 0 d = os.scandir(folder) for entry in d: @@ -65,56 +72,103 @@ def get_folder_size(folder): else: total_size += entry.stat().st_size except FileNotFoundError: - #file was deleted during scandir + # file was deleted during scandir pass except PermissionError: return 0 return total_size -''' + +""" #use mediainfo instead of ffprobe (?) 
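#a minimal sketch of the mediainfo-based alternative suggested above; illustrative assumption only,
#not part of these scripts: it presumes a mediainfo_path argument (as used by corefuncs.mediainfo_check)
#and that the installed MediaInfo CLI accepts --Output=JSON, nesting stream records under media -> track
def mediainfo_report_sketch(input_abspath):
    mi_json = json.loads(subprocess.check_output([args.mediainfo_path, '--Output=JSON', input_abspath]).decode("ascii").rstrip())
    #each track dict carries an '@type' key such as 'General', 'Video' or 'Audio'
    tracks = mi_json['media']['track']
    video_tracks = [track for track in tracks if track.get('@type') == 'Video']
    return video_tracks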
def dpx2ffv1_ffprobe_report(input_abspath): video_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'v', '-show_entries', 'stream=codec_name,avg_frame_rate,codec_time_base,width,height,pix_fmt,codec_tag_string', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) audio_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'a', '-show_entries', 'stream=codec_name,codec_time_base,codec_tag_string', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) -''' +""" + def list_mkv_attachments(input_file_abspath): - #could also identify with -select streams m:filename - t_probe_out = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 't', '-show_entries', 'stream_tags=filename', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - tags = [streams.get('tags') for streams in (t_probe_out['streams'])] + # could also identify with -select streams m:filename + t_probe_out = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "t", + "-show_entries", + "stream_tags=filename", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + tags = [streams.get("tags") for streams in (t_probe_out["streams"])] attachment_list = [] for i in tags: - filename = [i.get('filename')] + filename = [i.get("filename")] attachment_list.extend(filename) return attachment_list + def get_mkv_video_metadata(input_file_abspath): - ffprobe_command = [args.ffprobe_path, '-v', 'error', '-select_streams', 'v',] - ffprobe_command += ['-show_entries', 'stream=codec_name,width,height,pix_fmt,sample_aspect_ratio,display_aspect_ratio,r_frame_rate'] - ffprobe_command += [input_file_abspath, '-of', 'json'] - video_meta_out = json.loads(subprocess.check_output(ffprobe_command).decode("ascii").rstrip()) + ffprobe_command = [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "v", + ] + ffprobe_command += [ + "-show_entries", + "stream=codec_name,width,height,pix_fmt,sample_aspect_ratio,display_aspect_ratio,r_frame_rate", + ] + ffprobe_command += [input_file_abspath, "-of", "json"] + video_meta_out = json.loads( + subprocess.check_output(ffprobe_command).decode("ascii").rstrip() + ) return video_meta_out + def get_mkv_audio_metadata(input_file_abspath): - ffprobe_command = [args.ffprobe_path, '-v', 'error', '-select_streams', 'a',] - ffprobe_command += ['-show_entries', 'stream=codec_long_name,bits_per_raw_sample,sample_rate,channels'] - ffprobe_command += [input_file_abspath, '-of', 'json'] - audio_meta_out = json.loads(subprocess.check_output(ffprobe_command).decode("ascii").rstrip()) + ffprobe_command = [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "a", + ] + ffprobe_command += [ + "-show_entries", + "stream=codec_long_name,bits_per_raw_sample,sample_rate,channels", + ] + ffprobe_command += [input_file_abspath, "-of", "json"] + audio_meta_out = json.loads( + subprocess.check_output(ffprobe_command).decode("ascii").rstrip() + ) return audio_meta_out + def get_mkv_format_metadata(input_file_abspath): - ffprobe_command = [args.ffprobe_path, '-v', 'error'] - ffprobe_command += ['-show_entries', 'format=duration,nb_streams'] - ffprobe_command += [input_file_abspath, '-of', 'json'] - format_meta_out = json.loads(subprocess.check_output(ffprobe_command).decode("ascii").rstrip()) + ffprobe_command = [args.ffprobe_path, "-v", "error"] + ffprobe_command += ["-show_entries", 
"format=duration,nb_streams"] + ffprobe_command += [input_file_abspath, "-of", "json"] + format_meta_out = json.loads( + subprocess.check_output(ffprobe_command).decode("ascii").rstrip() + ) return format_meta_out + def dpx_md5_compare(dpxfolder): - ''' + """ Returns two sets One from the original DPX sequence's md5 checksum The other from the calculated checksums of the decoded DPX sequence - ''' + """ md5list = [] orig_md5list = {} for i in os.listdir(dpxfolder): @@ -125,25 +179,41 @@ def dpx_md5_compare(dpxfolder): pass else: y = hashlib_md5(abspath) - filehash = y + ' *' + i + filehash = y + " *" + i md5list.append(filehash) compareset = set(md5list) return compareset, orig_md5list + def grab_runtime(folder, subfolder_identifier, filetype): - ''' + """ Look for an ac folder containing an video file of specified type If found, return the runtime - ''' + """ itemfolder = os.path.join(folder, subfolder_identifier) if os.path.isdir(itemfolder): - videofile = glob.glob1(itemfolder, '*' + filetype) + videofile = glob.glob1(itemfolder, "*" + filetype) filecounter = len(videofile) if filecounter == 1: for i in videofile: file_abspath = os.path.join(itemfolder, i) - runtime = subprocess.check_output([args.ffprobe_path, '-v', 'error', file_abspath, '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1']).decode("ascii").rstrip() - #this returns the total runtime in seconds + runtime = ( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + file_abspath, + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + ] + ) + .decode("ascii") + .rstrip() + ) + # this returns the total runtime in seconds elif filecounter < 1: runtime = "no " + filetype + " files found in " + itemfolder elif filecounter > 1: @@ -151,8 +221,10 @@ def grab_runtime(folder, subfolder_identifier, filetype): else: runtime = "no " + subfolder_identifier + "folder found" return runtime - #when comparing runtimes, you could check if this value is a float, which would allow you to know if there was an error here -''' + # when comparing runtimes, you could check if this value is a float, which would allow you to know if there was an error here + + +""" def verification_check(folder): verifile = os.path.join(folder, 'pm', "verification_log.txt") if not os.path.isfile(verifile): @@ -201,4 +273,4 @@ def verification_check(folder): print('\t'"pm runtime was not logged") elif not "not logged" in pm_runtime and "not logged" in ac_runtime: print('\t'"ac runtime was not logged") -''' +""" diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/aja_mov2ffv1.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/aja_mov2ffv1.py index 2632712..5c82d62 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/aja_mov2ffv1.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/aja_mov2ffv1.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 import sys + if sys.version_info[0] < 3: raise Exception("Python 3 or a more recent version is required.") + def main(): from aja_mov2ffv1 import mov2ffv1mainfunc + mov2ffv1mainfunc.aja_mov2ffv1_main() + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/corefuncs.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/corefuncs.py index 4afddb1..9797651 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/corefuncs.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/corefuncs.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -''' +""" Functions that will be in multiple scripts Handle things like: input, output, checksumming, checking that software 
exists, etc. -''' +""" import os import hashlib @@ -12,49 +12,52 @@ import subprocess from aja_mov2ffv1.mov2ffv1parameters import args + def input_check(): - ''' + """ Checks if input was provided and if it is a directory that exists - ''' + """ if args.input_path: indir = args.input_path else: - print ("No input provided") + print("No input provided") quit() if not os.path.isdir(indir): - print('input is not a directory') + print("input is not a directory") quit() return indir + def output_check(): - ''' + """ Checks if output was provided and if it is a directory that exists If no output is provided, output folder will default to input - ''' + """ if args.output_path: outdir = args.output_path else: - print('Output not specified. Using input directory as Output directory') + print("Output not specified. Using input directory as Output directory") outdir = args.input_path - + if not os.path.isdir(outdir): - print('output is not a directory') + print("output is not a directory") quit() - return (outdir) + return outdir + def hashlib_md5(filename): - ''' + """ Uses hashlib to return an MD5 checksum of an input filename Credit: IFI scripts - ''' + """ read_size = 0 last_percent_done = 0 chksm = hashlib.md5() total_size = os.path.getsize(filename) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: while True: - #2**20 is for reading the file in 1 MiB chunks + # 2**20 is for reading the file in 1 MiB chunks buf = f.read(2**20) if not buf: break @@ -62,67 +65,76 @@ def hashlib_md5(filename): chksm.update(buf) percent_done = 100 * read_size / total_size if percent_done > last_percent_done: - sys.stdout.write('[%d%%]\r' % percent_done) + sys.stdout.write("[%d%%]\r" % percent_done) sys.stdout.flush() last_percent_done = percent_done md5_output = chksm.hexdigest() return md5_output + def mediaconch_policy_exists(policy_path): - ''' + """ checks that the specified mediaconch policy exists - ''' + """ if not os.path.isfile(policy_path): print("unable to find mediaconch policy:", policy_path) print("Check if file exists before running") quit() - + + def ffprobe_check(): - ''' + """ checks that ffprobe exists by running its -version command - ''' + """ try: - subprocess.check_output([ - args.ffprobe_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + subprocess.check_output([args.ffprobe_path, "-version"]).decode( + "ascii" + ).rstrip().splitlines()[0].split()[2] except: print("Error locating ffprobe") quit() + def mediaconch_check(): - ''' + """ checks that mediaconch exists by running its -v command - ''' + """ try: - subprocess.check_output([ - args.mediaconch_path, '-v' - ]).decode("ascii").rstrip().splitlines()[0] + subprocess.check_output([args.mediaconch_path, "-v"]).decode( + "ascii" + ).rstrip().splitlines()[0] except: - print('Error locating mediaconch') + print("Error locating mediaconch") quit() + def qcli_check(): - ''' + """ checks that qcli exists by running its -version command - ''' + """ try: - subprocess.check_output([ - args.qcli_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0] + subprocess.check_output([args.qcli_path, "-version"]).decode( + "ascii" + ).rstrip().splitlines()[0] except: - print('Error locating qcli') + print("Error locating qcli") quit() + def get_ffmpeg_version(): - ''' + """ Returns the version of ffmpeg - ''' - ffmpeg_version = 'ffmpeg' + """ + ffmpeg_version = "ffmpeg" try: - ffmpeg_version = subprocess.check_output([ - args.ffmpeg_path, '-version' - ]).decode("ascii").rstrip().splitlines()[0].split()[2] + 
ffmpeg_version = ( + subprocess.check_output([args.ffmpeg_path, "-version"]) + .decode("ascii") + .rstrip() + .splitlines()[0] + .split()[2] + ) except: - print ("Error getting ffmpeg version") + print("Error getting ffmpeg version") quit() - return ffmpeg_version \ No newline at end of file + return ffmpeg_version diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/equipment_dict.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/equipment_dict.py index 6a56ed4..d089010 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/equipment_dict.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/equipment_dict.py @@ -1,52 +1,46 @@ #!/usr/bin/env python3 -def equipment_dict(): - ''' +def equipment_dict(): + """ VTRs - ''' + """ panasonic_ag1980 = { - 'Coding Algorithm' : 'A=ANALOG', - 'Output is one of' : ['Composite', 'S-Video'] + "Coding Algorithm": "A=ANALOG", + "Output is one of": ["Composite", "S-Video"], } - ''' + """ TBCs - ''' + """ dps_295 = { - 'Coding Algorithm' : 'A=ANALOG', - 'Hardware Type' : 'TBC', - 'Output is one of' : ['Composite', 'S-Video', 'Component'] + "Coding Algorithm": "A=ANALOG", + "Hardware Type": "TBC", + "Output is one of": ["Composite", "S-Video", "Component"], } fa_510 = { - 'Coding Algorithm' : 'A=ANALOG', - 'Hardware Type' : 'TBC', - 'Output is one of' : ['Composite', 'S-Video', 'Component'] + "Coding Algorithm": "A=ANALOG", + "Hardware Type": "TBC", + "Output is one of": ["Composite", "S-Video", "Component"], } - ''' + """ ADCs - ''' - hd10ava = { - 'Coding Algorithm' : 'A=SDI', - 'Hardware Type' : 'A/D' - } + """ + hd10ava = {"Coding Algorithm": "A=SDI", "Hardware Type": "A/D"} - ''' + """ Capture Cards - ''' - kona1 = { - 'Coding Algorithm' : 'A=v210', - 'Hardware Type' : 'DIO' - } - + """ + kona1 = {"Coding Algorithm": "A=v210", "Hardware Type": "DIO"} + equipment_dict = { - 'Panasonic AG-1980P' : panasonic_ag1980, - 'DPS-295' : dps_295, - 'FA-510' : fa_510, - 'HD10AVA' : hd10ava, - 'Kona-1-T-R0' : kona1 + "Panasonic AG-1980P": panasonic_ag1980, + "DPS-295": dps_295, + "FA-510": fa_510, + "HD10AVA": hd10ava, + "Kona-1-T-R0": kona1, } - return equipment_dict \ No newline at end of file + return equipment_dict diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1mainfunc.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1mainfunc.py index cf1ce56..22c25e4 100755 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1mainfunc.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1mainfunc.py @@ -11,222 +11,291 @@ from aja_mov2ffv1 import corefuncs from aja_mov2ffv1 import mov2ffv1passfail_checks -#TO DO: general cleanup +# TO DO: general cleanup + def aja_mov2ffv1_main(): - #the pm identifier is the name of the folder that the preservation file will be output to - pm_identifier = 'p' - #the ac identifier will be used as the folder name for the access file - #it will also be appended to the end of the access copy filename - ac_identifier = 'a' - metadata_identifier = 'meta' - #identifier appended to the end of the MKV preservation file - #Replace with "None" to keep the name the same as the input + # the pm identifier is the name of the folder that the preservation file will be output to + pm_identifier = "p" + # the ac identifier will be used as the folder name for the access file + # it will also be appended to the end of the access copy filename + ac_identifier = "a" + metadata_identifier = "meta" + # identifier appended to the end of the MKV preservation file + # Replace with "None" to keep the name the same as the input if not args.keep_filename: - pm_filename_identifier = '-p' + 
pm_filename_identifier = "-p" else: pm_filename_identifier = None - inventoryName = 'transcode_inventory.csv' - #assign mediaconch policies to use + inventoryName = "transcode_inventory.csv" + # assign mediaconch policies to use if not args.input_policy: - movPolicy = os.path.join(os.path.dirname(__file__), 'data/mediaconch_policies/AJA_NTSC_VHS-2SAS-MOV.xml') + movPolicy = os.path.join( + os.path.dirname(__file__), + "data/mediaconch_policies/AJA_NTSC_VHS-2SAS-MOV.xml", + ) else: movPolicy = args.input_policy if not args.output_policy: - mkvPolicy = os.path.join(os.path.dirname(__file__), 'data/mediaconch_policies/AJA_NTSC_VHS-2SAS-MKV.xml') + mkvPolicy = os.path.join( + os.path.dirname(__file__), + "data/mediaconch_policies/AJA_NTSC_VHS-2SAS-MKV.xml", + ) else: mkvPolicy = args.output_policy - #assign input directory and output directory + # assign input directory and output directory indir = corefuncs.input_check() outdir = corefuncs.output_check() - #check that mixdown argument is valid if provided + # check that mixdown argument is valid if provided mov2ffv1supportfuncs.check_mixdown_arg() - #check that required programs are present + # check that required programs are present if not args.skip_qcli: corefuncs.qcli_check() corefuncs.mediaconch_check() corefuncs.ffprobe_check() ffvers = corefuncs.get_ffmpeg_version() - #verify that mediaconch policies are present + # verify that mediaconch policies are present corefuncs.mediaconch_policy_exists(movPolicy) corefuncs.mediaconch_policy_exists(mkvPolicy) csvInventory = os.path.join(indir, inventoryName) - #TO DO: separate out csv and json related functions that are currently in supportfuncs into dedicated csv or json related py files + # TO DO: separate out csv and json related functions that are currently in supportfuncs into dedicated csv or json related py files csvDict = mov2ffv1supportfuncs.import_csv(csvInventory) - #create the list of csv headers that will go in the qc log csv file + # create the list of csv headers that will go in the qc log csv file csvHeaderList = [ - "Shot Sheet Check", - "Date", - "PM Lossless Transcoding", - "Date", - "File Format & Metadata Verification", - "Date", - "File Inspection", - "Date", - "QC Notes", - "AC Filename", - "PM Filename", - "Runtime" + "Shot Sheet Check", + "Date", + "PM Lossless Transcoding", + "Date", + "File Format & Metadata Verification", + "Date", + "File Inspection", + "Date", + "QC Notes", + "AC Filename", + "PM Filename", + "Runtime", ] - print ("***STARTING PROCESS***") + print("***STARTING PROCESS***") for movFilename in glob.glob1(indir, "*.mov"): - #create names that will be used in the script - #TO DO: handle transcoding legacy files (either need a flag that avoids appending pm to the output filename or the ability to read the desired output filename from the CSV file + # create names that will be used in the script + # TO DO: handle transcoding legacy files (either need a flag that avoids appending pm to the output filename or the ability to read the desired output filename from the CSV file inputAbsPath = os.path.join(indir, movFilename) - baseFilename = movFilename.replace('.mov','') + baseFilename = movFilename.replace(".mov", "") baseOutput = os.path.join(outdir, baseFilename) pmOutputFolder = os.path.join(baseOutput, pm_identifier) - mkvBaseFilename = (baseFilename + pm_filename_identifier ) if pm_filename_identifier else (baseFilename) - mkvFilename = mkvBaseFilename + '.mkv' + mkvBaseFilename = ( + (baseFilename + pm_filename_identifier) + if pm_filename_identifier + 
else (baseFilename) + ) + mkvFilename = mkvBaseFilename + ".mkv" outputAbsPath = os.path.join(pmOutputFolder, mkvFilename) - tempMasterFile = os.path.join(pmOutputFolder, baseFilename + '-tmp.mkv') - framemd5File = mkvBaseFilename + '.framemd5' + tempMasterFile = os.path.join(pmOutputFolder, baseFilename + "-tmp.mkv") + framemd5File = mkvBaseFilename + ".framemd5" framemd5AbsPath = os.path.join(pmOutputFolder, framemd5File) acOutputFolder = os.path.join(baseOutput, ac_identifier) - acAbsPath = os.path.join(acOutputFolder, baseFilename + '-' + ac_identifier + '.mp4') + acAbsPath = os.path.join( + acOutputFolder, baseFilename + "-" + ac_identifier + ".mp4" + ) metaOutputFolder = os.path.join(baseOutput, metadata_identifier) - jsonAbsPath = os.path.join(metaOutputFolder, baseFilename + '-' + metadata_identifier + '.json') - pmMD5AbsPath = os.path.join(pmOutputFolder, mkvBaseFilename + '.md5') + jsonAbsPath = os.path.join( + metaOutputFolder, baseFilename + "-" + metadata_identifier + ".json" + ) + pmMD5AbsPath = os.path.join(pmOutputFolder, mkvBaseFilename + ".md5") - #generate ffprobe metadata from input + # generate ffprobe metadata from input input_metadata = mov2ffv1supportfuncs.ffprobe_report(movFilename, inputAbsPath) - #create a list of needed output folders and make them + # create a list of needed output folders and make them if not args.skip_ac: outFolders = [pmOutputFolder, acOutputFolder, metaOutputFolder] else: outFolders = [pmOutputFolder, metaOutputFolder] mov2ffv1supportfuncs.create_transcode_output_folders(baseOutput, outFolders) - print ("\n") - #get information about item from csv inventory + print("\n") + # get information about item from csv inventory print("*checking inventory for", baseFilename + "*") item_csvDict = csvDict.get(baseFilename) - #PASS/FAIL - was the file found in the inventory + # PASS/FAIL - was the file found in the inventory inventoryCheck = mov2ffv1passfail_checks.inventory_check(item_csvDict) - print ("*losslessly transcoding", baseFilename + "*") + print("*losslessly transcoding", baseFilename + "*") - #log transcode start time - tstime = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S') + # log transcode start time + tstime = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S") - #losslessly transcode with ffmpeg + # losslessly transcode with ffmpeg transcode_nameDict = { - 'inputAbsPath' : inputAbsPath, - 'tempMasterFile' : tempMasterFile, - 'framemd5AbsPath' : framemd5AbsPath, - 'outputAbsPath' : outputAbsPath, - 'framemd5File' : framemd5File + "inputAbsPath": inputAbsPath, + "tempMasterFile": tempMasterFile, + "framemd5AbsPath": framemd5AbsPath, + "outputAbsPath": outputAbsPath, + "framemd5File": framemd5File, } - audioStreamCounter = input_metadata['techMetaA']['audio stream count'] - mov2ffv1supportfuncs.ffv1_lossless_transcode(input_metadata, transcode_nameDict, audioStreamCounter) + audioStreamCounter = input_metadata["techMetaA"]["audio stream count"] + mov2ffv1supportfuncs.ffv1_lossless_transcode( + input_metadata, transcode_nameDict, audioStreamCounter + ) - #log transcode finish time - tftime = datetime.datetime.today().strftime('%Y-%m-%d %H:%M:%S') + # log transcode finish time + tftime = datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S") - #If ffv1 file was succesfully created, do remaining verification and transcoding work + # If ffv1 file was succesfully created, do remaining verification and transcoding work if os.path.isfile(outputAbsPath): - #create checksum sidecar file for preservation master - print ("*creating 
checksum*") + # create checksum sidecar file for preservation master + print("*creating checksum*") mkvHash = corefuncs.hashlib_md5(outputAbsPath) - with open (pmMD5AbsPath, 'w', newline='\n') as f: - print(mkvHash, '*' + mkvFilename, file=f) - - #compare streamMD5s - print ("*verifying losslessness*") - mov_stream_sum = mov2ffv1supportfuncs.checksum_streams(inputAbsPath, audioStreamCounter) - mkv_stream_sum = mov2ffv1supportfuncs.checksum_streams(outputAbsPath, audioStreamCounter) - #PASS/FAIL - check if input stream md5s match output stream md5s - streamMD5status = mov2ffv1passfail_checks.stream_md5_status(mov_stream_sum, mkv_stream_sum) - - #create a dictionary with the mediaconch results from the MOV and MKV files + with open(pmMD5AbsPath, "w", newline="\n") as f: + print(mkvHash, "*" + mkvFilename, file=f) + + # compare streamMD5s + print("*verifying losslessness*") + mov_stream_sum = mov2ffv1supportfuncs.checksum_streams( + inputAbsPath, audioStreamCounter + ) + mkv_stream_sum = mov2ffv1supportfuncs.checksum_streams( + outputAbsPath, audioStreamCounter + ) + # PASS/FAIL - check if input stream md5s match output stream md5s + streamMD5status = mov2ffv1passfail_checks.stream_md5_status( + mov_stream_sum, mkv_stream_sum + ) + + # create a dictionary with the mediaconch results from the MOV and MKV files mediaconchResults_dict = { - 'MOV Mediaconch Policy': mov2ffv1supportfuncs.mediaconch_policy_check(inputAbsPath, movPolicy), - 'MKV Implementation': mov2ffv1supportfuncs.mediaconch_implementation_check(outputAbsPath), - 'MKV Mediaconch Policy': mov2ffv1supportfuncs.mediaconch_policy_check(outputAbsPath, mkvPolicy), + "MOV Mediaconch Policy": mov2ffv1supportfuncs.mediaconch_policy_check( + inputAbsPath, movPolicy + ), + "MKV Implementation": mov2ffv1supportfuncs.mediaconch_implementation_check( + outputAbsPath + ), + "MKV Mediaconch Policy": mov2ffv1supportfuncs.mediaconch_policy_check( + outputAbsPath, mkvPolicy + ), } - #PASS/FAIL - check if any mediaconch results failed and append failed policies to results - mediaconchResults = mov2ffv1passfail_checks.parse_mediaconchResults(mediaconchResults_dict) - - #run ffprobe on the output file - output_metadata = mov2ffv1supportfuncs.ffprobe_report(mkvFilename, outputAbsPath) - #log system info - systemInfo = mov2ffv1supportfuncs.generate_system_log(ffvers, tstime, tftime) - - #PASS/FAIL - are files lossless - losslessCheck = mov2ffv1passfail_checks.lossless_check(input_metadata, output_metadata, streamMD5status) - - #create a dictionary containing QC results - qcResults = mov2ffv1supportfuncs.qc_results(inventoryCheck, losslessCheck, mediaconchResults) - - #create json metadata file - #TO DO: combine checksums into a single dictionary to reduce variables needed here - mov2ffv1supportfuncs.create_json(jsonAbsPath, systemInfo, input_metadata, mov_stream_sum, mkvHash, mkv_stream_sum, baseFilename, output_metadata, item_csvDict, qcResults) + # PASS/FAIL - check if any mediaconch results failed and append failed policies to results + mediaconchResults = mov2ffv1passfail_checks.parse_mediaconchResults( + mediaconchResults_dict + ) + + # run ffprobe on the output file + output_metadata = mov2ffv1supportfuncs.ffprobe_report( + mkvFilename, outputAbsPath + ) + # log system info + systemInfo = mov2ffv1supportfuncs.generate_system_log( + ffvers, tstime, tftime + ) + + # PASS/FAIL - are files lossless + losslessCheck = mov2ffv1passfail_checks.lossless_check( + input_metadata, output_metadata, streamMD5status + ) + + # create a dictionary containing QC 
results + qcResults = mov2ffv1supportfuncs.qc_results( + inventoryCheck, losslessCheck, mediaconchResults + ) + + # create json metadata file + # TO DO: combine checksums into a single dictionary to reduce variables needed here + mov2ffv1supportfuncs.create_json( + jsonAbsPath, + systemInfo, + input_metadata, + mov_stream_sum, + mkvHash, + mkv_stream_sum, + baseFilename, + output_metadata, + item_csvDict, + qcResults, + ) if not args.skip_ac: - #create access copy - print ('*transcoding access copy*') - mov2ffv1supportfuncs.two_pass_h264_encoding(audioStreamCounter, outputAbsPath, acAbsPath) + # create access copy + print("*transcoding access copy*") + mov2ffv1supportfuncs.two_pass_h264_encoding( + audioStreamCounter, outputAbsPath, acAbsPath + ) - #create checksum sidecar file for access copy + # create checksum sidecar file for access copy acHash = corefuncs.hashlib_md5(acAbsPath) - with open (os.path.join(acOutputFolder, baseFilename + '-' + ac_identifier + '.md5'), 'w', newline='\n') as f: - print(acHash, '*' + baseFilename + '-' + ac_identifier + '.mp4', file=f) - - #log access copy filename if access copy was created - #TO DO: verify that access copy runtime matches pm runtime? + with open( + os.path.join( + acOutputFolder, baseFilename + "-" + ac_identifier + ".md5" + ), + "w", + newline="\n", + ) as f: + print( + acHash, + "*" + baseFilename + "-" + ac_identifier + ".mp4", + file=f, + ) + + # log access copy filename if access copy was created + # TO DO: verify that access copy runtime matches pm runtime? if os.path.isfile(acAbsPath): - acFilename = baseFilename + '-' + ac_identifier + '.mp4' + acFilename = baseFilename + "-" + ac_identifier + ".mp4" else: acFilename = "No access copy found" - #get current date for logging when QC happned - qcDate = str(datetime.datetime.today().strftime('%Y-%m-%d')) + # get current date for logging when QC happned + qcDate = str(datetime.datetime.today().strftime("%Y-%m-%d")) - #create the list that will go in the qc log csv file - #should correspond to the csvHeaderList earlier in the script + # create the list that will go in the qc log csv file + # should correspond to the csvHeaderList earlier in the script csvWriteList = [ - qcResults['QC']['Inventory Check'], - qcDate, - qcResults['QC']['Lossless Check'], - qcDate, - qcResults['QC']['Mediaconch Results'], - qcDate, - None, - None, - None, - acFilename, - mkvFilename, - mov2ffv1supportfuncs.convert_runtime(output_metadata['file metadata']['duration']) + qcResults["QC"]["Inventory Check"], + qcDate, + qcResults["QC"]["Lossless Check"], + qcDate, + qcResults["QC"]["Mediaconch Results"], + qcDate, + None, + None, + None, + acFilename, + mkvFilename, + mov2ffv1supportfuncs.convert_runtime( + output_metadata["file metadata"]["duration"] + ), ] - #Add QC results to QC log csv file - mov2ffv1supportfuncs.write_output_csv(outdir, csvHeaderList, csvWriteList, output_metadata, qcResults) + # Add QC results to QC log csv file + mov2ffv1supportfuncs.write_output_csv( + outdir, csvHeaderList, csvWriteList, output_metadata, qcResults + ) - #create spectrogram for pm audio channels + # create spectrogram for pm audio channels if audioStreamCounter > 0 and not args.skip_spectrogram: - print ("*generating QC spectrograms*") - channel_layout_list = input_metadata['techMetaA']['channels'] - mov2ffv1supportfuncs.generate_spectrogram(outputAbsPath, channel_layout_list, metaOutputFolder, baseFilename) + print("*generating QC spectrograms*") + channel_layout_list = input_metadata["techMetaA"]["channels"] + 
mov2ffv1supportfuncs.generate_spectrogram( + outputAbsPath, channel_layout_list, metaOutputFolder, baseFilename + ) - #create qctools report + # create qctools report if not args.skip_qcli: - print ("*creating qctools report*") + print("*creating qctools report*") mov2ffv1supportfuncs.generate_qctools(outputAbsPath) else: - print ('No file in output folder. Skipping file processing') - -#TO DO: (low/not priority) add ability to automatically pull trim times from CSV (-ss 00:00:02 -t 02:13:52)? -#import time -#timeIn = [get csv time1] -#timeOut = [get csv time2] -#t1 = datetime.datetime.strptime(timeIn, "%H:%M:%S") -#t2 = datetime.datetime.strptime(timeOut, "%H:%M:%S") -#trimtime = time.strftime('%H:%M:%S', time.gmtime(((60 * ((60 * t2.hour) + t2.minute)) + t2.second) - ((60 * ((60 * t1.hour) + t1.minute)) + t1.second))) + print("No file in output folder. Skipping file processing") + + +# TO DO: (low/not priority) add ability to automatically pull trim times from CSV (-ss 00:00:02 -t 02:13:52)? +# import time +# timeIn = [get csv time1] +# timeOut = [get csv time2] +# t1 = datetime.datetime.strptime(timeIn, "%H:%M:%S") +# t2 = datetime.datetime.strptime(timeOut, "%H:%M:%S") +# trimtime = time.strftime('%H:%M:%S', time.gmtime(((60 * ((60 * t2.hour) + t2.minute)) + t2.second) - ((60 * ((60 * t1.hour) + t1.minute)) + t1.second))) diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1parameters.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1parameters.py index 6be010b..34deefc 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1parameters.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1parameters.py @@ -1,29 +1,133 @@ #!/usr/bin/env python3 -''' +""" Argument parser for in-house AJA v210/mov to ffv1/mkv script -''' +""" import argparse import sys parser = argparse.ArgumentParser() -parser.add_argument('--input', '-i', action='store', dest='input_path', type=str, help='full path to input folder') -parser.add_argument('--output', '-o', action='store', dest='output_path', type=str, help='full path to output folder') -parser.add_argument('--ffmpeg', action='store', dest='ffmpeg_path', default='ffmpeg', type=str, help='For setting a custom ffmpeg path') -parser.add_argument('--ffprobe', action='store', dest='ffprobe_path', default='ffprobe', type=str, help='For setting a custom ffprobe path') -parser.add_argument('--qcli', action='store', dest='qcli_path', default='qcli', type=str, help='For setting a custom qcli path') -parser.add_argument('--mediaconch', action='store', dest='mediaconch_path', default='mediaconch', type=str, help='For setting a custom mediaconch path') -parser.add_argument('--verbose', required=False, action='store_true', help='view ffmpeg output when transcoding') -parser.add_argument('--mixdown', action='store', dest='mixdown', default='copy', type=str, help='How the audio streams will be mapped for the access copy. If excluded, this will default to copying the stream configuration of the input. Inputs include: copy, 4to3, and 4to2. 4to3 takes 4 mono tracks and mixes tracks 1&2 to stereo while leaving tracks 3&4 mono. 4to2 takes 4 mono tracks and mixes tracks 1&2 and 3&4 to stereo.') -parser.add_argument('--slices', action='store', dest='ffv1_slice_count', default='16', choices=[4,6,9,12,16,24,30], type=int, help='Set the FFV1 slice count used by ffmpeg when losslessly transcoding files. 
Default is 16.') -parser.add_argument('--skipac', required=False, action='store_true', dest='skip_ac', help='skip access copy transcoding') -parser.add_argument('--skipqcli', required=False, action='store_true', dest='skip_qcli', help='skip generating qc tools report') -parser.add_argument('--skipspectrogram', required=False, action='store_true', dest='skip_spectrogram', help='skip generating spectrograms') -parser.add_argument('--keep_filename', required=False, action='store_true', dest='keep_filename', help='MKV preservation master will have the same filename as the source MOV file') -parser.add_argument('--embed_framemd5', required=False, action='store_true', dest='embed_framemd5', help='remux preservation file to embed framemd5') -parser.add_argument('--input_policy', required=False, action='store', dest='input_policy', help='Mediaconch policy for input files') -parser.add_argument('--output_policy', required=False, action='store', dest='output_policy', help='Mediaconch policy for output files') +parser.add_argument( + "--input", + "-i", + action="store", + dest="input_path", + type=str, + help="full path to input folder", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output folder", +) +parser.add_argument( + "--ffmpeg", + action="store", + dest="ffmpeg_path", + default="ffmpeg", + type=str, + help="For setting a custom ffmpeg path", +) +parser.add_argument( + "--ffprobe", + action="store", + dest="ffprobe_path", + default="ffprobe", + type=str, + help="For setting a custom ffprobe path", +) +parser.add_argument( + "--qcli", + action="store", + dest="qcli_path", + default="qcli", + type=str, + help="For setting a custom qcli path", +) +parser.add_argument( + "--mediaconch", + action="store", + dest="mediaconch_path", + default="mediaconch", + type=str, + help="For setting a custom mediaconch path", +) +parser.add_argument( + "--verbose", + required=False, + action="store_true", + help="view ffmpeg output when transcoding", +) +parser.add_argument( + "--mixdown", + action="store", + dest="mixdown", + default="copy", + type=str, + help="How the audio streams will be mapped for the access copy. If excluded, this will default to copying the stream configuration of the input. Inputs include: copy, 4to3, and 4to2. 4to3 takes 4 mono tracks and mixes tracks 1&2 to stereo while leaving tracks 3&4 mono. 4to2 takes 4 mono tracks and mixes tracks 1&2 and 3&4 to stereo.", +) +parser.add_argument( + "--slices", + action="store", + dest="ffv1_slice_count", + default="16", + choices=[4, 6, 9, 12, 16, 24, 30], + type=int, + help="Set the FFV1 slice count used by ffmpeg when losslessly transcoding files. 
Default is 16.", +) +parser.add_argument( + "--skipac", + required=False, + action="store_true", + dest="skip_ac", + help="skip access copy transcoding", +) +parser.add_argument( + "--skipqcli", + required=False, + action="store_true", + dest="skip_qcli", + help="skip generating qc tools report", +) +parser.add_argument( + "--skipspectrogram", + required=False, + action="store_true", + dest="skip_spectrogram", + help="skip generating spectrograms", +) +parser.add_argument( + "--keep_filename", + required=False, + action="store_true", + dest="keep_filename", + help="MKV preservation master will have the same filename as the source MOV file", +) +parser.add_argument( + "--embed_framemd5", + required=False, + action="store_true", + dest="embed_framemd5", + help="remux preservation file to embed framemd5", +) +parser.add_argument( + "--input_policy", + required=False, + action="store", + dest="input_policy", + help="Mediaconch policy for input files", +) +parser.add_argument( + "--output_policy", + required=False, + action="store", + dest="output_policy", + help="Mediaconch policy for output files", +) args = parser.parse_args() diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1passfail_checks.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1passfail_checks.py index 3c20104..e1c1b00 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1passfail_checks.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1passfail_checks.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 + def inventory_check(item_csvDict): if item_csvDict is None: print("unable to locate file in csv data!") @@ -9,6 +10,7 @@ def inventory_check(item_csvDict): inventoryCheck = "PASS" return inventoryCheck + def parse_mediaconchResults(mediaconchResults_dict): if "FAIL" in mediaconchResults_dict.values(): mediaconchResults = "FAIL" @@ -16,36 +18,45 @@ def parse_mediaconchResults(mediaconchResults_dict): for key in mediaconchResults_dict.keys(): if "FAIL" in mediaconchResults_dict.get(key): failed_policies.append(key) - mediaconchResults = mediaconchResults + ': ' + str(failed_policies).strip('[]') + mediaconchResults = mediaconchResults + ": " + str(failed_policies).strip("[]") else: mediaconchResults = "PASS" return mediaconchResults + def stream_md5_status(input_streammd5, output_streammd5): if output_streammd5 == input_streammd5: - print ('stream checksums match. Your file is lossless') + print("stream checksums match. Your file is lossless") streamMD5status = "PASS" else: - print ('stream checksums do not match. Output file may not be lossless') + print("stream checksums do not match. 
Output file may not be lossless") streamMD5status = "FAIL" return streamMD5status + def lossless_check(input_metadata, output_metadata, streamMD5status): - if output_metadata.get('output_techMetaA') == input_metadata.get('input_techMetaA') and output_metadata.get('output_techMetaV') == output_metadata.get('input_techMetaV'): + if output_metadata.get("output_techMetaA") == input_metadata.get( + "input_techMetaA" + ) and output_metadata.get("output_techMetaV") == output_metadata.get( + "input_techMetaV" + ): QC_techMeta = "PASS" else: print("input and output technical metadata do not match") QC_techMeta = "FAIL" - - losslessCheckDict = {'technical metadata' : QC_techMeta, 'stream checksums' : streamMD5status} + + losslessCheckDict = { + "technical metadata": QC_techMeta, + "stream checksums": streamMD5status, + } if "FAIL" in losslessCheckDict.values(): losslessCheck = "FAIL" losslessFail = [] for key in losslessCheckDict.keys(): if "FAIL" in losslessCheckDict.get(key): losslessFail.append(key) - losslessCheck = losslessCheck + ': ' + str(losslessFail).strip('[]') + losslessCheck = losslessCheck + ": " + str(losslessFail).strip("[]") else: losslessCheck = "PASS" - - return losslessCheck \ No newline at end of file + + return losslessCheck diff --git a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1supportfuncs.py b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1supportfuncs.py index 30cef54..9a2208c 100644 --- a/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1supportfuncs.py +++ b/AV/Video/aja_mov2ffv1/aja_mov2ffv1/mov2ffv1supportfuncs.py @@ -11,271 +11,550 @@ from aja_mov2ffv1 import equipment_dict from aja_mov2ffv1.mov2ffv1parameters import args + def create_transcode_output_folders(baseOutput, outputFolderList): if not os.path.isdir(baseOutput): try: os.mkdir(baseOutput) except: - print ("unable to create output folder:", baseOutput) + print("unable to create output folder:", baseOutput) quit() else: - print (baseOutput, "already exists") - print ('Proceeding') + print(baseOutput, "already exists") + print("Proceeding") for folder in outputFolderList: if not os.path.isdir(folder): try: os.mkdir(folder) except: - print ("unable to create output folder:", folder) + print("unable to create output folder:", folder) quit() else: - print ("using existing folder", folder, "as output") + print("using existing folder", folder, "as output") + def check_mixdown_arg(): - mixdown_list = ['copy', '4to3', '4to2', '2to1'] - #TO DO add swap as an option to allow switching tracks 3&4 with tracks 1&2 + mixdown_list = ["copy", "4to3", "4to2", "2to1"] + # TO DO add swap as an option to allow switching tracks 3&4 with tracks 1&2 if not args.mixdown in mixdown_list: print("The selected audio mixdown is not a valid value") - print ("please use one of: copy, 4to3, 4to2, 2to1") + print("please use one of: copy, 4to3, 4to2, 2to1") quit() + def ffprobe_report(filename, input_file_abspath): - ''' + """ returns nested dictionary with ffprobe metadata - ''' - video_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'v', '-show_entries', 'stream=codec_name,avg_frame_rate,codec_time_base,width,height,pix_fmt,sample_aspect_ratio,display_aspect_ratio,color_range,color_space,color_transfer,color_primaries,chroma_location,field_order,codec_tag_string', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - audio_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'a', '-show_entries', 
'stream=codec_long_name,bits_per_raw_sample,sample_rate,channels', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - format_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-show_entries', 'format=duration,size,nb_streams', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - data_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 'd', '-show_entries', 'stream=codec_tag_string', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - attachment_output = json.loads(subprocess.check_output([args.ffprobe_path, '-v', 'error', '-select_streams', 't', '-show_entries', 'stream_tags=filename', input_file_abspath, '-of', 'json']).decode("ascii").rstrip()) - - #cleaning up attachment output - tags = [streams.get('tags') for streams in (attachment_output['streams'])] + """ + video_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "v", + "-show_entries", + "stream=codec_name,avg_frame_rate,codec_time_base,width,height,pix_fmt,sample_aspect_ratio,display_aspect_ratio,color_range,color_space,color_transfer,color_primaries,chroma_location,field_order,codec_tag_string", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + audio_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=codec_long_name,bits_per_raw_sample,sample_rate,channels", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + format_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-show_entries", + "format=duration,size,nb_streams", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + data_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "d", + "-show_entries", + "stream=codec_tag_string", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + attachment_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-select_streams", + "t", + "-show_entries", + "stream_tags=filename", + input_file_abspath, + "-of", + "json", + ] + ) + .decode("ascii") + .rstrip() + ) + + # cleaning up attachment output + tags = [streams.get("tags") for streams in (attachment_output["streams"])] attachment_list = [] for i in tags: - attachmentFilename = [i.get('filename')] + attachmentFilename = [i.get("filename")] attachment_list.extend(attachmentFilename) - #parse ffprobe metadata lists - video_codec_name_list = [stream.get('codec_name') for stream in (video_output['streams'])] - audio_codec_name_list = [stream.get('codec_long_name') for stream in (audio_output['streams'])] - data_streams = [stream.get('codec_tag_string') for stream in (data_output['streams'])] - width = [stream.get('width') for stream in (video_output['streams'])][0] - height = [stream.get('height') for stream in (video_output['streams'])][0] - pixel_format = [stream.get('pix_fmt') for stream in (video_output['streams'])][0] - sar = [stream.get('sample_aspect_ratio') for stream in (video_output['streams'])][0] - dar = [stream.get('display_aspect_ratio') for stream in (video_output['streams'])][0] - framerate = [stream.get('avg_frame_rate') for stream in (video_output['streams'])][0] - color_space = [stream.get('color_space') for stream in 
(video_output['streams'])][0] - color_range = [stream.get('color_range') for stream in (video_output['streams'])][0] - color_transfer = [stream.get('color_transfer') for stream in (video_output['streams'])][0] - color_primaries = [stream.get('color_primaries') for stream in (video_output['streams'])][0] - audio_bitrate = [stream.get('bits_per_raw_sample') for stream in (audio_output['streams'])] - audio_sample_rate = [stream.get('sample_rate') for stream in (audio_output['streams'])] - audio_channels = [stream.get('channels') for stream in (audio_output['streams'])] + # parse ffprobe metadata lists + video_codec_name_list = [ + stream.get("codec_name") for stream in (video_output["streams"]) + ] + audio_codec_name_list = [ + stream.get("codec_long_name") for stream in (audio_output["streams"]) + ] + data_streams = [ + stream.get("codec_tag_string") for stream in (data_output["streams"]) + ] + width = [stream.get("width") for stream in (video_output["streams"])][0] + height = [stream.get("height") for stream in (video_output["streams"])][0] + pixel_format = [stream.get("pix_fmt") for stream in (video_output["streams"])][0] + sar = [stream.get("sample_aspect_ratio") for stream in (video_output["streams"])][0] + dar = [stream.get("display_aspect_ratio") for stream in (video_output["streams"])][ + 0 + ] + framerate = [stream.get("avg_frame_rate") for stream in (video_output["streams"])][ + 0 + ] + color_space = [stream.get("color_space") for stream in (video_output["streams"])][0] + color_range = [stream.get("color_range") for stream in (video_output["streams"])][0] + color_transfer = [ + stream.get("color_transfer") for stream in (video_output["streams"]) + ][0] + color_primaries = [ + stream.get("color_primaries") for stream in (video_output["streams"]) + ][0] + audio_bitrate = [ + stream.get("bits_per_raw_sample") for stream in (audio_output["streams"]) + ] + audio_sample_rate = [ + stream.get("sample_rate") for stream in (audio_output["streams"]) + ] + audio_channels = [stream.get("channels") for stream in (audio_output["streams"])] audio_stream_count = len(audio_codec_name_list) file_metadata = { - 'filename' : filename, - 'file size' : format_output.get('format')['size'], - 'duration' : format_output.get('format')['duration'], - 'streams' : format_output.get('format')['nb_streams'], - 'video streams' : video_codec_name_list, - 'audio streams' : audio_codec_name_list, - 'data streams' : data_streams, - 'attachments' : attachment_list + "filename": filename, + "file size": format_output.get("format")["size"], + "duration": format_output.get("format")["duration"], + "streams": format_output.get("format")["nb_streams"], + "video streams": video_codec_name_list, + "audio streams": audio_codec_name_list, + "data streams": data_streams, + "attachments": attachment_list, } techMetaV = { - 'width' : width, - 'height' : height, - 'sample aspect ratio' : sar, - 'display aspect ratio' : dar, - 'pixel format' : pixel_format, - 'framerate' : framerate, - 'color space' : color_space, - 'color range' : color_range, - 'color primaries' : color_primaries, - 'color transfer' : color_transfer + "width": width, + "height": height, + "sample aspect ratio": sar, + "display aspect ratio": dar, + "pixel format": pixel_format, + "framerate": framerate, + "color space": color_space, + "color range": color_range, + "color primaries": color_primaries, + "color transfer": color_transfer, } techMetaA = { - 'audio stream count' : audio_stream_count, - 'audio bitrate' : audio_bitrate, - 'audio sample rate' : 
audio_sample_rate, - 'channels' : audio_channels + "audio stream count": audio_stream_count, + "audio bitrate": audio_bitrate, + "audio sample rate": audio_sample_rate, + "channels": audio_channels, } - ffprobe_metadata = {'file metadata' : file_metadata, 'techMetaV' : techMetaV, 'techMetaA' : techMetaA} + ffprobe_metadata = { + "file metadata": file_metadata, + "techMetaV": techMetaV, + "techMetaA": techMetaA, + } return ffprobe_metadata + def ffv1_lossless_transcode(input_metadata, transcode_nameDict, audioStreamCounter): - #get relevant names from nameDict - inputAbsPath = transcode_nameDict.get('inputAbsPath') - tempMasterFile = transcode_nameDict.get('tempMasterFile') - framemd5AbsPath = transcode_nameDict.get('framemd5AbsPath') - outputAbsPath = transcode_nameDict.get('outputAbsPath') - framemd5File = transcode_nameDict.get('framemd5File') - - #create ffmpeg command + # get relevant names from nameDict + inputAbsPath = transcode_nameDict.get("inputAbsPath") + tempMasterFile = transcode_nameDict.get("tempMasterFile") + framemd5AbsPath = transcode_nameDict.get("framemd5AbsPath") + outputAbsPath = transcode_nameDict.get("outputAbsPath") + framemd5File = transcode_nameDict.get("framemd5File") + + # create ffmpeg command ffmpeg_command = [args.ffmpeg_path] if not args.verbose: - ffmpeg_command.extend(('-loglevel', 'error')) - ffmpeg_command.extend(['-i', inputAbsPath, '-map', '0', '-dn', '-c:v', 'ffv1', '-level', '3', '-g', '1', '-slices', str(args.ffv1_slice_count), '-slicecrc', '1']) - #TO DO: consider putting color data in a list or dict to replace the following if statements with a single if statement in a for loop - if input_metadata['techMetaV']['color primaries']: - ffmpeg_command.extend(('-color_primaries', input_metadata['techMetaV']['color primaries'])) - if input_metadata['techMetaV']['color transfer']: - ffmpeg_command.extend(('-color_trc', input_metadata['techMetaV']['color transfer'])) - if input_metadata['techMetaV']['color space']: - ffmpeg_command.extend(('-colorspace', input_metadata['techMetaV']['color space'])) + ffmpeg_command.extend(("-loglevel", "error")) + ffmpeg_command.extend( + [ + "-i", + inputAbsPath, + "-map", + "0", + "-dn", + "-c:v", + "ffv1", + "-level", + "3", + "-g", + "1", + "-slices", + str(args.ffv1_slice_count), + "-slicecrc", + "1", + ] + ) + # TO DO: consider putting color data in a list or dict to replace the following if statements with a single if statement in a for loop + if input_metadata["techMetaV"]["color primaries"]: + ffmpeg_command.extend( + ("-color_primaries", input_metadata["techMetaV"]["color primaries"]) + ) + if input_metadata["techMetaV"]["color transfer"]: + ffmpeg_command.extend( + ("-color_trc", input_metadata["techMetaV"]["color transfer"]) + ) + if input_metadata["techMetaV"]["color space"]: + ffmpeg_command.extend( + ("-colorspace", input_metadata["techMetaV"]["color space"]) + ) if audioStreamCounter > 0: - ffmpeg_command.extend(('-c:a', 'copy')) - ffmpeg_command.extend((tempMasterFile if args.embed_framemd5 else outputAbsPath, '-f', 'framemd5', '-an', framemd5AbsPath)) - - #execute ffmpeg command + ffmpeg_command.extend(("-c:a", "copy")) + ffmpeg_command.extend( + ( + tempMasterFile if args.embed_framemd5 else outputAbsPath, + "-f", + "framemd5", + "-an", + framemd5AbsPath, + ) + ) + + # execute ffmpeg command subprocess.run(ffmpeg_command) - #remux to attach framemd5 + # remux to attach framemd5 if args.embed_framemd5: - add_attachment = [args.ffmpeg_path, '-loglevel', 'error', '-i', tempMasterFile, '-c', 'copy', '-map', 
'0', '-attach', framemd5AbsPath, '-metadata:s:t:0', 'mimetype=application/octet-stream', '-metadata:s:t:0', 'filename=' + framemd5File, outputAbsPath] + add_attachment = [ + args.ffmpeg_path, + "-loglevel", + "error", + "-i", + tempMasterFile, + "-c", + "copy", + "-map", + "0", + "-attach", + framemd5AbsPath, + "-metadata:s:t:0", + "mimetype=application/octet-stream", + "-metadata:s:t:0", + "filename=" + framemd5File, + outputAbsPath, + ] if os.path.isfile(tempMasterFile): subprocess.call(add_attachment) filesToDelete = [tempMasterFile, framemd5AbsPath] delete_files(filesToDelete) else: - print ("There was an issue finding the file", tempMasterFile) + print("There was an issue finding the file", tempMasterFile) + def delete_files(list): - ''' + """ Loops through a list of files and tries to delete them - ''' + """ for i in list: try: os.remove(i) except FileNotFoundError: - print ("unable to delete " + i) - print ("File not found") + print("unable to delete " + i) + print("File not found") + def checksum_streams(input, audioStreamCounter): - ''' + """ Gets the stream md5 of a file Uses both video and all audio streams if audio is present - ''' - stream_sum=[] - stream_sum_command = [args.ffmpeg_path, '-loglevel', 'error', '-i', input, '-map', '0:v', '-an'] - - stream_sum_command.extend(('-f', 'md5', '-')) - video_stream_sum = subprocess.check_output(stream_sum_command).decode("ascii").rstrip() - stream_sum.append(video_stream_sum.replace('MD5=', '')) + """ + stream_sum = [] + stream_sum_command = [ + args.ffmpeg_path, + "-loglevel", + "error", + "-i", + input, + "-map", + "0:v", + "-an", + ] + + stream_sum_command.extend(("-f", "md5", "-")) + video_stream_sum = ( + subprocess.check_output(stream_sum_command).decode("ascii").rstrip() + ) + stream_sum.append(video_stream_sum.replace("MD5=", "")) for i in range(audioStreamCounter): audio_sum_command = [args.ffmpeg_path] - audio_sum_command += ['-loglevel', 'error', '-y', '-i', input] - audio_sum_command += ['-vn', '-map', '0:a:%(a)s' % {"a" : i}] - audio_sum_command += ['-c:a', 'pcm_s24le', '-f', 'md5', '-'] - audio_stream_sum = subprocess.check_output(audio_sum_command).decode("ascii").rstrip() - stream_sum.append(audio_stream_sum.replace('MD5=', '')) + audio_sum_command += ["-loglevel", "error", "-y", "-i", input] + audio_sum_command += ["-vn", "-map", "0:a:%(a)s" % {"a": i}] + audio_sum_command += ["-c:a", "pcm_s24le", "-f", "md5", "-"] + audio_stream_sum = ( + subprocess.check_output(audio_sum_command).decode("ascii").rstrip() + ) + stream_sum.append(audio_stream_sum.replace("MD5=", "")) return stream_sum + def two_pass_h264_encoding(audioStreamCounter, outputAbsPath, acAbsPath): - if os.name == 'nt': - nullOut = 'NUL' + if os.name == "nt": + nullOut = "NUL" else: - nullOut = '/dev/null' + nullOut = "/dev/null" pass1 = [args.ffmpeg_path] if not args.verbose: - pass1 += ['-loglevel', 'error'] - pass1 += ['-y', '-i', outputAbsPath, '-c:v', 'libx264', '-preset', 'medium', '-b:v', '8000k', '-pix_fmt', 'yuv420p', '-pass', '1'] + pass1 += ["-loglevel", "error"] + pass1 += [ + "-y", + "-i", + outputAbsPath, + "-c:v", + "libx264", + "-preset", + "medium", + "-b:v", + "8000k", + "-pix_fmt", + "yuv420p", + "-pass", + "1", + ] if audioStreamCounter > 0: - if args.mixdown == 'copy': - pass1 += ['-c:a', 'aac', '-b:a', '128k'] - if args.mixdown == '4to3' and audioStreamCounter == 4: - pass1 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a]', '-map', '0:v', '-map', '[a]', '-map', '0:a:2', '-map', '0:a:3'] - if args.mixdown == '4to2' and 
audioStreamCounter == 4: - pass1 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a];[0:a:2][0:a:3]amerge=inputs=2[b]', '-map', '0:v', '-map', '[a]', '-map', '[b]'] - if args.mixdown == '2to1' and audioStreamCounter == 2: - pass1 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a]', '-map', '0:v', '-map', '[a]'] - pass1 += ['-f', 'mp4', nullOut] + if args.mixdown == "copy": + pass1 += ["-c:a", "aac", "-b:a", "128k"] + if args.mixdown == "4to3" and audioStreamCounter == 4: + pass1 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]", + "-map", + "0:v", + "-map", + "[a]", + "-map", + "0:a:2", + "-map", + "0:a:3", + ] + if args.mixdown == "4to2" and audioStreamCounter == 4: + pass1 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a];[0:a:2][0:a:3]amerge=inputs=2[b]", + "-map", + "0:v", + "-map", + "[a]", + "-map", + "[b]", + ] + if args.mixdown == "2to1" and audioStreamCounter == 2: + pass1 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]", + "-map", + "0:v", + "-map", + "[a]", + ] + pass1 += ["-f", "mp4", nullOut] pass2 = [args.ffmpeg_path] if not args.verbose: - pass2 += ['-loglevel', 'error'] - pass2 += ['-y', '-i', outputAbsPath, '-c:v', 'libx264', '-preset', 'medium', '-b:v', '8000k', '-pix_fmt', 'yuv420p', '-pass', '2'] + pass2 += ["-loglevel", "error"] + pass2 += [ + "-y", + "-i", + outputAbsPath, + "-c:v", + "libx264", + "-preset", + "medium", + "-b:v", + "8000k", + "-pix_fmt", + "yuv420p", + "-pass", + "2", + ] if audioStreamCounter > 0: - if args.mixdown == 'copy': - pass2 += ['-c:a', 'aac', '-b:a', '128k'] - if args.mixdown == '4to3' and audioStreamCounter == 4: - pass2 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a]', '-map', '0:v', '-map', '[a]', '-map', '0:a:2', '-map', '0:a:3'] - if args.mixdown == '4to2' and audioStreamCounter == 4: - pass2 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a];[0:a:2][0:a:3]amerge=inputs=2[b]', '-map', '0:v', '-map', '[a]', '-map', '[b]'] - if args.mixdown == '2to1' and audioStreamCounter == 2: - pass2 += ['-filter_complex', '[0:a:0][0:a:1]amerge=inputs=2[a]', '-map', '0:v', '-map', '[a]'] + if args.mixdown == "copy": + pass2 += ["-c:a", "aac", "-b:a", "128k"] + if args.mixdown == "4to3" and audioStreamCounter == 4: + pass2 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]", + "-map", + "0:v", + "-map", + "[a]", + "-map", + "0:a:2", + "-map", + "0:a:3", + ] + if args.mixdown == "4to2" and audioStreamCounter == 4: + pass2 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a];[0:a:2][0:a:3]amerge=inputs=2[b]", + "-map", + "0:v", + "-map", + "[a]", + "-map", + "[b]", + ] + if args.mixdown == "2to1" and audioStreamCounter == 2: + pass2 += [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]", + "-map", + "0:v", + "-map", + "[a]", + ] pass2 += [acAbsPath] subprocess.run(pass1) subprocess.run(pass2) + def generate_spectrogram(input, channel_layout_list, outputFolder, outputName): - ''' + """ Creates a spectrogram for each audio track in the input - ''' + """ spectrogram_resolution = "1920x1080" for index, item in enumerate(channel_layout_list): - output = os.path.join(outputFolder, outputName + '_0a' + str(index) + '.png') + output = os.path.join(outputFolder, outputName + "_0a" + str(index) + ".png") spectrogram_args = [args.ffmpeg_path] - spectrogram_args += ['-loglevel', 'error', '-y'] - spectrogram_args += ['-i', input, '-lavfi'] + spectrogram_args += ["-loglevel", "error", "-y"] + spectrogram_args += ["-i", input, "-lavfi"] if item > 1: - spectrogram_args += 
['[0:a:%(a)s]showspectrumpic=mode=separate:s=%(b)s' % {"a" : index, "b" : spectrogram_resolution}] + spectrogram_args += [ + "[0:a:%(a)s]showspectrumpic=mode=separate:s=%(b)s" + % {"a": index, "b": spectrogram_resolution} + ] else: - spectrogram_args += ['[0:a:%(a)s]showspectrumpic=s=%(b)s' % {"a" : index, "b" : spectrogram_resolution}] + spectrogram_args += [ + "[0:a:%(a)s]showspectrumpic=s=%(b)s" + % {"a": index, "b": spectrogram_resolution} + ] spectrogram_args += [output] subprocess.run(spectrogram_args) + def generate_qctools(input): - ''' + """ uses qcli to generate a QCTools report - ''' - qctools_args = [args.qcli_path, '-i', input] + """ + qctools_args = [args.qcli_path, "-i", input] subprocess.run(qctools_args) + def mediaconch_policy_check(input, policy): - mediaconchResults = subprocess.check_output([args.mediaconch_path, '--policy=' + policy, input]).decode("ascii").rstrip().split()[0] + mediaconchResults = ( + subprocess.check_output([args.mediaconch_path, "--policy=" + policy, input]) + .decode("ascii") + .rstrip() + .split()[0] + ) if mediaconchResults == "pass!": mediaconchResults = "PASS" else: mediaconchResults = "FAIL" return mediaconchResults + def mediaconch_implementation_check(input): - mediaconchResults = subprocess.check_output([args.mediaconch_path, input]).decode("ascii").rstrip().split()[0] + mediaconchResults = ( + subprocess.check_output([args.mediaconch_path, input]) + .decode("ascii") + .rstrip() + .split()[0] + ) if mediaconchResults == "pass!": mediaconchResults = "PASS" else: mediaconchResults = "FAIL" return mediaconchResults + def generate_system_log(ffvers, tstime, tftime): - #gather system info for json output + # gather system info for json output osinfo = platform.platform() systemInfo = { - 'operating system': osinfo, - 'ffmpeg version': ffvers, - 'transcode start time': tstime, - 'transcode end time': tftime - #TO DO: add capture software/version maybe -- would have to pull from csv + "operating system": osinfo, + "ffmpeg version": ffvers, + "transcode start time": tstime, + "transcode end time": tftime + # TO DO: add capture software/version maybe -- would have to pull from csv } return systemInfo + def qc_results(inventoryCheck, losslessCheck, mediaconchResults): QC_results = {} - QC_results['QC'] = { - 'Inventory Check': inventoryCheck, - 'Lossless Check': losslessCheck, - 'Mediaconch Results': mediaconchResults, + QC_results["QC"] = { + "Inventory Check": inventoryCheck, + "Lossless Check": losslessCheck, + "Mediaconch Results": mediaconchResults, } return QC_results + def guess_date(string): for fmt in ["%m/%d/%Y", "%d-%m-%Y", "%m/%d/%y", "%Y-%m-%d"]: try: @@ -284,165 +563,219 @@ def guess_date(string): continue raise ValueError(string) + def generate_coding_history(coding_history, hardware, append_list): - ''' + """ Formats hardware into BWF style coding history. Takes a piece of hardware (formatted: 'model; serial No.'), splits it at ';' and then searches the equipment dictionary for that piece of hardware. Then iterates through a list of other fields to append in the free text section. If the hardware is not found in the equipment dictionary this will just pull the info from the csv file and leave out some of the BWF formatting. 
- ''' + """ equipmentDict = equipment_dict.equipment_dict() - if hardware.split(';')[0] in equipmentDict.keys(): - hardware_history = equipmentDict[hardware.split(';')[0]]['Coding Algorithm'] + ',' + 'T=' + hardware + if hardware.split(";")[0] in equipmentDict.keys(): + hardware_history = ( + equipmentDict[hardware.split(";")[0]]["Coding Algorithm"] + + "," + + "T=" + + hardware + ) for i in append_list: if i: - hardware_history += '; ' + hardware_history += "; " hardware_history += i - if 'Hardware Type' in equipmentDict.get(hardware.split(';')[0]): - hardware_history += '; ' - hardware_history += equipmentDict[hardware.split(';')[0]]['Hardware Type'] + if "Hardware Type" in equipmentDict.get(hardware.split(";")[0]): + hardware_history += "; " + hardware_history += equipmentDict[hardware.split(";")[0]]["Hardware Type"] coding_history.append(hardware_history) - #handle case where equipment is not in the equipmentDict using a more general format - elif hardware and not hardware.split(';')[0] in equipmentDict.keys(): + # handle case where equipment is not in the equipmentDict using a more general format + elif hardware and not hardware.split(";")[0] in equipmentDict.keys(): hardware_history = hardware for i in append_list: if i: - hardware_history += '; ' + hardware_history += "; " hardware_history += i coding_history.append(hardware_history) else: pass return coding_history + def import_csv(csvInventory): csvDict = {} try: - with open(csvInventory, encoding='utf-8')as f: - reader = csv.DictReader(f, delimiter=',') - video_fieldnames_list = ['filename', 'work_accession_number', 'ALMA number/Finding Aid', 'Barcode', 'inventory_title', 'Record Date/Time', 'Housing/Container Markings', 'Condition Notes', 'Format', 'Capture Date', 'Digitizer', 'VTR', 'VTR Output Used', 'Tape Brand', 'Tape Record Mode', 'TBC', 'TBC Output Used', 'ADC', 'Capture Card', 'Sound', 'Region', 'Capture notes'] - missing_fieldnames = [i for i in video_fieldnames_list if not i in reader.fieldnames] + with open(csvInventory, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter=",") + video_fieldnames_list = [ + "filename", + "work_accession_number", + "ALMA number/Finding Aid", + "Barcode", + "inventory_title", + "Record Date/Time", + "Housing/Container Markings", + "Condition Notes", + "Format", + "Capture Date", + "Digitizer", + "VTR", + "VTR Output Used", + "Tape Brand", + "Tape Record Mode", + "TBC", + "TBC Output Used", + "ADC", + "Capture Card", + "Sound", + "Region", + "Capture notes", + ] + missing_fieldnames = [ + i for i in video_fieldnames_list if not i in reader.fieldnames + ] if not missing_fieldnames: for row in reader: - name = row['filename'] - id1 = row['work_accession_number'] - id2 = row['ALMA number/Finding Aid'] - id3 = row['Barcode'] - title = row['inventory_title'] - record_date = row['Record Date/Time'] - container_markings = row['Housing/Container Markings'] - container_markings = container_markings.split('\n') - condition_notes = row['Condition Notes'] - format = row['Format'] - captureDate = row['Capture Date'] - #try to format date as yyyy-mm-dd if not formatted correctly + name = row["filename"] + id1 = row["work_accession_number"] + id2 = row["ALMA number/Finding Aid"] + id3 = row["Barcode"] + title = row["inventory_title"] + record_date = row["Record Date/Time"] + container_markings = row["Housing/Container Markings"] + container_markings = container_markings.split("\n") + condition_notes = row["Condition Notes"] + format = row["Format"] + captureDate = row["Capture Date"] + # try 
to format date as yyyy-mm-dd if not formatted correctly if captureDate: captureDate = str(guess_date(captureDate)) - digitizationOperator = row['Digitizer'] - vtr = row['VTR'] - vtrOut = row['VTR Output Used'] - tapeBrand = row['Tape Brand'] - recordMode = row['Tape Record Mode'] - tbc = row['TBC'] - tbcOut = row['TBC Output Used'] - adc = row['ADC'] - dio = row['Capture Card'] - sound = row['Sound'] - sound = sound.split('\n') - region = row['Region'] - capture_notes = row['Capture notes'] + digitizationOperator = row["Digitizer"] + vtr = row["VTR"] + vtrOut = row["VTR Output Used"] + tapeBrand = row["Tape Brand"] + recordMode = row["Tape Record Mode"] + tbc = row["TBC"] + tbcOut = row["TBC Output Used"] + adc = row["ADC"] + dio = row["Capture Card"] + sound = row["Sound"] + sound = sound.split("\n") + region = row["Region"] + capture_notes = row["Capture notes"] coding_history = [] - coding_history = generate_coding_history(coding_history, vtr, [tapeBrand, recordMode, region, vtrOut]) - coding_history = generate_coding_history(coding_history, tbc, [tbcOut]) - coding_history = generate_coding_history(coding_history, adc, [None]) - coding_history = generate_coding_history(coding_history, dio, [None]) + coding_history = generate_coding_history( + coding_history, vtr, [tapeBrand, recordMode, region, vtrOut] + ) + coding_history = generate_coding_history( + coding_history, tbc, [tbcOut] + ) + coding_history = generate_coding_history( + coding_history, adc, [None] + ) + coding_history = generate_coding_history( + coding_history, dio, [None] + ) csvData = { - 'Accession number/Call number' : id1, - 'ALMA number/Finding Aid' : id2, - 'Barcode' : id3, - 'Title' : title, - 'Record Date' : record_date, - 'Container Markings' : container_markings, - 'Condition Notes' : condition_notes, - 'Format' : format, - 'Digitization Operator' : digitizationOperator, - 'Capture Date' : captureDate, - 'Coding History' : coding_history, - 'Sound Note' : sound, - 'Capture Notes' : capture_notes + "Accession number/Call number": id1, + "ALMA number/Finding Aid": id2, + "Barcode": id3, + "Title": title, + "Record Date": record_date, + "Container Markings": container_markings, + "Condition Notes": condition_notes, + "Format": format, + "Digitization Operator": digitizationOperator, + "Capture Date": captureDate, + "Coding History": coding_history, + "Sound Note": sound, + "Capture Notes": capture_notes, } - csvDict.update({name : csvData}) - elif not 'File name' in missing_fieldnames: + csvDict.update({name: csvData}) + elif not "File name" in missing_fieldnames: print("WARNING: Unable to find all column names in csv file") print("File name column found. Interpreting csv file as file list") print("CONTINUE? 
(y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: - for row in reader: - name = row['File name'] + for row in reader: + name = row["File name"] csvData = {} - csvDict.update({name : csvData}) + csvDict.update({name: csvData}) elif choice in no: - quit() + quit() else: - sys.stdout.write("Please respond with 'yes' or 'no'") - quit() + sys.stdout.write("Please respond with 'yes' or 'no'") + quit() else: print("No matching column names found in csv file") - #print(csvDict) + # print(csvDict) except FileNotFoundError: print("Issue importing csv file") return csvDict + def convert_runtime(duration): runtime = time.strftime("%H:%M:%S", time.gmtime(float(duration))) return runtime + def write_output_csv(outdir, csvHeaderList, csvWriteList, output_metadata, qcResults): csv_file = os.path.join(outdir, "qc_log.csv") csvOutFileExists = os.path.isfile(csv_file) - with open(csv_file, 'a') as f: - writer = csv.writer(f, delimiter=',', lineterminator='\n') + with open(csv_file, "a") as f: + writer = csv.writer(f, delimiter=",", lineterminator="\n") if not csvOutFileExists: writer.writerow(csvHeaderList) writer.writerow(csvWriteList) -def create_json(jsonAbsPath, systemInfo, input_metadata, mov_stream_sum, mkvHash, mkv_stream_sum, baseFilename, output_metadata, item_csvDict, qcResults): - input_techMetaV = input_metadata.get('techMetaV') - input_techMetaA = input_metadata.get('techMetaA') - input_file_metadata = input_metadata.get('file metadata') - output_techMetaV = output_metadata.get('techMetaV') - output_techMetaA = output_metadata.get('techMetaA') - output_file_metadata = output_metadata.get('file metadata') - #create dictionary for json output +def create_json( + jsonAbsPath, + systemInfo, + input_metadata, + mov_stream_sum, + mkvHash, + mkv_stream_sum, + baseFilename, + output_metadata, + item_csvDict, + qcResults, +): + input_techMetaV = input_metadata.get("techMetaV") + input_techMetaA = input_metadata.get("techMetaA") + input_file_metadata = input_metadata.get("file metadata") + output_techMetaV = output_metadata.get("techMetaV") + output_techMetaA = output_metadata.get("techMetaA") + output_file_metadata = output_metadata.get("file metadata") + + # create dictionary for json output data = {} data[baseFilename] = [] - #gather pre and post transcode file metadata for json output + # gather pre and post transcode file metadata for json output mov_file_meta = {} ffv1_file_meta = {} - #add stream checksums to metadata - mov_md5_dict = {'a/v streamMD5s': mov_stream_sum} - ffv1_md5_dict = {'md5 checksum': mkvHash, 'a/v streamMD5s': mkv_stream_sum} + # add stream checksums to metadata + mov_md5_dict = {"a/v streamMD5s": mov_stream_sum} + ffv1_md5_dict = {"md5 checksum": mkvHash, "a/v streamMD5s": mkv_stream_sum} input_file_metadata = {**input_file_metadata, **mov_md5_dict} output_file_metadata = {**output_file_metadata, **ffv1_md5_dict} - ffv1_file_meta = {'post-transcode metadata' : output_file_metadata} - mov_file_meta = {'pre-transcode metadata' : input_file_metadata} + ffv1_file_meta = {"post-transcode metadata": output_file_metadata} + mov_file_meta = {"pre-transcode metadata": input_file_metadata} - #gather technical metadata for json output + # gather technical metadata for json output techdata = {} video_techdata = {} audio_techdata = {} - techdata['technical metadata'] = [] - video_techdata = {'video' : input_techMetaV} - audio_techdata = {'audio' : input_techMetaA} - techdata['technical 
metadata'].append(video_techdata) - techdata['technical metadata'].append(audio_techdata) + techdata["technical metadata"] = [] + video_techdata = {"video": input_techMetaV} + audio_techdata = {"audio": input_techMetaA} + techdata["technical metadata"].append(video_techdata) + techdata["technical metadata"].append(audio_techdata) - #gather metadata from csv dictionary as capture metadata - csv_metadata = {'inventory metadata' : item_csvDict} + # gather metadata from csv dictionary as capture metadata + csv_metadata = {"inventory metadata": item_csvDict} - system_info = {'system information' : systemInfo} + system_info = {"system information": systemInfo} data[baseFilename].append(csv_metadata) data[baseFilename].append(system_info) @@ -450,5 +783,5 @@ def create_json(jsonAbsPath, systemInfo, input_metadata, mov_stream_sum, mkvHash data[baseFilename].append(mov_file_meta) data[baseFilename].append(techdata) data[baseFilename].append(qcResults) - with open(jsonAbsPath, 'w', newline='\n') as outfile: + with open(jsonAbsPath, "w", newline="\n") as outfile: json.dump(data, outfile, indent=4) diff --git a/AV/Video/aja_mov2ffv1/run.py b/AV/Video/aja_mov2ffv1/run.py index cdd4c02..5c82d62 100755 --- a/AV/Video/aja_mov2ffv1/run.py +++ b/AV/Video/aja_mov2ffv1/run.py @@ -1,13 +1,16 @@ #!/usr/bin/env python3 import sys + if sys.version_info[0] < 3: raise Exception("Python 3 or a more recent version is required.") + def main(): from aja_mov2ffv1 import mov2ffv1mainfunc + mov2ffv1mainfunc.aja_mov2ffv1_main() + if __name__ == "__main__": - main() - \ No newline at end of file + main() diff --git a/AV/Video/aja_mov2ffv1/setup.py b/AV/Video/aja_mov2ffv1/setup.py index 7f1a442..7a9559d 100644 --- a/AV/Video/aja_mov2ffv1/setup.py +++ b/AV/Video/aja_mov2ffv1/setup.py @@ -1,18 +1,21 @@ #!/usr/bin/env python3 from setuptools import setup -#with open("README") as f: -# long_description = f.read() -setup ( - name ='aja_mov2ffv1', - version='1.0', - license='MIT', -# long_description=long_description, - author='Joshua Yocum', - packages={'aja_mov2ffv1'}, - package_data={'aja_mov2ffv1': ['data/mediaconch_policies/*.xml']}, +# with open("README") as f: +# long_description = f.read() + +setup( + name="aja_mov2ffv1", + version="1.0", + license="MIT", + # long_description=long_description, + author="Joshua Yocum", + packages={"aja_mov2ffv1"}, + package_data={"aja_mov2ffv1": ["data/mediaconch_policies/*.xml"]}, include_package_data=True, - entry_points={'console_scripts': ['aja-mov2ffv1 = aja_mov2ffv1.aja_mov2ffv1:main'],}, - python_requires='>=3.6' -) \ No newline at end of file + entry_points={ + "console_scripts": ["aja-mov2ffv1 = aja_mov2ffv1.aja_mov2ffv1:main"], + }, + python_requires=">=3.6", +) diff --git a/Image/Inventory/inventory_script.py b/Image/Inventory/inventory_script.py index 7ac0582..3da6766 100644 --- a/Image/Inventory/inventory_script.py +++ b/Image/Inventory/inventory_script.py @@ -2,268 +2,562 @@ import csv -#Asking the user questions to decide the workflow that is going to be used by the script -proj_number=input("What is the project number? ") -def ask_projnum_again (): - proj_number=input("What is the project number? ") - proj_P_TF=proj_number.startswith('p') - if proj_P_TF == True: - proj_number_leng=len(proj_number) - if proj_number_leng !=5: - print("Please answer with a 4 digit number after 'p'") - ask_projnum_again() - else: - print() +# Asking the user questions to decide the workflow that is going to be used by the script +proj_number = input("What is the project number? 
") + + +def ask_projnum_again(): + proj_number = input("What is the project number? ") + proj_P_TF = proj_number.startswith("p") + if proj_P_TF == True: + proj_number_leng = len(proj_number) + if proj_number_leng != 5: + print("Please answer with a 4 digit number after 'p'") + ask_projnum_again() else: - print("Please answer with the number prefixed by a p") - ask_projnum_again() - -proj_P_TF=proj_number.startswith('p') -if proj_P_TF == True: - proj_number_leng=len(proj_number) - if proj_number_leng !=5: - print("Please answer with a 4 digit number after 'p'") - ask_projnum_again() - else: - print() -else: + print() + else: print("Please answer with the number prefixed by a p") ask_projnum_again() -proj_4dig=input("What is the four letter id? ") -def proj_name_check_again(): - proj_4dig=input("What is the four letter id? ") - proj_name_leng=len(proj_4dig) - if proj_name_leng !=4: - print("Please answer again with a four letter name") - proj_name_check_again() - else: - print("Has correct number of letters - cleared to continue") - -proj_name_leng=len(proj_4dig) -if proj_name_leng !=4: - print("Please answer again with a four letter name") - proj_name_check_again() -else: +proj_P_TF = proj_number.startswith("p") +if proj_P_TF == True: + proj_number_leng = len(proj_number) + if proj_number_leng != 5: + print("Please answer with a 4 digit number after 'p'") + ask_projnum_again() + else: print() +else: + print("Please answer with the number prefixed by a p") + ask_projnum_again() -box_other=input("Are there boxes and folders? y for Yes and n for No ") - -#Building some of the intial variables that will be used -field_names = ['Spreadsheet Row Number', 'work_image' , 'structure' , 'role' , 'work_type' , 'work_accession_number' , 'file_accession_number' , 'filename' , 'label' , 'description' , 'Capture date' , 'Staff Initials' , 'Container number ex. Box Number' , 'folder number' , 'Width (cm.)' , 'Height (cm.)' , 'Date (Year+Month+Day)' , 'project_job_number' , 'Notes about album page or photo', 'Production Notes', 'Creator' , 'Source' , 'Copyright Notice'] - -file_path=input("Where do you want to save to? ") -csv_name=proj_number+'_'+proj_4dig+'_'+'inventory'+'.csv' #assigning the CSV file a name based on the data that the user input -open_CSV='"'+'start EXCEL.exe'+' '+csv_name+'"' #Part of the command to open the CSV file in Excel -- you can swap ' EXCEL.exe' for your preferred CSV editor that is installed on your PC - -#defining functions -def ask_overwrite(): - overwrite=input("File already exists, do you want to append? y for yes and n for no -- will terminate script") - if overwrite =='y': - with open(os.path.join(file_path, open_CSV),'w') as fp: - fp.write() - - elif overwrite=='n': - print('Try running the script again') - -def save_CSV(): - try: - pass - except: - pass - -def ask_new_work_bOrf(): - new_work=input('Do you want to add a new work - y for yes, n for no which will open the CSV ') - - if new_work =='y': #if you want a new work this will run - file_number=1 - work_info=input("What is the work info? ") - number_files=int(input("How many works? 
")) #number to loop by - while file_number<=number_files: # runs while the file number is less than or equal to the number of files that you need - - #Leading Zeros - file_number=str(file_number).zfill(4) - #Leading Zeros - - #Combines the information that the user has put in to fill out select fields in the CSV file - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+work_info, 'file_accession_number':proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a'+'.tif', 'role':'A', 'work_type':'IMAGE','project_job_number':proj_number}] - for data in inventory: - writer.writerows(inventory) - # Below increments the row and file up by one - file_number=int(file_number)+1 - - ask_new_work_bOrf() - - elif new_work=='n': - save_CSV() #command to save the CSV - os.system(open_CSV) #Command to open the CSV in Excel - -def ask_new_work_choose(): - new_work=input('Do you want to add a new work - y for yes, n for no which will open the CSV ') - if new_work =='y': #if you want a new work this will run - file_number=1 - work_info=input("What is the work info? ") - number_files=int(input("How many works? ")) #number to loop by - while file_number<=number_files: # runs while the file number is less than or equal to the number of files that you need +proj_4dig = input("What is the four letter id? ") - #Leading Zeros - file_number=str(file_number).zfill(4) - #Leading Zeros +def proj_name_check_again(): + proj_4dig = input("What is the four letter id? ") + proj_name_leng = len(proj_4dig) + if proj_name_leng != 4: + print("Please answer again with a four letter name") + proj_name_check_again() + else: + print("Has correct number of letters - cleared to continue") - #Combines the information that the user has put in to fill out select fields in the CSV file - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+work_info, 'file_accession_number':proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a'+'.tif','role':'A', 'work_type':'IMAGE','project_job_number':proj_number}] - for data in inventory: - writer.writerows(inventory) - # Below increments the row and file up by one - file_number=int(file_number)+1 - ask_new_work_choose() - elif new_work=='n': - save_CSV() #Saves the CSV File - os.system(open_CSV) #Opens the CSV file in Excel +proj_name_leng = len(proj_4dig) +if proj_name_leng != 4: + print("Please answer again with a four letter name") + proj_name_check_again() +else: + print() + +box_other = input("Are there boxes and folders? y for Yes and n for No ") + +# Building some of the intial variables that will be used +field_names = [ + "Spreadsheet Row Number", + "work_image", + "structure", + "role", + "work_type", + "work_accession_number", + "file_accession_number", + "filename", + "label", + "description", + "Capture date", + "Staff Initials", + "Container number ex. Box Number", + "folder number", + "Width (cm.)", + "Height (cm.)", + "Date (Year+Month+Day)", + "project_job_number", + "Notes about album page or photo", + "Production Notes", + "Creator", + "Source", + "Copyright Notice", +] + +file_path = input("Where do you want to save to? 
") +csv_name = ( + proj_number + "_" + proj_4dig + "_" + "inventory" + ".csv" +) # assigning the CSV file a name based on the data that the user input +open_CSV = ( + '"' + "start EXCEL.exe" + " " + csv_name + '"' +) # Part of the command to open the CSV file in Excel -- you can swap ' EXCEL.exe' for your preferred CSV editor that is installed on your PC + + +# defining functions +def ask_overwrite(): + overwrite = input( + "File already exists, do you want to append? y for yes and n for no -- will terminate script" + ) + if overwrite == "y": + with open(os.path.join(file_path, open_CSV), "w") as fp: + fp.write() + elif overwrite == "n": + print("Try running the script again") -def ask_new_bOrf(): #function that will ask if you want to add a box or folder. If you want to add pages, answer 'n' to the question and then 'y' to the following question - new_bOrf=input('Do you want to add a work - y for yes, n for no (which will ask about if you want to add a work with page designation) ') +def save_CSV(): + try: + pass + except: + pass - if new_bOrf =='y': #if you want a new box or folder this will run - file_number=1 - box_number=input("What is the box number?") - folder_number=input("What is the folder number?") - - #Leading Zeros - box_number=str(box_number).zfill(3) - folder_number=str(folder_number).zfill(2) - #Leading Zeros +def ask_new_work_bOrf(): + new_work = input( + "Do you want to add a new work - y for yes, n for no which will open the CSV " + ) + + if new_work == "y": # if you want a new work this will run + file_number = 1 + work_info = input("What is the work info? ") + number_files = int(input("How many works? ")) # number to loop by + while ( + file_number <= number_files + ): # runs while the file number is less than or equal to the number of files that you need + # Leading Zeros + file_number = str(file_number).zfill(4) + # Leading Zeros + + # Combines the information that the user has put in to fill out select fields in the CSV file + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info, + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a" + + ".tif", + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + } + ] + for data in inventory: + writer.writerows(inventory) + # Below increments the row and file up by one + file_number = int(file_number) + 1 + + ask_new_work_bOrf() + + elif new_work == "n": + save_CSV() # command to save the CSV + os.system(open_CSV) # Command to open the CSV in Excel - folder_files=int(input("How many works? ")) #number to loop by - while file_number <= folder_files: # runs while the file number is less than or equal to the number of files that you need - #Leading Zeros - file_number=str(file_number).zfill(4) - #Leading Zeros +def ask_new_work_choose(): + new_work = input( + "Do you want to add a new work - y for yes, n for no which will open the CSV " + ) + + if new_work == "y": # if you want a new work this will run + file_number = 1 + work_info = input("What is the work info? ") + number_files = int(input("How many works? 
")) # number to loop by + while ( + file_number <= number_files + ): # runs while the file number is less than or equal to the number of files that you need + # Leading Zeros + file_number = str(file_number).zfill(4) + # Leading Zeros + + # Combines the information that the user has put in to fill out select fields in the CSV file + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info, + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a" + + ".tif", + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + } + ] + for data in inventory: + writer.writerows(inventory) + # Below increments the row and file up by one + file_number = int(file_number) + 1 + + ask_new_work_choose() + elif new_work == "n": + save_CSV() # Saves the CSV File + os.system(open_CSV) # Opens the CSV file in Excel + + +def ask_new_bOrf(): # function that will ask if you want to add a box or folder. If you want to add pages, answer 'n' to the question and then 'y' to the following question + new_bOrf = input( + "Do you want to add a work - y for yes, n for no (which will ask about if you want to add a work with page designation) " + ) + + if new_bOrf == "y": # if you want a new box or folder this will run + file_number = 1 + box_number = input("What is the box number?") + folder_number = input("What is the folder number?") + + # Leading Zeros + box_number = str(box_number).zfill(3) + folder_number = str(folder_number).zfill(2) + # Leading Zeros + + folder_files = int(input("How many works? ")) # number to loop by + while ( + file_number <= folder_files + ): # runs while the file number is less than or equal to the number of files that you need + # Leading Zeros + file_number = str(file_number).zfill(4) + # Leading Zeros + + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number, + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file_number + + "_" + + "01" + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file_number + + "_" + + "01" + + "_" + + "a" + + ".tif", + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + "Container number ex. Box Number": box_number, + "folder number": folder_number, + } + ] + for data in inventory: + writer.writerows(inventory) + file_number = int(file_number) + 1 + + ask_new_bOrf() + + elif new_bOrf == "n": # if you do not want to run a new box or folder + new_pages = input( + "Do you want to add a work with more than one page - y for Yes and n for No " + ) # will ask if you want to add a work that has to have page designation + if new_pages == "y": # if you want to use page designation this will run + page_path() + elif new_pages == "n": + save_CSV() # Saves the CSV + os.system(open_CSV) # Opens the CSV in Excel + + +def page_path(): # This script runs when you want to add works with page designations + page_number = 1 + box_number = input("What is the box number? ") + folder_number = input("What is the folder number? 
") + + # Leading Zeros + box_number = str(box_number).zfill(3) + folder_number = str(folder_number).zfill(2) + # Leading Zeros + + file = input("What is your work number? ") + pages = int( + input("What is the number of images in the work? ") + ) # number to loop by + + while ( + page_number <= pages + ): # runs while the file number is less than or equal to the number of files that you need + # Leading Zeros + page_number = str(page_number).zfill(4) + file = str(file).zfill(2) + # Leading Zeros + + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file + + "_", + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file + + "_" + + page_number + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file + + "_" + + page_number + + "_" + + "a" + + ".tif", + "Container number ex. Box Number": box_number, + "folder number": folder_number, + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + } + ] + + for data in inventory: + writer.writerows(inventory) + page_number = int(page_number) + 1 + ask_new_bOrf() # will then call the ask_new_bOrf function to see if you want to add a box or folder next. If you want to run another work with page designations, answer 'n' to the first question and the 'y' to the next + + +# setting variables up to be used + + +page_number = 1 +file_number = 1 +new_bOrf = 0 + + +if box_other == "y": # will run box folder version + pages_yn = input( + "Does your folder have works with multiple images? (ex. a 3 page letter) y for Yes and n for No " + ) + if pages_yn == "y": # runs if you need to designate page numbers + with open(csv_name, "a", newline="", encoding="utf-8") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=field_names) + writer.writeheader() + page_path() + + elif pages_yn == "n": + box_number = input("What is the box number? ") + folder_number = input("What is the folder number? ") + + # Leading Zeros + box_number = str(box_number).zfill(3) + folder_number = str(folder_number).zfill(2) + # Leading Zeros + + folder_files = int( + input("What is the number of images? ") + ) # number to loop by + with open(csv_name, "a", newline="", encoding="utf-8") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=field_names) + writer.writeheader() + + while ( + file_number <= folder_files + ): # runs while the file number is less than or equal to the number of files that you need + # Leading Zeros + file_number = str(file_number).zfill(4) + # Leading Zeros + + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file_number, + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file_number + + "_" + + "01" + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + "b" + + box_number + + "_" + + "f" + + folder_number + + "_" + + file_number + + "_" + + "01" + + "_" + + "a" + + ".tif", + "Container number ex. 
Box Number": box_number, + "folder number": folder_number, + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + } + ] + + for data in inventory: + writer.writerows(inventory) + + # Below increments the file up by one + file_number = int(file_number) + 1 + ask_new_bOrf() - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number, 'file_accession_number':proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file_number+'_'+'01'+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file_number+'_'+'01'+'_'+'a'+'.tif','role':'A', 'work_type':'IMAGE','project_job_number':proj_number, 'Container number ex. Box Number':box_number,'folder number':folder_number}] - for data in inventory: - writer.writerows(inventory) - file_number=int(file_number)+1 - ask_new_bOrf() - - elif new_bOrf == 'n': #if you do not want to run a new box or folder - new_pages=input('Do you want to add a work with more than one page - y for Yes and n for No ') #will ask if you want to add a work that has to have page designation - if new_pages =='y': #if you want to use page designation this will run - page_path() - elif new_pages=='n': - save_CSV() #Saves the CSV - os.system(open_CSV) #Opens the CSV in Excel - -def page_path(): #This script runs when you want to add works with page designations - page_number=1 - box_number=input("What is the box number? ") - folder_number=input("What is the folder number? ") - - #Leading Zeros - box_number=str(box_number).zfill(3) - folder_number=str(folder_number).zfill(2) - #Leading Zeros - - - file=input('What is your work number? ') - pages=int(input("What is the number of images in the work? ")) #number to loop by - - while page_number <= pages: # runs while the file number is less than or equal to the number of files that you need - - #Leading Zeros - page_number=str(page_number).zfill(4) - file=str(file).zfill(2) - #Leading Zeros - - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number +'_'+file+'_', 'file_accession_number':proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file+'_'+page_number+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file+'_'+page_number+'_'+'a'+'.tif', 'Container number ex. Box Number':box_number, 'folder number':folder_number, 'role':'A', 'work_type':'IMAGE','project_job_number':proj_number}] - - for data in inventory: - writer.writerows(inventory) - page_number=int(page_number)+1 - ask_new_bOrf() #will then call the ask_new_bOrf function to see if you want to add a box or folder next. If you want to run another work with page designations, answer 'n' to the first question and the 'y' to the next - - -#setting variables up to be used - - -page_number=1 -file_number=1 -new_bOrf=0 - - - - -if box_other=='y': # will run box folder version - pages_yn=input("Does your folder have works with multiple images? (ex. a 3 page letter) y for Yes and n for No ") - if pages_yn=='y': #runs if you need to designate page numbers - with open (csv_name,'a', newline='', encoding='utf-8') as csvfile: - writer=csv.DictWriter(csvfile,fieldnames=field_names) - writer.writeheader() - page_path() - - elif pages_yn =='n': - box_number=input("What is the box number? ") - folder_number=input("What is the folder number? 
") - - #Leading Zeros - box_number=str(box_number).zfill(3) - folder_number=str(folder_number).zfill(2) - #Leading Zeros - - folder_files=int(input("What is the number of images? ")) #number to loop by - with open (csv_name,'a', newline='', encoding='utf-8') as csvfile: - writer=csv.DictWriter(csvfile,fieldnames=field_names) - writer.writeheader() - - while file_number <= folder_files: # runs while the file number is less than or equal to the number of files that you need - - #Leading Zeros - file_number=str(file_number).zfill(4) - #Leading Zeros - - - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file_number, 'file_accession_number':proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file_number+'_'+'01'+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+'b'+box_number+'_'+'f'+folder_number+'_'+file_number+'_'+'01'+'_'+'a'+'.tif', 'Container number ex. Box Number':box_number, 'folder number':folder_number, 'role':'A', 'work_type':'IMAGE','project_job_number':proj_number}] - - for data in inventory: - writer.writerows(inventory) - - # Below increments the file up by one - file_number=int(file_number)+1 - ask_new_bOrf() - - - - -elif box_other=='n': #will run choose your own adventure - work_info=input("What is your work info ex. v for volume? ") #takes the place of the Box and Folder # information - number_files=int(input("What is the number of images? ")) # number to loop by - - - with open (csv_name,'a', newline='', encoding='utf-8') as csvfile: - writer=csv.DictWriter(csvfile,fieldnames=field_names) - writer.writeheader() - while file_number <= number_files: # runs while the file number is less than or equal to the number of files that you need - - #leading zeros - file_number=str(file_number).zfill(4) - #leading zeros - - inventory=[{'work_accession_number': proj_number+'_'+proj_4dig+'_'+work_info, 'file_accession_number':proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a', 'filename': proj_number+'_'+proj_4dig+'_'+work_info+'_'+file_number+'_'+'a'+'.tif','role':'A', 'work_type':'IMAGE','project_job_number':proj_number}] - for data in inventory: - writer.writerows(inventory) - # Below increments the row and file up by one - file_number=int(file_number)+1 +elif box_other == "n": # will run choose your own adventure + work_info = input( + "What is your work info ex. v for volume? " + ) # takes the place of the Box and Folder # information + number_files = int( + input("What is the number of images? 
") + ) # number to loop by + + with open(csv_name, "a", newline="", encoding="utf-8") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=field_names) + writer.writeheader() + while ( + file_number <= number_files + ): # runs while the file number is less than or equal to the number of files that you need + # leading zeros + file_number = str(file_number).zfill(4) + # leading zeros + + inventory = [ + { + "work_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info, + "file_accession_number": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a", + "filename": proj_number + + "_" + + proj_4dig + + "_" + + work_info + + "_" + + file_number + + "_" + + "a" + + ".tif", + "role": "A", + "work_type": "IMAGE", + "project_job_number": proj_number, + } + ] + for data in inventory: + writer.writerows(inventory) + # Below increments the row and file up by one + file_number = int(file_number) + 1 + + ask_new_work_choose() - ask_new_work_choose() - else: print("Try again running the script again with y for YES or n for NO") diff --git a/Image/iqc/iqc/iqc.py b/Image/iqc/iqc/iqc.py index 000c033..0def278 100755 --- a/Image/iqc/iqc/iqc.py +++ b/Image/iqc/iqc/iqc.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -#TO DO - Organize functions better -#TO DO - add a way to handle checksum files with path information in them +# TO DO - Organize functions better +# TO DO - add a way to handle checksum files with path information in them import csv import json @@ -17,33 +17,35 @@ from PIL import Image from iqc.iqcparameters import args -column_to_match = 'filename' -target_identifiers = ['target_', '_target.tif'] -target_filter = '|'.join(target_identifiers) +column_to_match = "filename" +target_identifiers = ["target_", "_target.tif"] +target_filter = "|".join(target_identifiers) + def input_check(): - '''Checks if input was provided and if it is a directory that exists''' + """Checks if input was provided and if it is a directory that exists""" if args.input_path: indir = args.input_path else: - print ("No input provided") + print("No input provided") quit() if not os.path.isdir(indir): - print('input is not a directory') + print("input is not a directory") quit() return indir + def generate_checksums(filename, file, checksum_type): - '''Uses hashlib to return an MD5 checksum of an input filename''' + """Uses hashlib to return an MD5 checksum of an input filename""" read_size = 0 last_percent_done = 0 method_to_call = getattr(hashlib, checksum_type) chksm = method_to_call() total_size = os.path.getsize(filename) - with open(filename, 'rb') as f: + with open(filename, "rb") as f: while True: - #2**20 is for reading the file in 1 MiB chunks + # 2**20 is for reading the file in 1 MiB chunks buf = f.read(2**20) if not buf: break @@ -51,48 +53,54 @@ def generate_checksums(filename, file, checksum_type): chksm.update(buf) percent_done = 100 * read_size / total_size if percent_done > last_percent_done: - sys.stdout.write(checksum_type + ' ' + file + ': ' + '[%d%%]\r' % percent_done) + sys.stdout.write( + checksum_type + " " + file + ": " + "[%d%%]\r" % percent_done + ) sys.stdout.flush() last_percent_done = percent_done print() checksum_output = chksm.hexdigest() return checksum_output + def interpret_checksum_command(): - '''checks if argument used with verify_checksums is valid''' - checksum_list = ['md5', 'sha1'] + """checks if argument used with verify_checksums is valid""" + checksum_list = ["md5", "sha1"] for i in args.verify_checksums: if not i in 
checksum_list: - print('\n---ERROR: ' + i + ' is not a valid checksum input ---\n') + print("\n---ERROR: " + i + " is not a valid checksum input ---\n") quit() + def output_check(): - '''Checks that output is valid''' + """Checks that output is valid""" output = args.output_path - if not output.endswith('.json'): + if not output.endswith(".json"): print("\n--- ERROR: Output must be a JSON file ---\n") quit() - #print("Checking output path") + # print("Checking output path") try: - with open(output, 'w', newline='\n') as outfile: + with open(output, "w", newline="\n") as outfile: outfile.close except OSError: - print("\n--- ERROR: Unable to create output file", output + ' ---\n') + print("\n--- ERROR: Unable to create output file", output + " ---\n") quit() -def image_handler(file, subdir, cleanSubdir, parameter_dict, inventorydf, column_to_match): - '''create dictionary that forms the basis for dataframes for found images''' +def image_handler( + file, subdir, cleanSubdir, parameter_dict, inventorydf, column_to_match +): + """create dictionary that forms the basis for dataframes for found images""" filepath = os.path.join(subdir, file) - for i in parameter_dict['Images']['extension']: + for i in parameter_dict["Images"]["extension"]: if file.endswith(i): - parameter_dict['Images']['row_counter'] += 1 - rowNumber = "row_" + str(parameter_dict['Images']['row_counter']) + parameter_dict["Images"]["row_counter"] += 1 + rowNumber = "row_" + str(parameter_dict["Images"]["row_counter"]) name = Path(file).stem attribute_list = [file, name, cleanSubdir, i] if args.verify_checksums: for a in args.verify_checksums: - sumtype = a + 'sum' + sumtype = a + "sum" sumtype = [generate_checksums(filepath, file, a)] attribute_list += sumtype if args.techdata: @@ -101,74 +109,91 @@ def image_handler(file, subdir, cleanSubdir, parameter_dict, inventorydf, column isgrayscale = is_grayscale(filepath) profile = load_profile(file, inventorydf, column_to_match, isgrayscale) techmetadata = get_tech_metadata(filepath) - if not str(techmetadata.get('BitsPerSample')) == profile.get('Bit Depth'): - bit_depth_check = 'FAIL' + if not str(techmetadata.get("BitsPerSample")) == profile.get( + "Bit Depth" + ): + bit_depth_check = "FAIL" else: - bit_depth_check = 'PASS' - if not techmetadata.get('ProfileDescription') in profile.get('Profile Description'): - profile_check = 'FAIL' + bit_depth_check = "PASS" + if not techmetadata.get("ProfileDescription") in profile.get( + "Profile Description" + ): + profile_check = "FAIL" else: - profile_check = 'PASS' + profile_check = "PASS" attribute_list += [bit_depth_check, profile_check] fileAttributes = {rowNumber: attribute_list} - parameter_dict['Images']['dictionary'].update(fileAttributes) + parameter_dict["Images"]["dictionary"].update(fileAttributes) + def checksum_handler(file, subdir, cleanSubdir, parameter_dict): - '''pull information out of checksum files''' + """pull information out of checksum files""" for i in parameter_dict.keys(): - if file.endswith(parameter_dict[i]['extension']): - parameter_dict[i]['row_counter'] += 1 - rowNumber = "row_" + str(parameter_dict[i]['row_counter']) + if file.endswith(parameter_dict[i]["extension"]): + parameter_dict[i]["row_counter"] += 1 + rowNumber = "row_" + str(parameter_dict[i]["row_counter"]) name = Path(file).stem - '''remove double stem if present (i.e. .tif.md5)''' - if '.' in name: + """remove double stem if present (i.e. .tif.md5)""" + if "." 
in name: name = Path(name).stem with open(os.path.join(subdir, file)) as f: content = f.readlines() - content = [line.rstrip('\n') for line in content] + content = [line.rstrip("\n") for line in content] for value in content: - if '*' in value: - checksum_filename = value.split('*')[1] - elif ' = ' in value: - checksum_filename = value.split(' = ')[0] + if "*" in value: + checksum_filename = value.split("*")[1] + elif " = " in value: + checksum_filename = value.split(" = ")[0] else: - checksum_filename = value.split(' ')[1] - if ' ' in value or '*' in value: - checksum_value = value.split(' ')[0] - elif ' = ' in value: - checksum_value = value.split(' = ')[1] + checksum_filename = value.split(" ")[1] + if " " in value or "*" in value: + checksum_value = value.split(" ")[0] + elif " = " in value: + checksum_value = value.split(" = ")[1] else: - print ('having trouble parsing checksums') - print ('are they formatted correctly?') + print("having trouble parsing checksums") + print("are they formatted correctly?") quit() - cleanup_characters = ['./', 'MD5 ', 'SHA1 ', '(', ')'] + cleanup_characters = ["./", "MD5 ", "SHA1 ", "(", ")"] for chars in cleanup_characters: if chars in checksum_filename: - checksum_filename = checksum_filename.replace(chars, '') - fileAttributes = {rowNumber: [name, file, cleanSubdir, checksum_filename, checksum_value]} - parameter_dict[i]['dictionary'].update(fileAttributes) + checksum_filename = checksum_filename.replace(chars, "") + fileAttributes = { + rowNumber: [name, file, cleanSubdir, checksum_filename, checksum_value] + } + parameter_dict[i]["dictionary"].update(fileAttributes) + def exiftool_check(): - '''checks that exiftool exists by running its -ver command''' + """checks that exiftool exists by running its -ver command""" try: - subprocess.check_output([args.exiftool_path, '-ver']).decode("ascii").rstrip() + subprocess.check_output([args.exiftool_path, "-ver"]).decode("ascii").rstrip() except: print("Error locating exiftool") quit() + def get_iptc_metadata(filename, filepath, column_to_match, exifmetalist): - '''Run exiftool to get IPTC metadata''' - #TO DO - Feed this a list of exiftool fields to check rather than hard coding them in the command? - exiftool_command = [args.exiftool_path, '-by-line', '-source', '-copyrightnotice', '-j', filepath] - #exiftool_command = [args.exiftool_path, '-headline', '-by-line', '-source', '-copyrightnotice', '-j', filepath] + """Run exiftool to get IPTC metadata""" + # TO DO - Feed this a list of exiftool fields to check rather than hard coding them in the command? 
+ exiftool_command = [ + args.exiftool_path, + "-by-line", + "-source", + "-copyrightnotice", + "-j", + filepath, + ] + # exiftool_command = [args.exiftool_path, '-headline', '-by-line', '-source', '-copyrightnotice', '-j', filepath] exifdata = subprocess.check_output(exiftool_command) exifdata = json.loads(exifdata) exifdata[0][column_to_match] = filename - exifmetalist += (exifdata) + exifmetalist += exifdata + def get_directory_size(directory): - #credit: How to Get the Size of Directories in Python, Abdou Rockikz - '''Returns the `directory` size in bytes''' + # credit: How to Get the Size of Directories in Python, Abdou Rockikz + """Returns the `directory` size in bytes""" total = 0 try: # print("[+] Getting the size of", directory) @@ -187,76 +212,95 @@ def get_directory_size(directory): return 0 return total + def get_size_format(b, factor=1024, suffix="B"): - #credit: How to Get the Size of Directories in Python, Abdou Rockikz - ''' + # credit: How to Get the Size of Directories in Python, Abdou Rockikz + """ Scale bytes to its proper byte format e.g: 1253656 => '1.20MB' 1253656678 => '1.17GB' - ''' + """ for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if b < factor: return f"{b:.2f}{unit}{suffix}" b /= factor return f"{b:.2f}Y{suffix}" + def is_grayscale(img_path): - '''check if image is grayscale. Return true/false''' - #TO DO - set max_image_pixels to a reasonable limit? + """check if image is grayscale. Return true/false""" + # TO DO - set max_image_pixels to a reasonable limit? Image.MAX_IMAGE_PIXELS = None - img = Image.open(img_path).convert('RGB') + img = Image.open(img_path).convert("RGB") MAX_SIZE = (500, 500) img.thumbnail(MAX_SIZE) w, h = img.size for i in range(w): for j in range(h): - r, g, b = img.getpixel((i,j)) + r, g, b = img.getpixel((i, j)) if r != g != b: return False return True + def load_profile(filename, inventorydf, column_to_match, isgrayscale): - #probably better to store these in an external json file in a data folder so they'll be easier to edit + # probably better to store these in an external json file in a data folder so they'll be easier to edit p_techmetadata = {"Bit Depth": "16 16 16", "Profile Description": ["ProPhoto"]} - a_techmetadata = {"Color": {"Bit Depth": "8 8 8", "Profile Description": ["Adobe RGB (1998)"]}, "Grayscale": {"Bit Depth": "8", "Profile Description": ["Gray Gamma 2.2"]}} + a_techmetadata = { + "Color": {"Bit Depth": "8 8 8", "Profile Description": ["Adobe RGB (1998)"]}, + "Grayscale": {"Bit Depth": "8", "Profile Description": ["Gray Gamma 2.2"]}, + } empty_techmetadata = {"Bit Depth": "None", "Profile Description": ["None"]} try: - role = ''.join(inventorydf.loc[inventorydf[column_to_match] == filename]['role'].values) + role = "".join( + inventorydf.loc[inventorydf[column_to_match] == filename]["role"].values + ) except: role = None - print("WARNING: There may be a problem with the 'role' column in your inventory!") + print( + "WARNING: There may be a problem with the 'role' column in your inventory!" 
+ ) if not role: profile = empty_techmetadata - elif role == 'P': + elif role == "P": profile = p_techmetadata - elif role == 'A' and isgrayscale == False: - profile = a_techmetadata.get('Color') - elif role == 'A' and isgrayscale == True: - profile = a_techmetadata.get('Grayscale') + elif role == "A" and isgrayscale == False: + profile = a_techmetadata.get("Color") + elif role == "A" and isgrayscale == True: + profile = a_techmetadata.get("Grayscale") else: profile = empty_techmetadata return profile + def get_tech_metadata(filepath): - exif_command = [args.exiftool_path, '-profiledescription', '-bitspersample', '-a', '-j', filepath] + exif_command = [ + args.exiftool_path, + "-profiledescription", + "-bitspersample", + "-a", + "-j", + filepath, + ] exifdata = subprocess.check_output(exif_command) exifdata = json.loads(exifdata) exifdata = exifdata[0] return exifdata + def check_for_csv_iptc(inventorydf): - #with open(csvInventory, encoding='utf-8')as f: + # with open(csvInventory, encoding='utf-8')as f: # csv_content = csv.DictReader(f, delimiter=',') - iptc_column_list = ['Copyright Notice', 'Creator', 'Source'] - #iptc_column_list = ['Copyright Notice', 'Headline', 'Creator', 'Source'] + iptc_column_list = ["Copyright Notice", "Creator", "Source"] + # iptc_column_list = ['Copyright Notice', 'Headline', 'Creator', 'Source'] missing_columns = [i for i in iptc_column_list if not i in inventorydf.columns] if missing_columns: print("\nWARNING: Your inventory does not contain the following columns\n") print(", ".join([str(i) for i in missing_columns])) print("\nCONTINUE WITHOUT CHECKING IPTC METADATA? (y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: print() @@ -267,15 +311,20 @@ def check_for_csv_iptc(inventorydf): else: pass + def check_for_checksum_files(indir): checksum_type = " ".join([str(i) for i in args.verify_checksums]) checksum_files = list(Path(indir).rglob(os.path.join("*." + checksum_type))) - #[f for f in os.listdir(indir) if f.endswith(checksum_type)] + # [f for f in os.listdir(indir) if f.endswith(checksum_type)] if not checksum_files: - print("\nWARNING: Your input directory does not contain any " + checksum_type + " files") + print( + "\nWARNING: Your input directory does not contain any " + + checksum_type + + " files" + ) print("\nCONTINUE WITHOUT VERIFYING CHECKSUMS? (y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: print() @@ -284,19 +333,20 @@ def check_for_checksum_files(indir): print() quit() + def check_for_meadow_columns(inventory): - meadow_columns = ['filename'] + meadow_columns = ["filename"] if args.techdata: - meadow_columns += ['role'] - with open(inventory, encoding='utf-8')as f: - reader = csv.DictReader(f, delimiter=',') + meadow_columns += ["role"] + with open(inventory, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter=",") missing_columns = [i for i in meadow_columns if not i in reader.fieldnames] if missing_columns: print("\nWARNING: " + inventory + " is missing the following columns\n") print(", ".join([str(i) for i in missing_columns])) print("\nCONTINUE WITHOUT CHECKING TECHNICAL METADATA? 
(y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: print() @@ -305,13 +355,14 @@ def check_for_meadow_columns(inventory): print("\nPlease update your inventory and try again\n") quit() + def iqc_main(): indir = args.input_path input_check() base_folder_name = os.path.basename(indir) - #script will default to writing json report in input folder if using --all and not specifying an output + # script will default to writing json report in input folder if using --all and not specifying an output if args.all and not args.output_path: - args.output_path = os.path.join(indir, base_folder_name + '-iqc_report.json') + args.output_path = os.path.join(indir, base_folder_name + "-iqc_report.json") if args.output_path: output_check() if args.verify_metadata: @@ -319,44 +370,52 @@ def iqc_main(): exifmetalist = [] if args.verify_checksums: interpret_checksum_command() - #get the input folder size + # get the input folder size input_folder_size = get_size_format(get_directory_size(indir)) - #import inventories as dataframe + # import inventories as dataframe if not args.inventory_path: inventoryPath = indir if os.path.isdir(inventoryPath): inventories = glob.glob(os.path.join(inventoryPath, "*.csv")) if not inventories: - print("\n--- ERROR: No inventory found. Either specify your inventories or place them in the base folder of your input directory ---\n") + print( + "\n--- ERROR: No inventory found. Either specify your inventories or place them in the base folder of your input directory ---\n" + ) quit() - inventorydf = pd.concat([pd.read_csv(inv, skiprows=0, header=0) for inv in inventories]) + inventorydf = pd.concat( + [pd.read_csv(inv, skiprows=0, header=0) for inv in inventories] + ) else: - print('\n--- ERROR: Inventory path is not valid ---\n') + print("\n--- ERROR: Inventory path is not valid ---\n") quit() else: inventoryPath = args.inventory_path - if inventoryPath.endswith('.csv'): + if inventoryPath.endswith(".csv"): inventories = [inventoryPath] if os.path.isfile(inventoryPath): - inventorydf = pd.read_csv(inventoryPath, skiprows=0, header=0) + inventorydf = pd.read_csv(inventoryPath, skiprows=0, header=0) else: - print('\n--- ERROR: Supplied inventory path is not valid ---\n') + print("\n--- ERROR: Supplied inventory path is not valid ---\n") quit() else: if os.path.isdir(inventoryPath): inventories = glob.glob(os.path.join(inventoryPath, "*.csv")) if not inventories: - print("\n--- ERROR: The specified inventory folder does not contain any CSV files ---\n") + print( + "\n--- ERROR: The specified inventory folder does not contain any CSV files ---\n" + ) quit() - #Note - header=1 is used to grab the second row of the spreadsheet as the header row - #use header=0 for csv files where the first row is the header row - inventorydf = pd.concat([pd.read_csv(inv, skiprows=0, header=0) for inv in inventories]) + # Note - header=1 is used to grab the second row of the spreadsheet as the header row + # use header=0 for csv files where the first row is the header row + inventorydf = pd.concat( + [pd.read_csv(inv, skiprows=0, header=0) for inv in inventories] + ) else: - print('\n--- ERROR: Supplied inventory path is not valid ---\n') + print("\n--- ERROR: Supplied inventory path is not valid ---\n") quit() - #pre-run checks of inventories and input files + # pre-run checks of inventories and input files for inv in inventories: check_for_meadow_columns(inv) if args.verify_metadata: @@ -364,124 +423,242 @@ 
def iqc_main(): if args.verify_checksums: check_for_checksum_files(indir) - #create dictionaries containing the formats we want to process - image_dictionary = {'Images' : { 'extension' : ['.tif'], 'row_counter' : 0, 'dictionary' : {}}} - #TO DO - review/update checksum implementation now that script has been changed to only check one type at a time - checksum_sidecar_dictionary = {'MD5' : { 'extension' : '.md5', 'row_counter' : 0, 'dictionary' : {}}, 'SHA1' : { 'extension' : '.sha1', 'row_counter' : 0, 'dictionary' : {}}} + # create dictionaries containing the formats we want to process + image_dictionary = { + "Images": {"extension": [".tif"], "row_counter": 0, "dictionary": {}} + } + # TO DO - review/update checksum implementation now that script has been changed to only check one type at a time + checksum_sidecar_dictionary = { + "MD5": {"extension": ".md5", "row_counter": 0, "dictionary": {}}, + "SHA1": {"extension": ".sha1", "row_counter": 0, "dictionary": {}}, + } print("\nSearching input for files...") - #search input and process specified files + # search input and process specified files for subdir, dirs, files in os.walk(indir): - cleanSubdir = (subdir.replace(indir, '')) - #skip hidden files and folders - files = [f for f in files if not f[0] == '.'] - dirs[:] = [d for d in dirs if not d[0] == '.'] + cleanSubdir = subdir.replace(indir, "") + # skip hidden files and folders + files = [f for f in files if not f[0] == "."] + dirs[:] = [d for d in dirs if not d[0] == "."] for file in files: - #TO DO - reduce the number of variables getting passed around - image_handler(file, subdir, cleanSubdir, image_dictionary, inventorydf, column_to_match) + # TO DO - reduce the number of variables getting passed around + image_handler( + file, + subdir, + cleanSubdir, + image_dictionary, + inventorydf, + column_to_match, + ) checksum_handler(file, subdir, cleanSubdir, checksum_sidecar_dictionary) if args.verify_metadata: - for i in image_dictionary['Images']['extension']: - #TO DO - This most likely is not the most efficient way to do this - #try merging with related image handler steps instead + for i in image_dictionary["Images"]["extension"]: + # TO DO - This most likely is not the most efficient way to do this + # try merging with related image handler steps instead if file.endswith(i): print("checking IPTC metadata for " + file) - get_iptc_metadata(file, os.path.join(subdir, file), column_to_match, exifmetalist) - - #test if these can be grabbed using something like exiftool -By-line -a image? + get_iptc_metadata( + file, + os.path.join(subdir, file), + column_to_match, + exifmetalist, + ) + + # test if these can be grabbed using something like exiftool -By-line -a image? 
if args.verify_metadata: exifdf = pd.DataFrame.from_records(exifmetalist) - #exifdf = exifdf[[column_to_match, 'By-line', 'Headline', 'Source', 'CopyrightNotice']] - exifdf = exifdf[[column_to_match, 'By-line', 'Source', 'CopyrightNotice']] + # exifdf = exifdf[[column_to_match, 'By-line', 'Headline', 'Source', 'CopyrightNotice']] + exifdf = exifdf[[column_to_match, "By-line", "Source", "CopyrightNotice"]] - '''determine column headers for image dataframe based on arguments used''' - image_columns = [column_to_match, 'base filename', 'file path', 'extension'] + """determine column headers for image dataframe based on arguments used""" + image_columns = [column_to_match, "base filename", "file path", "extension"] if args.verify_checksums: for i in args.verify_checksums: - column_name = 'calculated ' + i + ' checksum' + column_name = "calculated " + i + " checksum" image_columns += [column_name] if args.techdata: image_columns += ["Bit Depth Check", "Color Profile Check"] - imagedf = pd.DataFrame.from_dict(image_dictionary['Images']['dictionary'], orient='index', columns=image_columns) - md5df = pd.DataFrame.from_dict(checksum_sidecar_dictionary['MD5']['dictionary'], orient='index', columns=['base filename', 'md5 filename', 'md5 path', 'filename in md5 checksum', 'md5 checksum value']) - sha1df = pd.DataFrame.from_dict(checksum_sidecar_dictionary['SHA1']['dictionary'], orient='index', columns=['base filename', 'sha1 filename', 'sha1 path', 'filename in sha1 checksum', 'sha1 checksum value']) - #returns a df of just the target files + imagedf = pd.DataFrame.from_dict( + image_dictionary["Images"]["dictionary"], orient="index", columns=image_columns + ) + md5df = pd.DataFrame.from_dict( + checksum_sidecar_dictionary["MD5"]["dictionary"], + orient="index", + columns=[ + "base filename", + "md5 filename", + "md5 path", + "filename in md5 checksum", + "md5 checksum value", + ], + ) + sha1df = pd.DataFrame.from_dict( + checksum_sidecar_dictionary["SHA1"]["dictionary"], + orient="index", + columns=[ + "base filename", + "sha1 filename", + "sha1 path", + "filename in sha1 checksum", + "sha1 checksum value", + ], + ) + # returns a df of just the target files targetdf = imagedf[imagedf[column_to_match].str.contains(target_filter, na=False)] - #count the number of items in inventory + # count the number of items in inventory inventory_count = len(inventorydf.index) - #count the number of images + # count the number of images image_count = len(imagedf.index) - #count the number of target files found + # count the number of target files found target_count = len(targetdf.index) - #subtract the target file count from the image count + # subtract the target file count from the image count image_count = image_count - target_count - #filter out targets from imagedf using the inverse of the target filter - #imagedf = imagedf[~imagedf[column_to_match].str.endswith("_target.tif", na=False)] + # filter out targets from imagedf using the inverse of the target filter + # imagedf = imagedf[~imagedf[column_to_match].str.endswith("_target.tif", na=False)] imagedf = imagedf[~imagedf[column_to_match].index.isin(targetdf.index)] - #imagedf = imagedf[~imagedf[column_to_match]] - #returns df of images not found in inventory - df3 = imagedf.merge(inventorydf, how='left', on=column_to_match, indicator="Status").query('Status == "left_only"') - #returns df of inventory entries with no tif file - df4 = imagedf.merge(inventorydf, how='right', on=column_to_match, indicator="Status").query('Status == "right_only"') - #creates DF 
of combined inventory and file DFs - df_merged = inventorydf.merge(imagedf, how='left', on=column_to_match) + # imagedf = imagedf[~imagedf[column_to_match]] + # returns df of images not found in inventory + df3 = imagedf.merge( + inventorydf, how="left", on=column_to_match, indicator="Status" + ).query('Status == "left_only"') + # returns df of inventory entries with no tif file + df4 = imagedf.merge( + inventorydf, how="right", on=column_to_match, indicator="Status" + ).query('Status == "right_only"') + # creates DF of combined inventory and file DFs + df_merged = inventorydf.merge(imagedf, how="left", on=column_to_match) if args.verify_metadata: - df_merged = df_merged.merge(exifdf, how='left', on=column_to_match) - #remove target files and instances where there is no file/inventory entry to match - clean_exifdf = pd.merge(exifdf, df3[column_to_match], on=column_to_match, how='outer', indicator='exifdfmatch').query("exifdfmatch == 'left_only'") - clean_exifdf = clean_exifdf[~clean_exifdf[column_to_match].str.contains(target_filter, na=False)] + df_merged = df_merged.merge(exifdf, how="left", on=column_to_match) + # remove target files and instances where there is no file/inventory entry to match + clean_exifdf = pd.merge( + exifdf, + df3[column_to_match], + on=column_to_match, + how="outer", + indicator="exifdfmatch", + ).query("exifdfmatch == 'left_only'") + clean_exifdf = clean_exifdf[ + ~clean_exifdf[column_to_match].str.contains(target_filter, na=False) + ] if args.strict: - metadf_failures = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'By-line', 'Source', 'CopyrightNotice'], right_on=[column_to_match, 'Creator', 'Source', 'Copyright Notice'], how='outer', indicator='metamatch').query("metamatch == 'left_only'") - #metadf_failures = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'By-line', 'Headline', 'Source', 'CopyrightNotice'], right_on=[column_to_match, 'Creator', 'Headline', 'Source', 'Copyright Notice'], how='outer', indicator='metamatch').query("metamatch == 'left_only'") + metadf_failures = pd.merge( + clean_exifdf, + inventorydf, + left_on=[column_to_match, "By-line", "Source", "CopyrightNotice"], + right_on=[column_to_match, "Creator", "Source", "Copyright Notice"], + how="outer", + indicator="metamatch", + ).query("metamatch == 'left_only'") + # metadf_failures = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'By-line', 'Headline', 'Source', 'CopyrightNotice'], right_on=[column_to_match, 'Creator', 'Headline', 'Source', 'Copyright Notice'], how='outer', indicator='metamatch').query("metamatch == 'left_only'") else: - copyright_pattern = '|'.join(r"{}".format(x) for x in exifdf['CopyrightNotice']) - inventorydf['copyright_pattern_match'] = inventorydf['Copyright Notice'].str.extract('('+ copyright_pattern +')', expand=False) - copyright_partial_match = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'CopyrightNotice'], right_on=[column_to_match, 'copyright_pattern_match'], how='outer', indicator="copyright_metadata_status").query("copyright_metadata_status == 'left_only'") - ''' + copyright_pattern = "|".join( + r"{}".format(x) for x in exifdf["CopyrightNotice"] + ) + inventorydf["copyright_pattern_match"] = inventorydf[ + "Copyright Notice" + ].str.extract("(" + copyright_pattern + ")", expand=False) + copyright_partial_match = pd.merge( + clean_exifdf, + inventorydf, + left_on=[column_to_match, "CopyrightNotice"], + right_on=[column_to_match, "copyright_pattern_match"], + how="outer", + 
indicator="copyright_metadata_status", + ).query("copyright_metadata_status == 'left_only'") + """ headline_pattern = '|'.join(r"{}".format(x) for x in exifdf['Headline']) inventorydf['headline_pattern_match'] = inventorydf['Headline'].str.extract('('+ headline_pattern +')', expand=False) headline_partial_match = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'Headline'], right_on=[column_to_match, 'headline_pattern_match'], how='outer', indicator="headline_metadata_status").query("headline_metadata_status == 'left_only'") - ''' - byline_pattern = '|'.join(r"{}".format(x) for x in exifdf['By-line']) - inventorydf['byline_pattern_match'] = inventorydf['Creator'].str.extract('('+ byline_pattern +')', expand=False) - byline_partial_match = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'By-line'], right_on=[column_to_match, 'byline_pattern_match'], how='outer', indicator="byline_metadata_status").query("byline_metadata_status == 'left_only'") - - source_pattern = '|'.join(r"{}".format(x) for x in exifdf['Source']) - inventorydf['source_pattern_match'] = inventorydf['Source'].str.extract('('+ source_pattern +')', expand=False) - source_partial_match = pd.merge(clean_exifdf, inventorydf, left_on=[column_to_match, 'Source'], right_on=[column_to_match, 'source_pattern_match'], how='outer', indicator="source_metadata_status").query("source_metadata_status == 'left_only'") - metadf_list = [copyright_partial_match, byline_partial_match, source_partial_match] - #metadf_list = [copyright_partial_match, headline_partial_match, byline_partial_match, source_partial_match] - #merge all metadata dataframes - metadf_failures = reduce(lambda left,right: pd.merge(left,right,on=[column_to_match], how='outer'), metadf_list) + """ + byline_pattern = "|".join(r"{}".format(x) for x in exifdf["By-line"]) + inventorydf["byline_pattern_match"] = inventorydf["Creator"].str.extract( + "(" + byline_pattern + ")", expand=False + ) + byline_partial_match = pd.merge( + clean_exifdf, + inventorydf, + left_on=[column_to_match, "By-line"], + right_on=[column_to_match, "byline_pattern_match"], + how="outer", + indicator="byline_metadata_status", + ).query("byline_metadata_status == 'left_only'") + + source_pattern = "|".join(r"{}".format(x) for x in exifdf["Source"]) + inventorydf["source_pattern_match"] = inventorydf["Source"].str.extract( + "(" + source_pattern + ")", expand=False + ) + source_partial_match = pd.merge( + clean_exifdf, + inventorydf, + left_on=[column_to_match, "Source"], + right_on=[column_to_match, "source_pattern_match"], + how="outer", + indicator="source_metadata_status", + ).query("source_metadata_status == 'left_only'") + metadf_list = [ + copyright_partial_match, + byline_partial_match, + source_partial_match, + ] + # metadf_list = [copyright_partial_match, headline_partial_match, byline_partial_match, source_partial_match] + # merge all metadata dataframes + metadf_failures = reduce( + lambda left, right: pd.merge( + left, right, on=[column_to_match], how="outer" + ), + metadf_list, + ) if args.verify_checksums: - #TO DO - Make this more generic (autofill md5/sha1 based on command) - if 'md5' in args.verify_checksums and not md5df.empty: - #TO DO should be a check if the filename in the checksum file matches the checksum base name - df_merged = df_merged.merge(md5df, how='left', left_on=column_to_match, right_on='filename in md5 checksum') - missing_md5_df = df_merged[df_merged['md5 checksum value'].isnull()] - df_merged['checksum_match'] = df_merged['md5 checksum 
value'].eq(df_merged['calculated md5 checksum']) - failed_md5_df = df_merged[df_merged['md5 checksum value'].notnull()] - failed_md5_df = failed_md5_df.loc[failed_md5_df['checksum_match'] == False] - if 'sha1' in args.verify_checksums and not sha1df.empty: - df_merged = df_merged.merge(sha1df, how='left', left_on=column_to_match, right_on='filename in sha1 checksum') - missing_sha1_df = df_merged[df_merged['sha1 checksum value'].isnull()] - df_merged['checksum_match'] = df_merged['sha1 checksum value'].eq(df_merged['calculated sha1 checksum']) - failed_sha1_df = df_merged[df_merged['sha1 checksum value'].notnull()] - failed_sha1_df = failed_sha1_df.loc[failed_sha1_df['checksum_match'] == False] + # TO DO - Make this more generic (autofill md5/sha1 based on command) + if "md5" in args.verify_checksums and not md5df.empty: + # TO DO should be a check if the filename in the checksum file matches the checksum base name + df_merged = df_merged.merge( + md5df, + how="left", + left_on=column_to_match, + right_on="filename in md5 checksum", + ) + missing_md5_df = df_merged[df_merged["md5 checksum value"].isnull()] + df_merged["checksum_match"] = df_merged["md5 checksum value"].eq( + df_merged["calculated md5 checksum"] + ) + failed_md5_df = df_merged[df_merged["md5 checksum value"].notnull()] + failed_md5_df = failed_md5_df.loc[failed_md5_df["checksum_match"] == False] + if "sha1" in args.verify_checksums and not sha1df.empty: + df_merged = df_merged.merge( + sha1df, + how="left", + left_on=column_to_match, + right_on="filename in sha1 checksum", + ) + missing_sha1_df = df_merged[df_merged["sha1 checksum value"].isnull()] + df_merged["checksum_match"] = df_merged["sha1 checksum value"].eq( + df_merged["calculated sha1 checksum"] + ) + failed_sha1_df = df_merged[df_merged["sha1 checksum value"].notnull()] + failed_sha1_df = failed_sha1_df.loc[ + failed_sha1_df["checksum_match"] == False + ] if args.techdata: - cleandf = pd.merge(imagedf, df3[column_to_match], on=column_to_match, how='outer', indicator='present').query("present == 'left_only'") - bitdepthdf = cleandf.loc[cleandf['Bit Depth Check'] == "FAIL"] - profiledf = cleandf.loc[cleandf['Color Profile Check'] == "FAIL"] - - #presentation related stuff for reporting - #TO DO organize this better to reduce repeated and dispersed info - create json dict first, then just pull from it or update values - #Consider only printing a report in terminal if an output file is NOT specified? + cleandf = pd.merge( + imagedf, + df3[column_to_match], + on=column_to_match, + how="outer", + indicator="present", + ).query("present == 'left_only'") + bitdepthdf = cleandf.loc[cleandf["Bit Depth Check"] == "FAIL"] + profiledf = cleandf.loc[cleandf["Color Profile Check"] == "FAIL"] + + # presentation related stuff for reporting + # TO DO organize this better to reduce repeated and dispersed info - create json dict first, then just pull from it or update values + # Consider only printing a report in terminal if an output file is NOT specified? 
output_report = {} output_report[base_folder_name] = [] iqc_version = args.version @@ -498,33 +675,69 @@ def iqc_main(): print("Number of TIFF images in input: " + str(image_count)) print("Number of target files in input: " + str(target_count)) print("\n**TARGET FILES FOUND IN INPUT: " + str(targetdf[column_to_match].tolist())) - print("\n**TIFF FILES NOT FOUND IN INVENTORY: " + str(df3[column_to_match].tolist())) - print("\n**INVENTORY ENTRIES WITH NO MATCHING TIFF FILE: " + str(df4[column_to_match].tolist()) + "\n") + print( + "\n**TIFF FILES NOT FOUND IN INVENTORY: " + str(df3[column_to_match].tolist()) + ) + print( + "\n**INVENTORY ENTRIES WITH NO MATCHING TIFF FILE: " + + str(df4[column_to_match].tolist()) + + "\n" + ) if args.verify_checksums: - #more elegant way to format this? - if md5df.empty and 'md5' in args.verify_checksums: - print ('+++ WARNING: Unable to verify ' + i + ' checksums. No ' + i + ' checksums were found +++') + # more elegant way to format this? + if md5df.empty and "md5" in args.verify_checksums: + print( + "+++ WARNING: Unable to verify " + + i + + " checksums. No " + + i + + " checksums were found +++" + ) print() - checksum_results = "Unable to verify md5 checksums. No md5 checksums were found." - elif 'md5' in args.verify_checksums: + checksum_results = ( + "Unable to verify md5 checksums. No md5 checksums were found." + ) + elif "md5" in args.verify_checksums: md5_failures = failed_md5_df[column_to_match].tolist() md5_missing = missing_md5_df[column_to_match].tolist() print("**MD5 CHECKSUM FAILURES: " + str(md5_failures)) - print("\n**INVENTORY ENTRIES WITH NO MATCHING MD5 FILE: " + str(md5_missing) + "\n") + print( + "\n**INVENTORY ENTRIES WITH NO MATCHING MD5 FILE: " + + str(md5_missing) + + "\n" + ) if md5_failures or md5_missing: - checksum_results = [{"MD5 Checksum Failures" : md5_failures, "Inventory Entries with No Matching MD5 File" : md5_missing}] + checksum_results = [ + { + "MD5 Checksum Failures": md5_failures, + "Inventory Entries with No Matching MD5 File": md5_missing, + } + ] else: checksum_results = "PASS" - if sha1df.empty and 'sha1' in args.verify_checksums: - print ("+++ WARNING: Unable to verify sha1 checksums. No sha1 checksums were found +++\n") - checksum_results = "Unable to verify sha1 checksums. No sha1 checksums were found." - elif 'sha1' in args.verify_checksums: - sha1_failures =failed_sha1_df[column_to_match].tolist() + if sha1df.empty and "sha1" in args.verify_checksums: + print( + "+++ WARNING: Unable to verify sha1 checksums. No sha1 checksums were found +++\n" + ) + checksum_results = ( + "Unable to verify sha1 checksums. No sha1 checksums were found." 
+ ) + elif "sha1" in args.verify_checksums: + sha1_failures = failed_sha1_df[column_to_match].tolist() sha1_missing = missing_sha1_df[column_to_match].tolist() print("**SHA1 CHECKSUM FAILURES: " + str(sha1_failures)) - print("\n**INVENTORY ENTRIES WITH NO MATCHING SHA1 FILE: " + str(sha1_missing) + "\n") + print( + "\n**INVENTORY ENTRIES WITH NO MATCHING SHA1 FILE: " + + str(sha1_missing) + + "\n" + ) if sha1_failures or sha1_missing: - checksum_results = [{"SHA1 Checksum Failures" : sha1_failures, "Inventory Entries with No Matching SHA1 File" : sha1_missing}] + checksum_results = [ + { + "SHA1 Checksum Failures": sha1_failures, + "Inventory Entries with No Matching SHA1 File": sha1_missing, + } + ] else: checksum_results = "PASS" if args.verify_metadata: @@ -549,27 +762,27 @@ def iqc_main(): profile_results = "PASS" report_data = { - "Report Date" : report_date, - "IQC Version" : iqc_version, - "Input Folder Size" : input_folder_size, - "Inventory Item Count" : inventory_count, - "TIFF File Count" : image_count, - "Target File Count" : target_count, - "Target Files" : targetdf[column_to_match].tolist(), - "TIFF Files Not Found in Inventory" : df3[column_to_match].tolist(), - "Inventory Files Not Found in TIFF Files" : df4[column_to_match].tolist(), - "Checksum Verification" : checksum_results, - "IPTC Metadata Verification" : metadata_results, - "Color Profile Check" : bit_depth_results, - "Bit Depth Check" : profile_results + "Report Date": report_date, + "IQC Version": iqc_version, + "Input Folder Size": input_folder_size, + "Inventory Item Count": inventory_count, + "TIFF File Count": image_count, + "Target File Count": target_count, + "Target Files": targetdf[column_to_match].tolist(), + "TIFF Files Not Found in Inventory": df3[column_to_match].tolist(), + "Inventory Files Not Found in TIFF Files": df4[column_to_match].tolist(), + "Checksum Verification": checksum_results, + "IPTC Metadata Verification": metadata_results, + "Color Profile Check": bit_depth_results, + "Bit Depth Check": profile_results, } output_report[base_folder_name].append(report_data) - #Write report data to output file + # Write report data to output file if args.output_path: - print ("Writing report to:" + args.output_path) - with open(args.output_path, 'w', newline='\n') as outfile: + print("Writing report to:" + args.output_path) + with open(args.output_path, "w", newline="\n") as outfile: json.dump(output_report, outfile, indent=4) - #df_merged.to_csv(args.output_path, sep=',', encoding='utf-8') + # df_merged.to_csv(args.output_path, sep=',', encoding='utf-8') else: - print('No output specified. Ending process') + print("No output specified. 
Ending process") quit() diff --git a/Image/iqc/iqc/iqcparameters.py b/Image/iqc/iqc/iqcparameters.py index 1401339..2adceed 100644 --- a/Image/iqc/iqc/iqcparameters.py +++ b/Image/iqc/iqc/iqcparameters.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 -''' +""" Argument parser for iqc script -''' +""" import argparse import sys @@ -10,24 +10,91 @@ parser = argparse.ArgumentParser() -if parser.prog == 'run.py': +if parser.prog == "run.py": import os - parser.prog = 'iqc' - version_file = open(os.path.join(os.path.dirname(__file__), 'meta', 'VERSION')) + + parser.prog = "iqc" + version_file = open(os.path.join(os.path.dirname(__file__), "meta", "VERSION")) __version__ = version_file.read().strip() else: - __version__ = get_distribution('iqc').version - -parser.add_argument('--input', '-i', required=True, action='store', dest='input_path', type=str, help='full path to input folder containing TIFF images. Directory structure does not matter.') -parser.add_argument('--output', '-o', action='store', dest='output_path', type=str, help='full path to output csv file. For debugging purposes currently.') -parser.add_argument('--inventory', required=False, action='store', dest='inventory_path', type=str, help='Full path to folder containing inventories or full path to a single CSV inventory file.') -parser.add_argument('--verify_checksums', '-c', required=False, nargs=1, action='store', dest='verify_checksums', help='Include to verify sidecar checksums. This argument must be followed by either "md5" or "sha1" to specify which type of checksum to verify') -parser.add_argument('--verify_metadata', '-m', required='--strict' in sys.argv, action='store_true', dest='verify_metadata', help='Include to check if the embedded IPTC metadata appears in the inventory. By default truncated IPTC metadata will still pass.') -parser.add_argument('--exiftool', action='store', dest='exiftool_path', default='exiftool', type=str, help='For setting a custom exiftool path') -parser.add_argument('--strict', '-s', required=False, action='store_true', help='Use with --verify_metadata to enforce exact metadata matching. Will cause truncated IPTC fields to fail') -parser.add_argument('--verify_techdata', '-t', required=False, action='store_true', dest='techdata', help='Verify technical metadata. This will check the bit depth and color profile of TIFF images.') -parser.add_argument('--all', '-a', required=False, action='store_true', dest='all', help='This is equivalent to including the commands --verify_metadata --verify_techdata --verify checksums md5 -o /path/to/input/input-iqc_report.json') -parser.add_argument('--version', '-v', action='version', version="%(prog)s "+__version__+"") + __version__ = get_distribution("iqc").version + +parser.add_argument( + "--input", + "-i", + required=True, + action="store", + dest="input_path", + type=str, + help="full path to input folder containing TIFF images. Directory structure does not matter.", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output csv file. For debugging purposes currently.", +) +parser.add_argument( + "--inventory", + required=False, + action="store", + dest="inventory_path", + type=str, + help="Full path to folder containing inventories or full path to a single CSV inventory file.", +) +parser.add_argument( + "--verify_checksums", + "-c", + required=False, + nargs=1, + action="store", + dest="verify_checksums", + help='Include to verify sidecar checksums. 
This argument must be followed by either "md5" or "sha1" to specify which type of checksum to verify', +) +parser.add_argument( + "--verify_metadata", + "-m", + required="--strict" in sys.argv, + action="store_true", + dest="verify_metadata", + help="Include to check if the embedded IPTC metadata appears in the inventory. By default truncated IPTC metadata will still pass.", +) +parser.add_argument( + "--exiftool", + action="store", + dest="exiftool_path", + default="exiftool", + type=str, + help="For setting a custom exiftool path", +) +parser.add_argument( + "--strict", + "-s", + required=False, + action="store_true", + help="Use with --verify_metadata to enforce exact metadata matching. Will cause truncated IPTC fields to fail", +) +parser.add_argument( + "--verify_techdata", + "-t", + required=False, + action="store_true", + dest="techdata", + help="Verify technical metadata. This will check the bit depth and color profile of TIFF images.", +) +parser.add_argument( + "--all", + "-a", + required=False, + action="store_true", + dest="all", + help="This is equivalent to including the commands --verify_metadata --verify_techdata --verify checksums md5 -o /path/to/input/input-iqc_report.json", +) +parser.add_argument( + "--version", "-v", action="version", version="%(prog)s " + __version__ + "" +) args = parser.parse_args() @@ -35,5 +102,5 @@ if args.all is True: args.verify_metadata = True - args.verify_checksums = ['md5'] + args.verify_checksums = ["md5"] args.techdata = True diff --git a/Image/iqc/run.py b/Image/iqc/run.py index 5046ab8..ebab090 100755 --- a/Image/iqc/run.py +++ b/Image/iqc/run.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 import sys + if sys.version_info[0] < 3: raise Exception("Python 3 or a more recent version is required.") + def main(): import iqc.iqc as iqc + iqc.iqc_main() + if __name__ == "__main__": main() diff --git a/Image/iqc/setup.py b/Image/iqc/setup.py index 2938e7b..f202d93 100755 --- a/Image/iqc/setup.py +++ b/Image/iqc/setup.py @@ -2,31 +2,34 @@ from setuptools import setup, find_packages + def _read(path): - with open(path, 'r') as f: + with open(path, "r") as f: return f.read() + def load_iqc_version(): import os - version_file = open(os.path.join('iqc', 'meta', 'VERSION')) + + version_file = open(os.path.join("iqc", "meta", "VERSION")) version = version_file.read().strip() return version -setup ( - name ='iqc', - version=load_iqc_version(), - license='MIT', - description='IQC is an automation tool for checking image folder contents against inventories, verifying checksums, and checking metadata.', - long_description=_read('README.md'), - url='https://github.com/nulib/nul-rdc-scripts', - author='Joshua Yocum', - packages={'iqc'}, -# package_data={'dpx2ffv1': ['data/mediaconch_policies/*.xml']}, -# include_package_data=True, - entry_points={'console_scripts': ['iqc = iqc.iqc:iqc_main'],}, - install_requires=[ - 'pandas', - 'Pillow' - ], - python_requires='>=3.6' + +setup( + name="iqc", + version=load_iqc_version(), + license="MIT", + description="IQC is an automation tool for checking image folder contents against inventories, verifying checksums, and checking metadata.", + long_description=_read("README.md"), + url="https://github.com/nulib/nul-rdc-scripts", + author="Joshua Yocum", + packages={"iqc"}, + # package_data={'dpx2ffv1': ['data/mediaconch_policies/*.xml']}, + # include_package_data=True, + entry_points={ + "console_scripts": ["iqc = iqc.iqc:iqc_main"], + }, + install_requires=["pandas", "Pillow"], + python_requires=">=3.6", ) diff --git 
a/Meadow/image_csv_script/image_csv_script/image_csv_script/image_csv_script.py b/Meadow/image_csv_script/image_csv_script/image_csv_script/image_csv_script.py
index 6765078..411ce6b 100644
--- a/Meadow/image_csv_script/image_csv_script/image_csv_script/image_csv_script.py
+++ b/Meadow/image_csv_script/image_csv_script/image_csv_script/image_csv_script.py
@@ -19,7 +19,8 @@ def csv_main():
     else:
         if os.path.isdir(indir):
             inventories = glob.glob(os.path.join(indir, "*.csv"))
-            inventorydf = pd.concat([pd.read_csv(inv, skiprows=0, header=0') for inv in inventories])
+            inventorydf = pd.concat(
+                [pd.read_csv(inv, skiprows=0, header=0) for inv in inventories])
         else:
             print('ERROR: Supplied inventory path is not valid')
             quit()
diff --git a/Meadow/image_csv_script/image_csv_script/image_csv_script/parameters.py b/Meadow/image_csv_script/image_csv_script/image_csv_script/parameters.py
index 3b84321..d8e97a8 100644
--- a/Meadow/image_csv_script/image_csv_script/image_csv_script/parameters.py
+++ b/Meadow/image_csv_script/image_csv_script/image_csv_script/parameters.py
@@ -1,18 +1,34 @@
 #!/usr/bin/env python3
-'''
+"""
 Argument parser for image ingest sheet script
-'''
+"""
 
 import argparse
 import sys
 
 parser = argparse.ArgumentParser()
 
-parser.add_argument('--input', '-i', required=True, action='store', dest='input_path', type=str, help='full path to input folder')
-parser.add_argument('--output', '-o', required=True, action='store', dest='output_path', type=str, help='full path to output csv file')
-#parser.add_argument('--inventory', required=True, action='store', dest='inventory_path', type=str, help='path to folder containing inventories')
-#parser.add_argument('--filter_list', action='store', dest='filter_list', help='Provide a text file with a list of files. Not implemented yet')
+parser.add_argument(
+    "--input",
+    "-i",
+    required=True,
+    action="store",
+    dest="input_path",
+    type=str,
+    help="full path to input folder",
+)
+parser.add_argument(
+    "--output",
+    "-o",
+    required=True,
+    action="store",
+    dest="output_path",
+    type=str,
+    help="full path to output csv file",
+)
+# parser.add_argument('--inventory', required=True, action='store', dest='inventory_path', type=str, help='path to folder containing inventories')
+# parser.add_argument('--filter_list', action='store', dest='filter_list', help='Provide a text file with a list of files. 
Not implemented yet') args = parser.parse_args() diff --git a/Meadow/mig_av/mig_av/mig_av.py b/Meadow/mig_av/mig_av/mig_av.py index 4df7296..b2dade2 100644 --- a/Meadow/mig_av/mig_av/mig_av.py +++ b/Meadow/mig_av/mig_av/mig_av.py @@ -10,54 +10,51 @@ def input_check(indir): - - - '''Checks if input was provided and if it is a directory that exists''' + """Checks if input was provided and if it is a directory that exists""" if not indir: - print ("No input provided") + print("No input provided") quit() if not os.path.isdir(indir): - print('input is not a directory') + print("input is not a directory") quit() def output_check(output): - - - '''Checks that output is valid''' - if not output.endswith('.csv'): + """Checks that output is valid""" + if not output.endswith(".csv"): print("\n--- ERROR: Output must be a CSV file ---\n") quit() def interpret_aux_command(): - - - '''checks if argument passed to aux_parse is valid''' - aux_parse_list = ['extension', 'parse'] + """checks if argument passed to aux_parse is valid""" + aux_parse_list = ["extension", "parse"] for i in args.aux_parse: if not i in aux_parse_list: - print('\n---ERROR: ' + i + ' is not a valid input to the auxiliary command ---\n') + print( + "\n---ERROR: " + + i + + " is not a valid input to the auxiliary command ---\n" + ) quit() def update_fieldname_list(original_fieldname_list, missing_fieldname_list): - - - fieldname_list = [header for header in original_fieldname_list if header - not in missing_fieldname_list] + fieldname_list = [ + header + for header in original_fieldname_list + if header not in missing_fieldname_list + ] return fieldname_list def missing_description_field_handler(missing_descriptive_fieldnames): - - print("+++ WARNING: Your inventory is missing the following columns +++") print(missing_descriptive_fieldnames) print("SKIP COLUMNS AND CONTINUE? (y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: pass @@ -65,129 +62,144 @@ def missing_description_field_handler(missing_descriptive_fieldnames): quit() -#TODO add early warning if spreadsheet is missing important columns like work_accession_number +# TODO add early warning if spreadsheet is missing important columns like work_accession_number def import_inventories(source_inventories): - - - ''' + """ import CSV inventories and parse each row into a dictionary that is added to a list We use lists of dicts initially to catch duplicate filenames later on TODO Cell wrangling stems from here (description and label) - ''' + """ missing_fieldnames = False source_inventory_dictlist = [] for i in source_inventories: - if i.endswith('.csv'): + if i.endswith(".csv"): if os.path.isfile(i): csvDict = [] - with open(i, encoding='utf-8')as f: - reader = csv.DictReader(f, delimiter=',') + with open(i, encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter=",") for row in reader: - #work type is assumed by the presence of format-specific column headers - if 'Width (cm.)' in reader.fieldnames: - work_type = 'IMAGE' - elif 'Speed IPS' in reader.fieldnames: - work_type = 'AUDIO' - elif 'Region' or 'Stock' in reader.fieldnames: - work_type = 'VIDEO' + # work type is assumed by the presence of format-specific column headers + if "Width (cm.)" in reader.fieldnames: + work_type = "IMAGE" + elif "Speed IPS" in reader.fieldnames: + work_type = "AUDIO" + elif "Region" or "Stock" in reader.fieldnames: + work_type = "VIDEO" else: - print('''\n---ERROR: Unable to determine - work_type. 
---\n''') - print('''make sure that your inventory has the - necessary format-specific columns''') - print('''IMAGE: "Width (cm.)" \n AUDIO: "Speed - IPS" \n VIDEO: "Region" or "Stock"''') + print( + """\n---ERROR: Unable to determine + work_type. ---\n""" + ) + print( + """make sure that your inventory has the + necessary format-specific columns""" + ) + print( + '''IMAGE: "Width (cm.)" \n AUDIO: "Speed + IPS" \n VIDEO: "Region" or "Stock"''' + ) quit() - name = row['filename'] - if work_type == 'AUDIO' or work_type == 'VIDEO': + name = row["filename"] + if work_type == "AUDIO" or work_type == "VIDEO": if not args.desc: - description_fields = ['inventory_title'] + description_fields = ["inventory_title"] else: description_fields = args.desc - missing_descriptive_fieldnames = [a for a in - description_fields if not a in reader.fieldnames] + missing_descriptive_fieldnames = [ + a + for a in description_fields + if not a in reader.fieldnames + ] if missing_descriptive_fieldnames: missing_fieldnames = True description_fields = update_fieldname_list (description_fields, missing_descriptive_fieldnames) description_list = [] for header in description_fields: - #TODO make this its own function since it's probably going to get repeated + # TODO make this its own function since it's probably going to get repeated description_list.append(row[header]) - description = "; ".join(i for i in - description_list if i) - #description.update({'descriptive': row[header]}) - if not 'label' in reader.fieldnames: + description = "; ".join(i for i in description_list if i) + # description.update({'descriptive': row[header]}) + if not "label" in reader.fieldnames: inventory_label = None else: - inventory_label = row['label'] - #if work_type == "VIDEO" and 'Region' in reader.fieldnames: - csvData = {'filename' : row['filename'], - 'work_type' : work_type,'work_accession_number' : row - ['work_accession_number'], - 'description' : description, - 'label' : inventory_label + inventory_label = row["label"] + # if work_type == "VIDEO" and 'Region' in reader.fieldnames: + csvData = { + "filename": row["filename"], + "work_type": work_type, + "work_accession_number": row["work_accession_number"], + "description": description, + "label": inventory_label, } - elif work_type == 'IMAGE': - csvData = {'filename' : row['filename'], - 'label' : row['label'], - 'work_type' : work_type,'work_accession_number' : row - ['work_accession_number'], - 'file_accession_number' : row - ['file_accession_number'], - 'role' : row ['role'], - 'description' : row['description'] + elif work_type == "IMAGE": + csvData = { + "filename": row["filename"], + "label": row["label"], + "work_type": work_type, + "work_accession_number": row["work_accession_number"], + "file_accession_number": row["file_accession_number"], + "role": row["role"], + "description": row["description"], } else: - print("--- ERROR: Problem identifying work type in " + i + " ---") + print( + "--- ERROR: Problem identifying work type in " + + i + + " ---" + ) quit() csvDict.append(csvData) - #print(csvDict) + # print(csvDict) if missing_fieldnames == True: - missing_description_field_handler(missing_descriptive_fieldnames) + missing_description_field_handler( + missing_descriptive_fieldnames + ) else: - print('\n--- ERROR: ' + i + ' is not a file ---\n') + print("\n--- ERROR: " + i + " is not a file ---\n") quit() else: - print('\n--- ERROR: Inventory path is not valid ---\n') + print("\n--- ERROR: Inventory path is not valid ---\n") source_inventory_dictlist.extend(csvDict) - 
#print(source_inventory_dictlist) - #quit() + # print(source_inventory_dictlist) + # quit() return source_inventory_dictlist def mig_av_main(): - - #sorted[] - '''setting up inputs and outputs''' + # sorted[] + """setting up inputs and outputs""" indir = args.input_path input_check(indir) if args.output_path: meadow_csv_file = args.output_path else: base_folder_name = os.path.basename(indir) - meadow_csv_file = os.path.join('''indir, base_folder_name + - '-meadow_ingest_inventory.csv''') + meadow_csv_file = os.path.join( + """indir, base_folder_name + + '-meadow_ingest_inventory.csv""" + ) output_check(meadow_csv_file) if args.aux_parse: interpret_aux_command() - '''importing inventories''' + """importing inventories""" if args.source_inventory: source_inventories = args.source_inventory source_inventory_dictlist = import_inventories(source_inventories) else: - print('\n*** Checking input directory for CSV files ***') + print("\n*** Checking input directory for CSV files ***") source_inventories = glob.glob(os.path.join(indir, "*.csv")) - #skip auto-generated meadow ingest csv if it already exists - source_inventories = [i for i in source_inventories if not - '-meadow_ingest_inventory.csv' in i] + # skip auto-generated meadow ingest csv if it already exists + source_inventories = [ + i for i in source_inventories if not "-meadow_ingest_inventory.csv" in i + ] if not source_inventories: print("\n+++ WARNING: Unable to find CSV inventory file +++") print("CONTINUE? (y/n)") - yes = {'yes','y', 'ye', ''} - no = {'no','n'} + yes = {"yes", "y", "ye", ""} + no = {"no", "n"} choice = input().lower() if choice in yes: source_inventory_dictlist = [{}] @@ -196,172 +208,323 @@ def mig_av_main(): else: sys.stdout.write("Please respond with 'yes' or 'no'") quit() - #rather than quitting - prompt user to choose whether or not to continue + # rather than quitting - prompt user to choose whether or not to continue else: print("Inventories found\n") source_inventory_dictlist = import_inventories(source_inventories) - #check that each csv file actually exists [approach later will be to iterate through loaded dictionaries of CSV files to check if a file corresponds to a key, which is derived from the filename column] - #fallback 1: if source inventory exists in indir, iterate through loading csv files all csv files - #fallback 2: if no inventory is specified and no csv files are found in indir, warn and proceed with no inventory + # check that each csv file actually exists [approach later will be to iterate through loaded dictionaries of CSV files to check if a file corresponds to a key, which is derived from the filename column] + # fallback 1: if source inventory exists in indir, iterate through loading csv files all csv files + # fallback 2: if no inventory is specified and no csv files are found in indir, warn and proceed with no inventory - ''' + """ setting up parameters for meadow inventory - ''' - #TODO may want to convert everything to lowercase so you don't risk running into errors - #TODO move generating this dict to a function in a separate module - role_dict = {'framemd5' : {'identifiers' : ['.framemd5'], 'type' : - 'extension', 'role' : 'S', 'label' : 'framemd5 file', 'file_builder' : '_supplementary_'}, - 'metadata' : {'identifiers' : ['.xml', '.json', '.pdf'], - 'type' : 'extension', 'role' : 'S', 'label' : 'technical metadata file', 'file_builder' : '_supplementary_'}, - 'qctools' : {'identifiers' : ['.xml.gz', '.qctools.mkv'], - 'type' : 'extension', 'role' : 'S', 'label' : 'QCTools report', 
'file_builder' : '_supplementary_'}, - 'logfile' : {'identifiers' : ['.log'], 'type' : 'extension', - 'role' : 'S', 'label' : 'log file', 'file_builder' : '_supplementary_'}, - 'spectrogram' : {'identifiers' : ['.png', '.PNG'], 'type' : - 'extension', 'role' : 'S', 'label' : 'spectrogram file', 'file_builder' : '_supplementary_'}, - 'dpx_checksum' : {'identifiers' : ['dpx.txt'], 'type' : - 'extension', 'role' : 'S', 'label' : 'original DPX checksums', 'file_builder' : '_supplementary_'}, - 'access' : {'identifiers' : ['-a.', '_a.', '-am.', '_am.', - '_am_', '-am-', '-am_', '.mp4', '_access'], 'type' : 'pattern', 'role' : 'A', 'label' : None, 'file_builder' : '_access_'}, - 'preservation' : {'identifiers' : ['-p.', '_p.', '-pm.', - '_pm', '_pm_', '-pm-', '-pm_', '.mkv', '_preservation'], 'type' : 'pattern', 'role' : 'P', 'label' : None, 'file_builder' : '_preservation_'}, - 'auxiliary' : {'identifiers' : ['.jpg', '.JPG'], - 'type' : 'extension', 'role' : 'X', 'label' : 'image', 'file_builder' : '_auxiliary_'} - } + """ + # TODO may want to convert everything to lowercase so you don't risk running into errors + # TODO move generating this dict to a function in a separate module + role_dict = { + "framemd5": { + "identifiers": [".framemd5"], + "type": "extension", + "role": "S", + "label": "framemd5 file", + "file_builder": "_supplementary_", + }, + "metadata": { + "identifiers": [".xml", ".json", ".pdf"], + "type": "extension", + "role": "S", + "label": "technical metadata file", + "file_builder": "_supplementary_", + }, + "qctools": { + "identifiers": [".xml.gz", ".qctools.mkv"], + "type": "extension", + "role": "S", + "label": "QCTools report", + "file_builder": "_supplementary_", + }, + "logfile": { + "identifiers": [".log"], + "type": "extension", + "role": "S", + "label": "log file", + "file_builder": "_supplementary_", + }, + "spectrogram": { + "identifiers": [".png", ".PNG"], + "type": "extension", + "role": "S", + "label": "spectrogram file", + "file_builder": "_supplementary_", + }, + "dpx_checksum": { + "identifiers": ["dpx.txt"], + "type": "extension", + "role": "S", + "label": "original DPX checksums", + "file_builder": "_supplementary_", + }, + "access": { + "identifiers": [ + "-a.", + "_a.", + "-am.", + "_am.", + "_am_", + "-am-", + "-am_", + ".mp4", + "_access", + ], + "type": "pattern", + "role": "A", + "label": None, + "file_builder": "_access_", + }, + "preservation": { + "identifiers": [ + "-p.", + "_p.", + "-pm.", + "_pm", + "_pm_", + "-pm-", + "-pm_", + ".mkv", + "_preservation", + ], + "type": "pattern", + "role": "P", + "label": None, + "file_builder": "_preservation_", + }, + "auxiliary": { + "identifiers": [".jpg", ".JPG"], + "type": "extension", + "role": "X", + "label": "image", + "file_builder": "_auxiliary_", + }, + } if not args.aux_parse: - aux_dict = {'auxiliary' : {'identifiers' : None, 'type' : None, 'role' - : None, 'label' : None, 'file_builder' : None}} - #add empty aux_dict as generic catch-all to the end of role_dict + aux_dict = { + "auxiliary": { + "identifiers": None, + "type": None, + "role": None, + "label": None, + "file_builder": None, + } + } + # add empty aux_dict as generic catch-all to the end of role_dict role_dict.update(aux_dict) else: - if 'extension' in args.aux_parse: - aux_dict = {'auxiliay' : {'identifiers' : ['.jpg', '.JPG'], - 'type': 'extension', 'role' : 'X', 'label' : 'image', 'file_builder' : '_auxiliary_'} + if "extension" in args.aux_parse: + aux_dict = { + "auxiliay": { + "identifiers": [".jpg", ".JPG"], + "type": 
"extension", + "role": "X", + "label": "image", + "file_builder": "_auxiliary_", + } + } + elif "parse" in args.aux_parse: + aux_dict = { + "auxiliary": { + "identifiers": [ + "_Asset", + "-Asset", + "_Can", + "-Can", + "Front.", + "Back.", + "_Ephemera", + "-Ephemera", + ], + "type": "xparse", + "role": "X", + "label": None, + "file_builder": "_auxiliary_", + } } - elif 'parse' in args.aux_parse: - aux_dict = {'auxiliary' : {'identifiers' : ['_Asset', '-Asset', - '_Can', '-Can', 'Front.', 'Back.', '_Ephemera', '-Ephemera'], 'type' : 'xparse', 'role' : 'X', 'label' : None, 'file_builder' : '_auxiliary_'}} - #add the aux_dict to the beginning of the role_dict - #this will catch X files that also have a/p identifiers in the filename + # add the aux_dict to the beginning of the role_dict + # this will catch X files that also have a/p identifiers in the filename role_dict = {**aux_dict, **role_dict} - #add generic catch-all for unexpected file types to the end of role_dict - role_dict.update({'other' : {'identifiers' : None, 'type' : None, 'role' : None, 'label' : None, 'file_builder' : None}}) - - header_names = ['work_type', 'work_accession_number', 'file_accession_number', 'filename', 'description', 'label', 'role', 'work_image', 'structure'] - ''' + # add generic catch-all for unexpected file types to the end of role_dict + role_dict.update( + { + "other": { + "identifiers": None, + "type": None, + "role": None, + "label": None, + "file_builder": None, + } + } + ) + + header_names = [ + "work_type", + "work_accession_number", + "file_accession_number", + "filename", + "description", + "label", + "role", + "work_image", + "structure", + ] + """ extract the filenames from the inventories as a list - ''' + """ filename_list = [] for i in source_inventory_dictlist: - name = i.get('filename') + name = i.get("filename") filename_list.append(name) - #error out if duplicate filenames are found + # error out if duplicate filenames are found if len(filename_list) != len(set(filename_list)): - print('\n--- ERROR: There are duplicate filenames in your inventories ---\n') + print("\n--- ERROR: There are duplicate filenames in your inventories ---\n") quit() - #convert list to dict so it becomes easier to parse from here on + # convert list to dict so it becomes easier to parse from here on source_inventory_dict = {} for item in source_inventory_dictlist: - name = item['filename'] + name = item["filename"] source_inventory_dict[name] = item - #TODO add a check for existing file with filename before overwriting - ''' + # TODO add a check for existing file with filename before overwriting + """ attempt to create output csv before continuing - ''' + """ try: - with open(meadow_csv_file, 'w', newline='\n') as outfile: + with open(meadow_csv_file, "w", newline="\n") as outfile: outfile.close except OSError: - print("\n--- ERROR: Unable to create output file", meadow_csv_file + ' ---\n') + print("\n--- ERROR: Unable to create output file", meadow_csv_file + " ---\n") quit() meadow_full_dict = {} for subdir, dirs, files in os.walk(indir): dirs.sort() - clean_subdir = (subdir.replace(indir, '')) - clean_subdir = clean_subdir.strip('/') - #skip file types we don't want - #TODO put this in an external function to make this a little cleaner - files = [f for f in files if not f[0] == '.'] - files = [f for f in files if not f == 'Thumbs.db'] - files = [f for f in files if not f.endswith('.md5')] - files = [f for f in files if not f.endswith('.csv')] + clean_subdir = subdir.replace(indir, "") + clean_subdir = 
clean_subdir.strip("/") + # skip file types we don't want + # TODO put this in an external function to make this a little cleaner + files = [f for f in files if not f[0] == "."] + files = [f for f in files if not f == "Thumbs.db"] + files = [f for f in files if not f.endswith(".md5")] + files = [f for f in files if not f.endswith(".csv")] if args.skip: skip_list = args.skip for i in skip_list: files = [f for f in files if not i in f] - dirs[:] = [d for d in dirs if not d[0] == '.'] + dirs[:] = [d for d in dirs if not d[0] == "."] for file in sorted(files): - #set filename, use unix style path without leading slash + # set filename, use unix style path without leading slash filename = os.path.join(clean_subdir, file) filename = filename.replace(os.sep, posixpath.sep) - filename = filename.strip('/') - meadow_file_dict = {'work_type': None, - 'work_accession_number': None,'file_accession_number': None, - 'filename': filename, - 'description': None, - 'label': None, - 'role': None, - 'work_image': None, - 'structure': None - } + filename = filename.strip("/") + meadow_file_dict = { + "work_type": None, + "work_accession_number": None, + "file_accession_number": None, + "filename": filename, + "description": None, + "label": None, + "role": None, + "work_image": None, + "structure": None, + } - #TODO add safety check to make sure there aren't multiple matches for a filename in the accession numbers - #check for corresponding item in loaded inventory - #TODO handle cases where there is no inventory + # TODO add safety check to make sure there aren't multiple matches for a filename in the accession numbers + # check for corresponding item in loaded inventory + # TODO handle cases where there is no inventory for item in filename_list: if item in file: - meadow_file_dict.update({'work_accession_number': - source_inventory_dict[item]['work_accession_number']}) - #load the work type - work_type = source_inventory_dict[item]['work_type'] - meadow_file_dict.update({'work_type': work_type}) - #load the description or auto-fill if description is empty - if not source_inventory_dict[item]['description']: - meadow_file_dict.update({'description': file}) + meadow_file_dict.update( + { + "work_accession_number": source_inventory_dict[item][ + "work_accession_number" + ] + } + ) + # load the work type + work_type = source_inventory_dict[item]["work_type"] + meadow_file_dict.update({"work_type": work_type}) + # load the description or auto-fill if description is empty + if not source_inventory_dict[item]["description"]: + meadow_file_dict.update({"description": file}) else: - meadow_file_dict.update({'description': source_inventory_dict[item]['description']}) - #if dictionary does not already have a key corresponding to the item add it + meadow_file_dict.update( + {"description": source_inventory_dict[item]["description"]} + ) + # if dictionary does not already have a key corresponding to the item add it if item not in meadow_full_dict: meadow_full_dict[item] = [meadow_file_dict] - #otherwise append it to the existing key + # otherwise append it to the existing key else: meadow_full_dict[item].append(meadow_file_dict) - #setting a generic label - inventory_label = source_inventory_dict[item]['label'] + # setting a generic label + inventory_label = source_inventory_dict[item]["label"] if work_type == "VIDEO" or work_type == "AUDIO": - label,role,file_builder = meadow_parser_funcs.get_label + label, role, file_builder = meadow_parser_funcs.get_label (role_dict, file, inventory_label) - 
meadow_file_dict.update({'role': role}) - role_count = sum(x.get('role') == role for x in meadow_full_dict.get(item)) - meadow_file_dict.update({'label': label}) + meadow_file_dict.update({"role": role}) + role_count = sum( + x.get("role") == role for x in meadow_full_dict.get(item) + ) + meadow_file_dict.update({"label": label}) if args.prepend: - meadow_file_dict.update({'file_accession_number' : - args.prepend + item + file_builder + f'{role_count:03d}'}) + meadow_file_dict.update( + { + "file_accession_number": args.prepend + + item + + file_builder + + f"{role_count:03d}" + } + ) else: - meadow_file_dict.update({'file_accession_number' : - item + file_builder + f'{role_count:03d}'}) + meadow_file_dict.update( + { + "file_accession_number": item + + file_builder + + f"{role_count:03d}" + } + ) else: - meadow_file_dict.update({'role': source_inventory_dict - [item]['role']}) - meadow_file_dict.update({'label': inventory_label}) + meadow_file_dict.update( + {"role": source_inventory_dict[item]["role"]} + ) + meadow_file_dict.update({"label": inventory_label}) if args.prepend: - meadow_file_dict.update({'file_accession_number' : - args.prepend + source_inventory_dict[item]['file_accession_number']}) + meadow_file_dict.update( + { + "file_accession_number": args.prepend + + source_inventory_dict[item][ + "file_accession_number" + ] + } + ) else: - meadow_file_dict.update({'file_accession_number' : - source_inventory_dict[item]['file_accession_number']}) - #TODO build out how to handle cases where a file is not found in the inventory - #allow user to add the file anyway + meadow_file_dict.update( + { + "file_accession_number": source_inventory_dict[ + item + ]["file_accession_number"] + } + ) + # TODO build out how to handle cases where a file is not found in the inventory + # allow user to add the file anyway if not any(item in file for item in filename_list): - print('''"+++ WARNING: No entry matching " + file + " was found - in your inventory +++"''') + print( + '''"+++ WARNING: No entry matching " + file + " was found + in your inventory +++"''' + ) - #TODO final check that all ihidden files and folderstems from filename list are accounted for in the final inventory + # TODO final check that all ihidden files and folderstems from filename list are accounted for in the final inventory - with open(meadow_csv_file, 'w', newline='', encoding='utf-8') as f: - writer = csv.DictWriter(f, fieldnames = header_names) + with open(meadow_csv_file, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=header_names) writer.writeheader() for item in meadow_full_dict: for file_info in meadow_full_dict[item]: diff --git a/Meadow/mig_av/mig_av/mig_av_parameters.py b/Meadow/mig_av/mig_av/mig_av_parameters.py index 42e1833..cf9d2b2 100644 --- a/Meadow/mig_av/mig_av/mig_av_parameters.py +++ b/Meadow/mig_av/mig_av/mig_av_parameters.py @@ -4,20 +4,66 @@ parser = argparse.ArgumentParser() -parser.add_argument('--input', '-i', action='store', dest='input_path', - type=str, help='full path to input folder') -parser.add_argument('--output', '-o', action='store', dest='output_path', - type=str, help='full path to output csv file') -parser.add_argument('--load_inventory', '-l', required=False, nargs='*', - action='store', dest='source_inventory', help='Use to specify an object inventory. If not specified the script will look in the base folder of the input for object inventories. 
If no inventories are found the script will leave some fields blank.') -parser.add_argument('--skip', '-s', required=False, nargs='*', action='store', - dest='skip', help='Use to specify patterns to skip. Can take multiple inputs. For example, "_ac." "_am." could be used to skip legacy ac and am files.') -parser.add_argument('--description', '-d', required=False, nargs='*', - action='store', dest='desc', help='Use to specify column names to populate Meadow description field with. Can take multiple inputs. Information from each column will be separated by a ";" in the description. Example usage: -d "Date/Time" "Barcode". If not specified, script will default to looking for the column "inventory_title"') -#parser.add_argument('--newline_limit', '-n', required=False, nargs=1, action='store', dest='output_path', type=int, help='Limit fields imported into the description field to a certain number of newlines.') -parser.add_argument('--auxiliary', '-x', required=False, nargs=1, - action='store', dest='aux_parse', help='Sets how to parse auxiliary files. Options include: extension (by extension), parse (by word), none (no aux files). Default is none.') -parser.add_argument('--prepend_accession', '-p', action='store', - dest='prepend', type=str, help='set a string to be added to the beginning of the file accession number when generated') +parser.add_argument( + "--input", + "-i", + action="store", + dest="input_path", + type=str, + help="full path to input folder", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output csv file", +) +parser.add_argument( + "--load_inventory", + "-l", + required=False, + nargs="*", + action="store", + dest="source_inventory", + help="Use to specify an object inventory. If not specified the script will look in the base folder of the input for object inventories. If no inventories are found the script will leave some fields blank.", +) +parser.add_argument( + "--skip", + "-s", + required=False, + nargs="*", + action="store", + dest="skip", + help='Use to specify patterns to skip. Can take multiple inputs. For example, "_ac." "_am." could be used to skip legacy ac and am files.', +) +parser.add_argument( + "--description", + "-d", + required=False, + nargs="*", + action="store", + dest="desc", + help='Use to specify column names to populate Meadow description field with. Can take multiple inputs. Information from each column will be separated by a ";" in the description. Example usage: -d "Date/Time" "Barcode". If not specified, script will default to looking for the column "inventory_title"', +) +# parser.add_argument('--newline_limit', '-n', required=False, nargs=1, action='store', dest='output_path', type=int, help='Limit fields imported into the description field to a certain number of newlines.') +parser.add_argument( + "--auxiliary", + "-x", + required=False, + nargs=1, + action="store", + dest="aux_parse", + help="Sets how to parse auxiliary files. Options include: extension (by extension), parse (by word), none (no aux files). 
Default is none.", +) +parser.add_argument( + "--prepend_accession", + "-p", + action="store", + dest="prepend", + type=str, + help="set a string to be added to the beginning of the file accession number when generated", +) args = parser.parse_args() diff --git a/Meadow/mig_av/mig_av/mig_av_parser_funcs.py b/Meadow/mig_av/mig_av/mig_av_parser_funcs.py index 5c06539..2645156 100644 --- a/Meadow/mig_av/mig_av/mig_av_parser_funcs.py +++ b/Meadow/mig_av/mig_av/mig_av_parser_funcs.py @@ -4,53 +4,51 @@ def get_label(role_dict, filename, inventory_label): - - - #run through each key in role_dict - #if it matches on extension, it should be removed and passed to the next check + # run through each key in role_dict + # if it matches on extension, it should be removed and passed to the next check label = None for i in role_dict: if not label: - if role_dict[i]['type'] == 'extension': - if filename.endswith(tuple(role_dict[i]['identifiers'])): - role = role_dict[i]['role'] - if not role == 'X' and not inventory_label: - label = role_dict[i]['label'] + if role_dict[i]["type"] == "extension": + if filename.endswith(tuple(role_dict[i]["identifiers"])): + role = role_dict[i]["role"] + if not role == "X" and not inventory_label: + label = role_dict[i]["label"] elif inventory_label: - label = inventory_label + ' ' + role_dict[i]['label'] + label = inventory_label + " " + role_dict[i]["label"] else: - label = 'Asset ' + role_dict[i]['label'] - file_builder = role_dict[i]['file_builder'] - elif role_dict[i]['type'] == 'xparse': - if any(ext in filename for ext in role_dict[i]['identifiers']): - label = xparser(filename, role_dict[i]['identifiers'], - inventory_label) - role = role_dict[i]['role'] - file_builder = role_dict[i]['file_builder'] - elif role_dict[i]['type'] == 'pattern': - if any(ext in filename for ext in role_dict[i]['identifiers']): + label = "Asset " + role_dict[i]["label"] + file_builder = role_dict[i]["file_builder"] + elif role_dict[i]["type"] == "xparse": + if any(ext in filename for ext in role_dict[i]["identifiers"]): + label = xparser( + filename, role_dict[i]["identifiers"], inventory_label + ) + role = role_dict[i]["role"] + file_builder = role_dict[i]["file_builder"] + elif role_dict[i]["type"] == "pattern": + if any(ext in filename for ext in role_dict[i]["identifiers"]): label = label_creator(filename, inventory_label) - role = role_dict[i]['role'] - file_builder = role_dict[i]['file_builder'] + role = role_dict[i]["role"] + file_builder = role_dict[i]["file_builder"] elif not label: label = filename - role = 'S' - file_builder = '_supplementary_' - return label,role,file_builder + role = "S" + file_builder = "_supplementary_" + return label, role, file_builder def xparser(filename, pattern_list, inventory_label): - - - #TODO use regex instead so numbers could be extracted - parser_dict = {'reel' : ['_Reel', '-Reel'], - 'can' : ['_Can', '-Can'], - 'asset' : ['_Asset', '-Asset'], - 'back' : ['Back.'], - 'front' : ['Front.'], - 'side' : ['Side.'], - 'ephemera' : ['_Ephemera', '-Ephemera'] - } + # TODO use regex instead so numbers could be extracted + parser_dict = { + "reel": ["_Reel", "-Reel"], + "can": ["_Can", "-Can"], + "asset": ["_Asset", "-Asset"], + "back": ["Back."], + "front": ["Front."], + "side": ["Side."], + "ephemera": ["_Ephemera", "-Ephemera"], + } label_list = [] if inventory_label: label_list.append(inventory_label) @@ -58,7 +56,7 @@ def xparser(filename, pattern_list, inventory_label): for a in parser_dict.get(i): if a in filename: label_list.append(i) - 
#label_list.append(parser_dict(i)) + # label_list.append(parser_dict(i)) label = " ".join(i for i in label_list if i) if not label: label = filename @@ -66,37 +64,35 @@ def xparser(filename, pattern_list, inventory_label): def label_creator(filename, inventory_label): - - - ''' + """ parses item side information from filenames and updates the label accordingly >>> label_creator("P001-TEST-f01i01_v01s02.wav", "Reel 1") 'Reel 1 Side 2' - ''' - pattern_dict = {'side' : 's(\d{2})', - 'part' : 'p(\d{2})', - 'region' : 'r(\d{2})', - 'capture' : 'c(\d{2})' + """ + pattern_dict = { + "side": "s(\d{2})", + "part": "p(\d{2})", + "region": "r(\d{2})", + "capture": "c(\d{2})", } label_list = [inventory_label] - #print(pattern_dict['Side']['abbreviation']) - #regex for anything between pattern (- or _)v## and (- or _ or .) - filename_regex = re.findall(r'[-_]v\d{2}(.*?)[-_.]', filename) - #count pattern to check if it appears multiple times + # print(pattern_dict['Side']['abbreviation']) + # regex for anything between pattern (- or _)v## and (- or _ or .) + filename_regex = re.findall(r"[-_]v\d{2}(.*?)[-_.]", filename) + # count pattern to check if it appears multiple times filename_count = len(filename_regex) if filename_count > 1: - #do not attempt to make sense of pattern collisions + # do not attempt to make sense of pattern collisions print("WARNING: " + filename + " Filename label information was not parsed!") filename_labels = None elif filename_count < 1: filename_labels = None else: - #convert findall results to string + # convert findall results to string filename_regex_string = "".join(filename_regex) - filename_labels = parse_filename_label(filename_regex_string, - pattern_dict) - #Append side string to Label string + filename_labels = parse_filename_label(filename_regex_string, pattern_dict) + # Append side string to Label string if filename_labels: label_list.extend(filename_labels) label = " ".join(i for i in label_list if i) @@ -106,16 +102,13 @@ def label_creator(filename, inventory_label): def parse_filename_label(filename_regex_string, pattern_dict): - - filename_labels = [] for key in pattern_dict.keys(): - component_number_full = re.search(pattern_dict[key], - filename_regex_string) - #strip leading zero from the (\d{2}) of the matched pattern + component_number_full = re.search(pattern_dict[key], filename_regex_string) + # strip leading zero from the (\d{2}) of the matched pattern if component_number_full: component_number_clean = component_number_full[1].lstrip("0") - #construct the "Side String" + # construct the "Side String" component_string = key + " " + component_number_clean else: component_string = None @@ -125,4 +118,5 @@ def parse_filename_label(filename_regex_string, pattern_dict): if __name__ == "__main__": import doctest + doctest.testmod() diff --git a/Meadow/mig_av/run.py b/Meadow/mig_av/run.py index 2962810..9240822 100644 --- a/Meadow/mig_av/run.py +++ b/Meadow/mig_av/run.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 import sys + if sys.version_info[0] < 3: raise Exception("Python 3 or a more recent version is required.") + def main(): import mig_av.mig_av as mig_av + mig_av.mig_av_main() + if __name__ == "__main__": main() From 0c580cf68a7f9ed600d0cece10679c4317f6db2a Mon Sep 17 00:00:00 2001 From: SophiaFrancis1 <112432333+SophiaFrancis1@users.noreply.github.com> Date: Fri, 8 Sep 2023 12:05:09 -0500 Subject: [PATCH 2/3] Create StyleGuidelinesforPython.md --- GeneralInfo/StyleGuidelinesforPython.md | 62 +++++++++++++++++++++++++ 1 file changed, 62 insertions(+) 
create mode 100644 GeneralInfo/StyleGuidelinesforPython.md

diff --git a/GeneralInfo/StyleGuidelinesforPython.md b/GeneralInfo/StyleGuidelinesforPython.md
new file mode 100644
index 0000000..5e48f1d
--- /dev/null
+++ b/GeneralInfo/StyleGuidelinesforPython.md
@@ -0,0 +1,62 @@
+Version date: 09/06/2023
+
+Document owner: SF
+
+Examples from PEP 8
+
+# Below are the standards that we will be using for our Python scripts
+
+### Layout
+* Use 4 spaces for indentation
+* No mixing tabs and spaces! Only use spaces or a tab that converts to spaces
+* Max characters per line = 79
+* Wrapped elements should be vertically aligned
+  * Unless they are an extended if statement; then use additional indentation
+* Use implied line continuation inside parentheses
+* Blank lines
+  * Top-level function & class definitions - surround with 2 blank lines
+  * Method definitions - surround with 1 blank line
+  * Between logical groups inside a function - 1 blank line
+  * Otherwise do NOT use blank lines
+* Use UTF-8 encoding
+
+### Imports
+* Each import should be on a separate line
+* Listed after module comments and docstrings, and before module globals and constants
+* Group imports
+  1. Standard library imports
+  2. Related third party imports
+  3. Local application/library specific imports
+* Blank line between each group
+* Absolute imports only
+* No wildcard imports
+
+### Naming Conventions
+* Functions
+  * Lowercase, with words separated by underscores
+* Variables
+  * Lowercase, with words separated by underscores; avoid single-letter names
+* Classes
+  * CapWords: start each word with a capital letter; no underscores
+* Methods
+  * Lowercase, with words separated by underscores
+* Constants
+  * Uppercase, with words separated by underscores
+* Modules
+  * Short lowercase names, with words separated by underscores
+* Packages
+  * Short lowercase names; no underscores
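+
+To make the layout and naming rules above concrete, here is a minimal, illustrative sketch.
+The module, class, and function names (`MediaFileChecker`, `count_streams`) are invented for
+this example and are not taken from our scripts; the ffprobe call is only a stand-in command.
+
+```python
+"""Example module; module file names are short and lowercase."""
+
+import os          # standard library imports, one per line
+import subprocess
+
+MAX_RETRIES = 3    # constant: uppercase with underscores
+
+
+class MediaFileChecker:
+    """Class names use CapWords with no underscores."""
+
+    def file_exists(self, file_path):
+        """Method names: lowercase with underscores; 4-space indents."""
+        return os.path.isfile(file_path)
+
+
+def count_streams(ffprobe_path, input_file):
+    """Function and variable names: lowercase with underscores."""
+    # Implied line continuation inside parentheses keeps lines under 79.
+    result = subprocess.run(
+        [
+            ffprobe_path,
+            "-v", "error",
+            "-show_entries", "format=nb_streams",
+            "-of", "json",
+            input_file,
+        ],
+        capture_output=True,
+        text=True,
+        timeout=MAX_RETRIES * 10,  # illustrative use of the constant
+    )
+    return result.stdout
+```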
+ +#### Resources: + +##### Auto PEP8 Compliance in VSCode: +* Extensions + * AutoPep8 + * Yapf + * Black + +* Packages that check PEP8 compliance + * PyCodeStyle + * Flake 8 + From 51b6f7e5f072b1e4e4cd0499093e80aa50904a9b Mon Sep 17 00:00:00 2001 From: SophiaFrancis1 <112432333+SophiaFrancis1@users.noreply.github.com> Date: Fri, 8 Sep 2023 12:26:01 -0500 Subject: [PATCH 3/3] AddNewVideoScriptsStyleGuidelinesupdate --- .../VideoScript/Arguments/__init__.py | 0 AV/NewVideoWIP/VideoScript/CSV/__init__.py | 0 .../VideoScript/CSV/generatecodinghistory.py | 30 ++++ .../VideoScript/CSV/writeoutputcsv.py | 15 ++ .../Dictionaries/equipmentdictionary.py | 100 +++++++++++++ .../VideoScript/FFMPEG/TwoPassEncodingH264.py | 113 +++++++++++++++ AV/NewVideoWIP/VideoScript/FFMPEG/__init__.py | 0 .../VideoScript/FFProbe/FFProbeReport.py | 97 +++++++++++++ .../VideoScript/FFProbe/__init__.py | 0 .../FFProbe/parse_ffprobe_metadata_lists.py | 108 +++++++++++++++ .../VideoScript/GraphicalOutPut/__init__.py | 0 .../GraphicalOutPut/audiopmspectrogram.py | 9 ++ .../GraphicalOutPut/spectrograms.py | 24 ++++ AV/NewVideoWIP/VideoScript/Hashlib.py | 29 ++++ AV/NewVideoWIP/VideoScript/JSON/createJSON.py | 54 ++++++++ .../VideoScript/JSON/dictJSONoutput.py | 4 + .../MediaConch/ParseMediaConchResults.py | 11 ++ .../VideoScript/MediaConch/__init__.py | 0 .../VideoScript/PassFail/__init__.py | 0 .../VideoScript/PassFail/losslessCheck.py | 21 +++ .../PassFail/mediaconchpassfail.py | 16 +++ .../VideoScript/QCTools/QCToolsReport.py | 6 + .../VideoScript/QCTools/__init__.py | 0 AV/NewVideoWIP/VideoScript/SetUp/Arguments.py | 131 ++++++++++++++++++ .../VideoScript/SetUp/AssignInputDirectory.py | 3 + .../SetUp/AssignOutputDirectory.py | 3 + .../VideoScript/SetUp/CheckMixDownArgs.py | 9 ++ .../SetUp/CreateTranscodeOutputDirectory.py | 20 +++ .../VideoScript/SetUp/FFMPEGCheck.py | 18 +++ .../VideoScript/SetUp/FFProbeCheck.py | 12 ++ .../VideoScript/SetUp/InputCheck.py | 14 ++ .../VideoScript/SetUp/InventoryCheck.py | 8 ++ .../VideoScript/SetUp/MediaConchCheck.py | 12 ++ .../SetUp/MediaConchPolicyCheck.py | 7 + .../VideoScript/SetUp/OutputCheck.py | 14 ++ AV/NewVideoWIP/VideoScript/SetUp/QcilCheck.py | 12 ++ .../SetUp/RequiredProgramsPresent.py | 11 ++ AV/NewVideoWIP/VideoScript/SetUp/__init__.py | 0 AV/NewVideoWIP/VideoScript/SetUp/importcsv.py | 91 ++++++++++++ .../VideoScript/Stream_MD5_status.py | 8 ++ .../SystemFileDirectory/convertruntime.py | 6 + .../SystemFileDirectory/generatesystemlog.py | 12 ++ .../SystemFileDirectory/guessdate.py | 10 ++ .../StyleGuidelinesforPython.md | 6 +- .../StyleandFormat/StyleandFormatResources.md | 19 +++ 45 files changed, 1061 insertions(+), 2 deletions(-) create mode 100644 AV/NewVideoWIP/VideoScript/Arguments/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/CSV/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/CSV/generatecodinghistory.py create mode 100644 AV/NewVideoWIP/VideoScript/CSV/writeoutputcsv.py create mode 100644 AV/NewVideoWIP/VideoScript/Dictionaries/equipmentdictionary.py create mode 100644 AV/NewVideoWIP/VideoScript/FFMPEG/TwoPassEncodingH264.py create mode 100644 AV/NewVideoWIP/VideoScript/FFMPEG/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/FFProbe/FFProbeReport.py create mode 100644 AV/NewVideoWIP/VideoScript/FFProbe/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/FFProbe/parse_ffprobe_metadata_lists.py create mode 100644 AV/NewVideoWIP/VideoScript/GraphicalOutPut/__init__.py create mode 100644 
AV/NewVideoWIP/VideoScript/GraphicalOutPut/audiopmspectrogram.py create mode 100644 AV/NewVideoWIP/VideoScript/GraphicalOutPut/spectrograms.py create mode 100644 AV/NewVideoWIP/VideoScript/Hashlib.py create mode 100644 AV/NewVideoWIP/VideoScript/JSON/createJSON.py create mode 100644 AV/NewVideoWIP/VideoScript/JSON/dictJSONoutput.py create mode 100644 AV/NewVideoWIP/VideoScript/MediaConch/ParseMediaConchResults.py create mode 100644 AV/NewVideoWIP/VideoScript/MediaConch/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/PassFail/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/PassFail/losslessCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/PassFail/mediaconchpassfail.py create mode 100644 AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py create mode 100644 AV/NewVideoWIP/VideoScript/QCTools/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/Arguments.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/AssignInputDirectory.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/AssignOutputDirectory.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/CheckMixDownArgs.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/CreateTranscodeOutputDirectory.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/FFMPEGCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/FFProbeCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/InputCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/InventoryCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/MediaConchCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/MediaConchPolicyCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/OutputCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/QcilCheck.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/RequiredProgramsPresent.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/__init__.py create mode 100644 AV/NewVideoWIP/VideoScript/SetUp/importcsv.py create mode 100644 AV/NewVideoWIP/VideoScript/Stream_MD5_status.py create mode 100644 AV/NewVideoWIP/VideoScript/SystemFileDirectory/convertruntime.py create mode 100644 AV/NewVideoWIP/VideoScript/SystemFileDirectory/generatesystemlog.py create mode 100644 AV/NewVideoWIP/VideoScript/SystemFileDirectory/guessdate.py rename GeneralInfo/{ => StyleandFormat}/StyleGuidelinesforPython.md (89%) create mode 100644 GeneralInfo/StyleandFormat/StyleandFormatResources.md diff --git a/AV/NewVideoWIP/VideoScript/Arguments/__init__.py b/AV/NewVideoWIP/VideoScript/Arguments/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/CSV/__init__.py b/AV/NewVideoWIP/VideoScript/CSV/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/CSV/generatecodinghistory.py b/AV/NewVideoWIP/VideoScript/CSV/generatecodinghistory.py new file mode 100644 index 0000000..a4e7e67 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/CSV/generatecodinghistory.py @@ -0,0 +1,30 @@ +from Dictionaries import equipmentDict as equipment_dict + + +def generate_coding_history(coding_history, hardware, append_list): + ''' + Formats hardware into BWF style coding history. Takes a piece of hardware (formatted: 'model; serial No.'), splits it at ';' and then searches the equipment dictionary for that piece of hardware. Then iterates through a list of other fields to append in the free text section. 
If the hardware is not found in the equipment dictionary this will just pull the info from the csv file and leave out some of the BWF formatting. + ''' + equipmentDict = equipment_dict.equipment_dict() + if hardware.split(';')[0] in equipmentDict.keys(): + hardware_history = equipmentDict[hardware.split(';')[0]]['''Coding + Algorithm'''] + ',' + 'T=' + hardware + for i in append_list: + if i: + hardware_history += '; ' + hardware_history += i + if 'Hardware Type' in equipmentDict.get(hardware.split(';')[0]): + hardware_history += '; ' + hardware_history += equipmentDict[hardware.split(';')[0]]['Hardware Type'] + coding_history.append(hardware_history) + #handle case where equipment is not in the equipmentDict using a more general format + elif hardware and not hardware.split(';')[0] in equipmentDict.keys(): + hardware_history = hardware + for i in append_list: + if i: + hardware_history += '; ' + hardware_history += i + coding_history.append(hardware_history) + else: + pass + return coding_history diff --git a/AV/NewVideoWIP/VideoScript/CSV/writeoutputcsv.py b/AV/NewVideoWIP/VideoScript/CSV/writeoutputcsv.py new file mode 100644 index 0000000..d0306e5 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/CSV/writeoutputcsv.py @@ -0,0 +1,15 @@ +import os +import csv + +from VideoScript.SetUp.AssignOutputDirectory import outdir as outdir + + +def write_output_csv(outdir, csvHeaderList, csvWriteList, output_metadata, qcResults): + csv_file = os.path.join(outdir, "qc_log.csv") + csvOutFileExists = os.path.isfile(csv_file) + + with open(csv_file, "a") as f: + writer = csv.writer(f, delimiter=",", lineterminator="\n") + if not csvOutFileExists: + writer.writerow(csvHeaderList) + writer.writerow(csvWriteList) diff --git a/AV/NewVideoWIP/VideoScript/Dictionaries/equipmentdictionary.py b/AV/NewVideoWIP/VideoScript/Dictionaries/equipmentdictionary.py new file mode 100644 index 0000000..a8740fa --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/Dictionaries/equipmentdictionary.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +def equipment_dict(): + + ''' + VTRs + ''' + panasonic_10518 = { + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'], + 'NU Tag' : '10518', + 'Equipment info' : 'Panasonic AG-1980' + } + + sony_10516 = { + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'], + 'NU Tag' : '10516', + 'Equipment info':'Sony SVO-5800' + } + + sony_vo9800 = { + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'], + 'NU Tag': '95095', + 'Equipment info':'Sony VO-9800' + } + + sony_uvw1800 ={ + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'] + } + + pioneer_dvl919 ={ + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'] + } + + panasonic_ajd2390h = { + 'Coding Algorithm' : 'A=ANALOG', + 'Output is one of' : ['Composite', 'S-Video'] + } + + + + + ''' + TBCs + ''' + + + dps_295 = { + 'Coding Algorithm' : 'A=ANALOG', + 'Hardware Type' : 'TBC', + 'Output is one of' : ['Composite', 'S-Video', 'Component'] + } + + fa_510 = { + 'Coding Algorithm' : 'A=ANALOG', + 'Hardware Type' : 'TBC', + 'Output is one of' : ['Composite', 'S-Video', 'Component'] + } + + + + ''' + ADCs + ''' + hd10ava_10519 = { + 'Coding Algorithm' : 'A=SDI', + 'Hardware Type' : 'A/D' + } + + + + ''' + Capture Cards + ''' + decklinkstudio4k = { + 'Coding Algorithm' : 'A=v210', + 'Hardware Type' : 'DIO' + } + + equipment_dict = { + 'Panasonic LP Rack 1' : panasonic_10518, + 'DPS TBC Rack 1' : dps_295, + 
'FA-510 Rack 2' : fa_510, + 'AJA Rack 1' : hd10ava_10519, + + 'Black Magic Rack 1' : decklinkstudio4k, + 'Sony SVHS Rack 1' : sony_10516, + 'Sony Betacam' : sony_uvw1800, + 'Sony VO-9800' : sony_vo9800, + 'Pioneer DVL-919' : pioneer_dvl919, + 'Panasonic AJ-D230H' : panasonic_ajd2390h + + + } + + return equipment_dict diff --git a/AV/NewVideoWIP/VideoScript/FFMPEG/TwoPassEncodingH264.py b/AV/NewVideoWIP/VideoScript/FFMPEG/TwoPassEncodingH264.py new file mode 100644 index 0000000..5e68a27 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/FFMPEG/TwoPassEncodingH264.py @@ -0,0 +1,113 @@ +import os +import subprocess +from VideoScript.Arguments.Arguments import args +import VideoScript.FFProbe.parse_ffprobe_metadata_lists as ffprobemeta + + +mixDown4to3 = [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]" "-map", + "0:v", + "-map", + "[a]", + "-map", + "0:a:2", + "-map", + "0:a:3", +] +mixdown2to1 = [ + "-filter_complex", + "[0:a:0][0:a:1]amerge=inputs=2[a]", + "-map", + "0:v", + "-map", + "[a]", +] +mixDown4to2 = [ + "-filter_complex", + """[0:a:0][0:a:1]amerge=inputs=2[a]; [0:a:2] + [0:a:3]amerge=inputs=2[b]""", + "-map", + "0:v", + "-map", + "[a]", + "-map", + "[b]", +] + + +def two_pass_h264_encoding(audiostreamCounter, outputAbsPath, acAbsPath): + if os.name == "nt": + nullOut = "NUL" + else: + nullOut = "/dev/null" + + # Pass One + + pass1 = [args.ffmpeg_path] + + if not args.verbose: + pass1 += ["-loglevel", "error"] + + pass1 += [ + "-y", + "-i", + outputAbsPath, + "-c:v", + "libx264", + "-preset", + "medium", + "-b:v", + "8000k", + "-pix_fmt", + "yuv420p", + "-pass", + "1", + ] + + if audiostreamCounter > 0: + if args.mixdown == "copy": + pass1 += ["-c:a", "aac", "-b:a", "128k"] + if args.mixdown == "4to3" and audiostreamCounter == 4: + pass1 += mixDown4to3 + if args.mixdown == "4to2" and audiostreamCounter == 4: + pass1 += mixDown4to2 + + if args.mixdown == "2to1" and audiostreamCounter == 2: + pass1 += mixdown2to1 + + pass1 += ["-f", "mp4", nullOut] + + # Pass Two + + pass2 = [args.ffmpeg_path] + if not args.verbose: + pass2 += ["-loglevel", "error"] + pass2 += [ + "-y", + "-i", + outputAbsPath, + "-c:v", + "libx264", + "-preset", + "medium", + "-b:v", + "8000k", + "-pix_fmt", + "yuv420p", + "-pass", + "2", + ] + + if audiostreamCounter > 0: + if args.mixdown == "copy": + pass2 += ["-c:a", "aac", "-b:a", "128k"] + if args.mixdown == "4to3" and audiostreamCounter == 4: + pass2 += mixDown4to3 + if args.mixdown == "4to2" and audiostreamCounter == 4: + pass2 += mixDown4to2 + if args.mixdown == "2to1" and audiostreamCounter == 2: + pass2 += mixdown2to1 + pass2 += [acAbsPath] + subprocess.run(pass1) + subprocess.run(pass2) diff --git a/AV/NewVideoWIP/VideoScript/FFMPEG/__init__.py b/AV/NewVideoWIP/VideoScript/FFMPEG/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/FFProbe/FFProbeReport.py b/AV/NewVideoWIP/VideoScript/FFProbe/FFProbeReport.py new file mode 100644 index 0000000..cf572dc --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/FFProbe/FFProbeReport.py @@ -0,0 +1,97 @@ +import subprocess +import json +from Arguments.Arguments import args + + +def ffprobe_report(filename, input_file_abspath): + video_output = json.loads( + subprocess.check_output( + [ + args.ffprobe_path, + "-v", + "error", + "-selected_streams", + "v", + "-show_entries", + "stream=codec_name, avg_frame_rate, codec_time_base, width,height, pix_fmt, sample_aspect_ratio, display_aspect_ratio, color_range, color_space, color_transfer, color_primaries, chroma_location, 
field_order, codec_tag_string",
+                input_file_abspath,
+                "-of",
+                "json",
+            ]
+        )
+        .decode("ascii")
+        .rstrip()
+    )
+
+    audio_output = json.loads(
+        subprocess.check_output(
+            [
+                args.ffprobe_path,
+                "-v",
+                "error",
+                "-select_streams",
+                "a",
+                "-show_entries",
+                "stream=codec_long_name, bits_per_raw_sample, sample_rate, channels",
+                input_file_abspath,
+                "-of",
+                "json",
+            ]
+        )
+        .decode("ascii")
+        .rstrip()
+    )
+
+    format_output = json.loads(
+        subprocess.check_output(
+            [
+                args.ffprobe_path,
+                "-v",
+                "error",
+                "-show_entries",
+                "format=duration, size, nb_streams",
+                input_file_abspath,
+                "-of",
+                "json",
+            ]
+        )
+        .decode("ascii")
+        .rstrip()
+    )
+
+    data_output = json.loads(
+        subprocess.check_output(
+            [
+                args.ffprobe_path,
+                "-v",
+                "error",
+                "-select_streams",
+                "d",
+                "-show_entries",
+                "stream=codec_tag_string",
+                input_file_abspath,
+                "-of",
+                "json",
+            ]
+        )
+        .decode("ascii")
+        .rstrip()
+    )
+    attachment_output = json.loads(
+        subprocess.check_output(
+            [
+                args.ffprobe_path,
+                "-v",
+                "error",
+                "-select_streams",
+                "t",
+                "-show_entries",
+                "stream_tags=filename",
+                input_file_abspath,
+                "-of",
+                "json",
+            ]
+        )
+        .decode("ascii")
+        .rstrip()
+    )
diff --git a/AV/NewVideoWIP/VideoScript/FFProbe/__init__.py b/AV/NewVideoWIP/VideoScript/FFProbe/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AV/NewVideoWIP/VideoScript/FFProbe/parse_ffprobe_metadata_lists.py b/AV/NewVideoWIP/VideoScript/FFProbe/parse_ffprobe_metadata_lists.py
new file mode 100644
index 0000000..37a69ab
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/FFProbe/parse_ffprobe_metadata_lists.py
@@ -0,0 +1,108 @@
+from Arguments.Arguments import args
+import FFProbeReport
+
+
+def ffprobemetadata():
+    video_codec_name_list = [
+        stream.get("codec_name") for stream in (FFProbeReport.video_output["streams"])
+    ]
+    audio_codec_name_list = [
+        stream.get("codec_long_name")
+        for stream in (FFProbeReport.audio_output["streams"])
+    ]
+    data_streams = [
+        stream.get("codec_tag_string")
+        for stream in (FFProbeReport.data_output["streams"])
+    ]
+    width = [stream.get("width") for stream in (FFProbeReport.video_output["streams"])][
+        0
+    ]
+    height = [
+        stream.get("height") for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    pixel_format = [
+        stream.get("pix_fmt") for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    sar = [
+        stream.get("sample_aspect_ratio")
+        for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    dar = [
+        stream.get("display_aspect_ratio")
+        for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    framerate = [
+        stream.get("avg_frame_rate")
+        for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    color_space = [
+        stream.get("color_space") for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    color_range = [
+        stream.get("color_range") for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    color_transfer = [
+        stream.get("color_transfer")
+        for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    color_primaries = [
+        stream.get("color_primaries")
+        for stream in (FFProbeReport.video_output["streams"])
+    ][0]
+    audio_bitrate = [
+        stream.get("bits_per_raw_sample")
+        for stream in (FFProbeReport.audio_output["streams"])
+    ][0]
+    audio_sample_rate = [
+        stream.get("sample_rate") for stream in (FFProbeReport.audio_output["streams"])
+    ][0]
+    audio_channels = [
+        stream.get("channels") for stream in (FFProbeReport.audio_output["streams"])
+    ][0]
+    audio_stream_count = len(audio_codec_name_list)
+
+    tags = [
+        streams.get("tags") for
streams in (FFProbeReport.attachment_output["streams"]) + ] + attachment_list = [] + for i in tags: + attachmentFilename = [i.get("filename")] + attachment_list.extend(attachmentFilename) + + file_metadata = { + "filename": filename, + "filesize": FFProbeReport.format_output.get("format")["size"], + "duration": FFProbeReport.format_output.get("format")["duration"], + "streams": FFProbeReport.format_output.get("format")["nb_streams"], + "video streams": video_codec_name_list, + "audio streams": audio_codec_name_list, + "data streams": data_streams, + "attachments": attachment_list, + } + + techMetaV = { + "width": width, + "height": height, + "sample aspect ratio": sar, + "display aspect ratio": dar, + "pixel format": pixel_format, + "framerate": framerate, + "color space": color_space, + "color range": color_range, + "color primaries": color_primaries, + "color transfer": color_transfer, + } + + techMetaA = { + "audio stream count": audio_stream_count, + "audio bitrate": audio_bitrate, + "audio sample rate": audio_sample_rate, + "channels": audio_channels, + } + + ffprobe_metadata = { + "file metadata": file_metadata, + "techMetaV": techMetaV, + "techMetaA": techMetaA, + } + + return ffprobe_metadata diff --git a/AV/NewVideoWIP/VideoScript/GraphicalOutPut/__init__.py b/AV/NewVideoWIP/VideoScript/GraphicalOutPut/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/GraphicalOutPut/audiopmspectrogram.py b/AV/NewVideoWIP/VideoScript/GraphicalOutPut/audiopmspectrogram.py new file mode 100644 index 0000000..1c8616b --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/GraphicalOutPut/audiopmspectrogram.py @@ -0,0 +1,9 @@ +from spectrograms import generate_spectrogram as generate_spectrogram +from VideoScript.Arguments.Arguments import args + +if audiostreamCounter > 0 and not args.skip_spectrogram: + print("*generating QC spectrograms*") + channel_layout_list = input_metadata["techMetaA"]["channels"] + generate_spectrogram( + output_AbsPath, channel_layout_list, metaOutputFolder, baseFilename + ) diff --git a/AV/NewVideoWIP/VideoScript/GraphicalOutPut/spectrograms.py b/AV/NewVideoWIP/VideoScript/GraphicalOutPut/spectrograms.py new file mode 100644 index 0000000..50c5251 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/GraphicalOutPut/spectrograms.py @@ -0,0 +1,24 @@ +import os +import subprocess +from VideoScript.Arguments.Arguments import args + + +def generate_spectrogram(input, channel_layout_list, outputFolder, outputName): + spectrogram_resolution = "1920x1080" + for index, item in enumerate(channel_layout_list): + output = os.path.join(outputFolder, outputName + "_0a" + str(index) + ".png") + spectrogram_args = [args.ffmpeg_path] + spectrogram_args += ["-loglevel", "error", "-y"] + spectrogram_args += ["-i", input, "-lavfi"] + if item > 1: + spectrogram_args = +[ + "[0:a:%(a)s]showspectrumpic=s=%(b)s" + % {"a": index, "b": spectrogram_resolution} + ] + else: + spectrogram_args += [ + "[0:a:%(a)s]showspectrumpic=s=(b)s" + % {"a": index, "b": spectrogram_resolution} + ] + spectrogram_args += [output] + subprocess.run(spectrogram_args) diff --git a/AV/NewVideoWIP/VideoScript/Hashlib.py b/AV/NewVideoWIP/VideoScript/Hashlib.py new file mode 100644 index 0000000..7f1a1b1 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/Hashlib.py @@ -0,0 +1,29 @@ +import hashlib +import os +import sys +from Arguments.Arguments import args + +""" +Credit: IFI Scripts +""" + + +def hashlib_md5(filename): + read_size = 0 + last_percent_done = 0 + chksm = hashlib.md5() + total_size = 
os.path.getsize(filename)
+    with open(filename, "rb") as f:
+        while True:
+            buf = f.read(2**20)
+            if not buf:
+                break
+            read_size += len(buf)
+            chksm.update(buf)
+            percent_done = 100 * read_size / total_size
+            if percent_done > last_percent_done:
+                sys.stdout.write("[%d%%]\r" % percent_done)
+                sys.stdout.flush()
+                last_percent_done = percent_done
+    md5_output = chksm.hexdigest()
+    return md5_output
diff --git a/AV/NewVideoWIP/VideoScript/JSON/createJSON.py b/AV/NewVideoWIP/VideoScript/JSON/createJSON.py
new file mode 100644
index 0000000..bbc7078
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/JSON/createJSON.py
@@ -0,0 +1,54 @@
+import json
+
+
+def create_json(
+    jsonAbsPath,
+    systemInfo,
+    input_metadata,
+    mov_stream_sum,
+    mkvHash,
+    mkv_stream_sum,
+    baseFilename,
+    output_metadata,
+    item_csvDict,
+    qcResults,
+):
+    input_techMetaV = input_metadata.get("techMetaV")
+    input_techMetaA = input_metadata.get("techMetaA")
+    output_techMetaV = output_metadata.get("techMetaV")
+    output_techMetaA = output_metadata.get("techMetaA")
+    output_file_metadata = output_metadata.get("file metadata")
+
+    # create dictionary for json output
+    data = {}
+    data[baseFilename] = []
+
+    # gather pre and post transcode file metadata for json output
+    ffv1_file_meta = {}
+    # add stream checksums to metadata
+    ffv1_md5_dict = {"md5 checksum": mkvHash, "a/v streamMD5s": mkv_stream_sum}
+    output_file_metadata = {**output_file_metadata, **ffv1_md5_dict}
+    ffv1_file_meta = {"post-transcode metadata": output_file_metadata}
+
+    # gather technical metadata for json output
+    techdata = {}
+    video_techdata = {}
+    audio_techdata = {}
+    techdata["technical metadata"] = []
+    video_techdata = {"video": input_techMetaV}
+    audio_techdata = {"audio": input_techMetaA}
+    techdata["technical metadata"].append(video_techdata)
+    techdata["technical metadata"].append(audio_techdata)
+
+    # gather metadata from csv dictionary as capture metadata
+    csv_metadata = {"inventory metadata": item_csvDict}
+
+    system_info = {"system information": systemInfo}
+
+    data[baseFilename].append(csv_metadata)
+    data[baseFilename].append(system_info)
+    data[baseFilename].append(ffv1_file_meta)
+    data[baseFilename].append(techdata)
+    data[baseFilename].append(qcResults)
+    with open(jsonAbsPath, "w", newline="\n") as outfile:
+        json.dump(data, outfile, indent=4)
diff --git a/AV/NewVideoWIP/VideoScript/JSON/dictJSONoutput.py b/AV/NewVideoWIP/VideoScript/JSON/dictJSONoutput.py
new file mode 100644
index 0000000..511a509
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/JSON/dictJSONoutput.py
@@ -0,0 +1,4 @@
+def dictionaryJSONoutput():
+    data = {}
+    data[baseFilename] = {}
+
diff --git a/AV/NewVideoWIP/VideoScript/MediaConch/ParseMediaConchResults.py b/AV/NewVideoWIP/VideoScript/MediaConch/ParseMediaConchResults.py
new file mode 100644
index 0000000..2301c87
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/MediaConch/ParseMediaConchResults.py
@@ -0,0 +1,11 @@
+def parse_mediaconchResults(mediaconchResults_dict):
+    if "FAIL" in mediaconchResults_dict.values():
+        mediaconchResults = "FAIL"
+        failed_policies = []
+        for key in mediaconchResults_dict.keys():
+            if mediaconchResults_dict.get(key) == "FAIL":
+                failed_policies.append(key)
+        mediaconchResults = mediaconchResults + ": " + str(failed_policies).strip("[]")
+    else:
+        mediaconchResults = "PASS"
+    return mediaconchResults
\ No newline at end of file
diff --git a/AV/NewVideoWIP/VideoScript/MediaConch/__init__.py b/AV/NewVideoWIP/VideoScript/MediaConch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git
a/AV/NewVideoWIP/VideoScript/PassFail/__init__.py b/AV/NewVideoWIP/VideoScript/PassFail/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/PassFail/losslessCheck.py b/AV/NewVideoWIP/VideoScript/PassFail/losslessCheck.py new file mode 100644 index 0000000..9d4d211 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/PassFail/losslessCheck.py @@ -0,0 +1,21 @@ +def lossless_check(input_metadata, output_metadata, streamMD5status): + if output_metadata.get("output_techMetaA") == input_metadata.get("input_techMetaV"): + QC_techMeta = "PASS" + else: + print("input and output technical metadata do not match") + QC_techMeta = "FAIL" + + losslessCheckDict = { + "technical metadata": QC_techMeta, + "stream checksums": streamMD5status, + } + if "FAIL" in losslessCheckDict.values(): + losslessCheck = "FAIL" + losslessFail = [] + for key in losslessCheckDict.keys(): + if "FAIL" in losslessCheckDict.get(key): + losslessFail.append(key) + losslessCheck = losslessCheck + ": " + str(losslessFail).strip("[]") + else: + lossless_check = "PASS" + return lossless_check diff --git a/AV/NewVideoWIP/VideoScript/PassFail/mediaconchpassfail.py b/AV/NewVideoWIP/VideoScript/PassFail/mediaconchpassfail.py new file mode 100644 index 0000000..ed49cee --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/PassFail/mediaconchpassfail.py @@ -0,0 +1,16 @@ +import subprocess +from VideoScript.SetUp.Arguments import args + + +def mediaconch_policy_check(input, policy): + mediaconchResults = ( + subprocess.check_output([args.mediaconch_path, "--policy=" + policy, input]) + .decode("ascii") + .rstrip() + .split()[0] + ) + if mediaconchResults == "pass!": + mediaconchResults = "PASS" + else: + mediaconchResults = "FAIL" + return mediaconchResults diff --git a/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py b/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py new file mode 100644 index 0000000..3573559 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py @@ -0,0 +1,6 @@ +import subprocess +from VideoScript.Arguments.Arguments import args + +def generate_qctools(input): + qctools_args=[args.qcli_path, '-i', input] + subprocess.run(qctools_args) diff --git a/AV/NewVideoWIP/VideoScript/QCTools/__init__.py b/AV/NewVideoWIP/VideoScript/QCTools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py b/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py new file mode 100644 index 0000000..938a334 --- /dev/null +++ b/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py @@ -0,0 +1,131 @@ +""" +Argument parser for in-house ffv1 to mp4 script +""" + +import argparse +import sys + +parser = argparse.ArgumentParser() + +parser.add_argument( + "--input", + "-i", + action="store", + dest="input_path", + type=str, + help="full path to input folder", +) +parser.add_argument( + "--output", + "-o", + action="store", + dest="output_path", + type=str, + help="full path to output folder", +) +parser.add_argument( + "--ffmpeg", + action="store", + dest="ffmpeg_path", + default="ffmpeg", + type=str, + help="For setting a custom ffmpeg path", +) +parser.add_argument( + "--ffprobe", + action="store", + dest="ffprobe_path", + default="ffprobe", + type=str, + help="For setting a custom ffprobe path", +) +parser.add_argument( + "--qcli", + action="store", + dest="qcli_path", + default="qcli", + type=str, + help="For setting a custom qcli path", +) +parser.add_argument( + "--mediaconch", + action="store", + dest="mediaconch_path", + default="mediaconch", + 
diff --git a/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py b/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py
new file mode 100644
index 0000000..3573559
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/QCTools/QCToolsReport.py
@@ -0,0 +1,7 @@
+import subprocess
+from VideoScript.SetUp.Arguments import args
+
+
+def generate_qctools(input):
+    qctools_args = [args.qcli_path, "-i", input]
+    subprocess.run(qctools_args)
diff --git a/AV/NewVideoWIP/VideoScript/QCTools/__init__.py b/AV/NewVideoWIP/VideoScript/QCTools/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py b/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py
new file mode 100644
index 0000000..938a334
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/Arguments.py
@@ -0,0 +1,131 @@
+"""
+Argument parser for in-house ffv1 to mp4 script
+"""
+
+import argparse
+import sys
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "--input",
+    "-i",
+    action="store",
+    dest="input_path",
+    type=str,
+    help="full path to input folder",
+)
+parser.add_argument(
+    "--output",
+    "-o",
+    action="store",
+    dest="output_path",
+    type=str,
+    help="full path to output folder",
+)
+parser.add_argument(
+    "--ffmpeg",
+    action="store",
+    dest="ffmpeg_path",
+    default="ffmpeg",
+    type=str,
+    help="For setting a custom ffmpeg path",
+)
+parser.add_argument(
+    "--ffprobe",
+    action="store",
+    dest="ffprobe_path",
+    default="ffprobe",
+    type=str,
+    help="For setting a custom ffprobe path",
+)
+parser.add_argument(
+    "--qcli",
+    action="store",
+    dest="qcli_path",
+    default="qcli",
+    type=str,
+    help="For setting a custom qcli path",
+)
+parser.add_argument(
+    "--mediaconch",
+    action="store",
+    dest="mediaconch_path",
+    default="mediaconch",
+    type=str,
+    help="For setting a custom mediaconch path",
+)
+parser.add_argument(
+    "--verbose",
+    required=False,
+    action="store_true",
+    help="view ffmpeg output when transcoding",
+)
+parser.add_argument(
+    "--mixdown",
+    action="store",
+    dest="mixdown",
+    default="copy",
+    type=str,
+    help="How the audio streams will be mapped for the access copy. If excluded, this will default to copying the stream configuration of the input. Inputs include: copy, 4to3, and 4to2. 4to3 takes 4 mono tracks and mixes tracks 1&2 to stereo while leaving tracks 3&4 mono. 4to2 takes 4 mono tracks and mixes tracks 1&2 and 3&4 to stereo.",
+)
+parser.add_argument(
+    "--slices",
+    action="store",
+    dest="ffv1_slice_count",
+    default=16,
+    choices=[4, 6, 9, 12, 16, 24, 30],
+    type=int,
+    help="Set the FFV1 slice count used by ffmpeg when losslessly transcoding files. Default is 16.",
+)
+parser.add_argument(
+    "--skipac",
+    required=False,
+    action="store_true",
+    dest="skip_ac",
+    help="skip access copy transcoding",
+)
+parser.add_argument(
+    "--skipqcli",
+    required=False,
+    action="store_true",
+    dest="skip_qcli",
+    help="skip generating qc tools report",
+)
+parser.add_argument(
+    "--skipspectrogram",
+    required=False,
+    action="store_true",
+    dest="skip_spectrogram",
+    help="skip generating spectrograms",
+)
+parser.add_argument(
+    "--keep_filename",
+    required=False,
+    action="store_true",
+    dest="keep_filename",
+    help="MKV preservation master will have the same filename as the source MOV file",
+)
+parser.add_argument(
+    "--embed_framemd5",
+    required=False,
+    action="store_true",
+    dest="embed_framemd5",
+    help="remux preservation file to embed framemd5",
+)
+parser.add_argument(
+    "--input_policy",
+    required=False,
+    action="store",
+    dest="input_policy",
+    help="Mediaconch policy for input files",
+)
+parser.add_argument(
+    "--output_policy",
+    required=False,
+    action="store",
+    dest="output_policy",
+    help="Mediaconch policy for output files",
+)
+
+args = parser.parse_args()
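The --mixdown help text above describes remapping four mono capture tracks for the access copy. As a rough sketch only (not this script's actual transcode command), a "4to2" mixdown can be expressed with an ffmpeg join filter along these lines, assuming an input file whose four audio streams are all mono; the file names are placeholders:

    import subprocess

    ffmpeg_4to2 = [
        "ffmpeg",
        "-i", "NUL_example_01_p.mkv",
        "-map", "0:v",
        "-filter_complex",
        "[0:a:0][0:a:1]join=inputs=2:channel_layout=stereo[a01];"
        "[0:a:2][0:a:3]join=inputs=2:channel_layout=stereo[a23]",
        "-map", "[a01]",
        "-map", "[a23]",
        "NUL_example_01_a.mp4",
    ]
    # tracks 1&2 and 3&4 each become one stereo stream in the access copy
    subprocess.run(ffmpeg_4to2)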
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/AssignInputDirectory.py b/AV/NewVideoWIP/VideoScript/SetUp/AssignInputDirectory.py
new file mode 100644
index 0000000..f0a5d29
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/AssignInputDirectory.py
@@ -0,0 +1,3 @@
+import InputCheck
+
+indir = InputCheck.input_check()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/AssignOutputDirectory.py b/AV/NewVideoWIP/VideoScript/SetUp/AssignOutputDirectory.py
new file mode 100644
index 0000000..808c464
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/AssignOutputDirectory.py
@@ -0,0 +1,3 @@
+import OutputCheck
+
+outdir = OutputCheck.output_check()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/CheckMixDownArgs.py b/AV/NewVideoWIP/VideoScript/SetUp/CheckMixDownArgs.py
new file mode 100644
index 0000000..65f0c81
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/CheckMixDownArgs.py
@@ -0,0 +1,9 @@
+from Arguments import args
+
+
+def check_mixdown_arg():
+    mixdown_list = ["copy", "4to3", "4to2", "2to1"]
+    if args.mixdown not in mixdown_list:
+        print("The selected audio mixdown is not a valid value")
+        print("Please use one of the following: copy, 4to3, 4to2, 2to1")
+        quit()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/CreateTranscodeOutputDirectory.py b/AV/NewVideoWIP/VideoScript/SetUp/CreateTranscodeOutputDirectory.py
new file mode 100644
index 0000000..d501527
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/CreateTranscodeOutputDirectory.py
@@ -0,0 +1,20 @@
+import os
+
+
+def create_transcode_output_folders(baseOutput, outputFolderList):
+    if not os.path.isdir(baseOutput):
+        try:
+            os.mkdir(baseOutput)
+        except:
+            print("Unable to create output folder:", baseOutput)
+    else:
+        print(baseOutput, "already exists")
+        print("Proceeding")
+    for folder in outputFolderList:
+        if not os.path.isdir(folder):
+            try:
+                os.mkdir(folder)
+            except:
+                print("Unable to create output folder:", folder)
+        else:
+            print("Using existing folder", folder, "as output")
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/FFMPEGCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/FFMPEGCheck.py
new file mode 100644
index 0000000..a90c50a
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/FFMPEGCheck.py
@@ -0,0 +1,18 @@
+import subprocess
+from Arguments import args
+
+
+def get_ffmpeg_version():
+    ffmpeg_version = "ffmpeg"
+    try:
+        ffmpeg_version = (
+            subprocess.check_output([args.ffmpeg_path, "-version"])
+            .decode("ascii")
+            .rstrip()
+            .splitlines()[0]
+            .split()[2]
+        )
+    except:
+        print("Error getting FFMPEG Version")
+        quit()
+    return ffmpeg_version
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/FFProbeCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/FFProbeCheck.py
new file mode 100644
index 0000000..d7a91be
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/FFProbeCheck.py
@@ -0,0 +1,12 @@
+import subprocess
+from Arguments import args
+
+
+def ffprobe_check():
+    try:
+        subprocess.check_output([args.ffprobe_path, "-version"]).decode(
+            "ascii"
+        ).rstrip().splitlines()[0].split()[2]
+    except:
+        print("Error locating FFProbe")
+        quit()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/InputCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/InputCheck.py
new file mode 100644
index 0000000..f29e9f3
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/InputCheck.py
@@ -0,0 +1,14 @@
+import os
+from Arguments import args
+
+
+def input_check():
+    if args.input_path:
+        indir = args.input_path
+    else:
+        print("No input provided")
+        quit()
+    if not os.path.isdir(indir):
+        print("input is not a directory")
+        quit()
+    return indir
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/InventoryCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/InventoryCheck.py
new file mode 100644
index 0000000..280c02c
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/InventoryCheck.py
@@ -0,0 +1,8 @@
+def inventory_check(item_csvDict):
+    if item_csvDict is None:
+        print("Unable to locate file in CSV data")
+        inventoryCheck = "FAIL"
+    else:
+        print("Item found in inventory")
+        inventoryCheck = "PASS"
+    return inventoryCheck
\ No newline at end of file
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/MediaConchCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/MediaConchCheck.py
new file mode 100644
index 0000000..7bfe4fe
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/MediaConchCheck.py
@@ -0,0 +1,12 @@
+import subprocess
+from Arguments import args
+
+
+def mediaconch_check():
+    try:
+        subprocess.check_output([args.mediaconch_path, "-v"]).decode(
+            "ascii"
+        ).rstrip().splitlines()[0]
+    except:
+        print("Error locating MediaConch")
+        quit()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/MediaConchPolicyCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/MediaConchPolicyCheck.py
new file mode 100644
index 0000000..ab546dc
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/MediaConchPolicyCheck.py
@@ -0,0 +1,7 @@
+import os
+
+def mediaconch_policy_exists(policy_path):
+    if not os.path.isfile(policy_path):
+        print("Unable to Find Mediaconch Policy:", policy_path)
+        print("Check if file exists before running")
+        quit()
\ No newline at end of file
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/OutputCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/OutputCheck.py
new file mode 100644
index 0000000..7085fd6
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/OutputCheck.py
@@ -0,0 +1,14 @@
+import os
+from Arguments import args
+
+
+def output_check():
+    if args.output_path:
+        outdir = args.output_path
+    else:
+        print("No output given. Using input as directory")
+        outdir = args.input_path
+    if not os.path.isdir(outdir):
+        print("Output is not a directory")
+        quit()
+    return outdir
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/QcilCheck.py b/AV/NewVideoWIP/VideoScript/SetUp/QcilCheck.py
new file mode 100644
index 0000000..d4023dd
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/QcilCheck.py
@@ -0,0 +1,12 @@
+import subprocess
+from Arguments import args
+
+
+def qcli_check():
+    try:
+        subprocess.check_output(
+            [args.qcli_path, "-version"]
+        ).decode("ascii").rstrip().splitlines()[0]
+    except:
+        print("Error locating qcli")
+        quit()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/RequiredProgramsPresent.py b/AV/NewVideoWIP/VideoScript/SetUp/RequiredProgramsPresent.py
new file mode 100644
index 0000000..019da31
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/RequiredProgramsPresent.py
@@ -0,0 +1,11 @@
+import QcilCheck
+import FFMPEGCheck
+import FFProbeCheck
+import MediaConchCheck
+from Arguments import args
+
+if not args.skip_qcli:
+    QcilCheck.qcli_check()
+MediaConchCheck.mediaconch_check()
+FFProbeCheck.ffprobe_check()
+ffvers = FFMPEGCheck.get_ffmpeg_version()
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/__init__.py b/AV/NewVideoWIP/VideoScript/SetUp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AV/NewVideoWIP/VideoScript/SetUp/importcsv.py b/AV/NewVideoWIP/VideoScript/SetUp/importcsv.py
new file mode 100644
index 0000000..ec6705f
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SetUp/importcsv.py
@@ -0,0 +1,91 @@
+import csv
+import sys
+from generatecodinghistory import generate_coding_history
+from VideoScript.SystemFileDirectory.guessdate import guess_date
+
+def import_csv(csvInventory):
+    csvDict = {}
+    try:
+        with open(csvInventory, encoding='utf-8') as f:
+            reader = csv.DictReader(f, delimiter=',')
+            video_fieldnames_list = ['filename', 'work_accession_number',
+                'ALMA number/Finding Aid', 'Barcode', 'description', 'Record Date/Time', 'Housing/Container Markings', 'Condition Notes', 'Format', 'Capture Date', 'Digitizer', 'VTR', 'VTR Output Used', 'Tape Brand', 'Tape Record Mode', 'TBC', 'TBC Output Used', 'ADC', 'Capture Card', 'Sound', 'Region', 'Capture Notes']
+            missing_fieldnames = [i for i in video_fieldnames_list if i not in
+                reader.fieldnames]
+            if not missing_fieldnames:
+                for row in reader:
+                    name = row['filename']
+                    id1 = row['work_accession_number']
+                    id2 = row['ALMA number/Finding Aid']
+                    id3 = row['Barcode']
+                    description = row['description']
+                    record_date = row['Record Date/Time']
+                    container_markings = row['Housing/Container Markings']
+                    container_markings = container_markings.split('\n')
+                    condition_notes = row['Condition Notes']
+                    format = row['Format']
+                    captureDate = row['Capture Date']
+
+                    if captureDate:
+                        captureDate = str(guess_date(captureDate))
+                    digitizationOperator = row['Digitizer']
+                    vtr = row['VTR']
+                    vtrOut = row['VTR Output Used']
+                    tapeBrand = row['Tape Brand']
+                    recordMode = row['Tape Record Mode']
+                    tbc = row['TBC']
+                    tbcOut = row['TBC Output Used']
+                    adc = row['ADC']
+                    dio = row['Capture Card']
+                    sound = row['Sound']
+                    sound = sound.split('\n')
+                    region = row['Region']
+                    capture_notes = row['Capture Notes']
+                    coding_history = []
+                    coding_history = generate_coding_history(coding_history,
+                        vtr, [tapeBrand, recordMode, region, vtrOut])
+                    coding_history = generate_coding_history(coding_history,
+                        tbc, [tbcOut])
+                    coding_history = generate_coding_history(coding_history,
+                        adc, [None])
+                    coding_history = generate_coding_history(coding_history,
+                        dio, [None])
+                    csvData = {'Accession number/Call number': id1,
+                        'ALMA number/Finding Aid': id2,
+                        'Barcode': id3,
+                        'Description': description,
+                        'Record Date': record_date,
+                        'Container Markings': container_markings,
+                        'Condition Notes': condition_notes,
+                        'Format': format,
+                        'Digitization Operator': digitizationOperator,
+                        'Capture Date': captureDate,
+                        'Coding History': coding_history,
+                        'Sound Note': sound,
+                        'Capture Notes': capture_notes
+                    }
+                    csvDict.update({name: csvData})
+            elif 'File name' not in missing_fieldnames:
+                print("WARNING: Unable to find all column names in csv file")
+                print('''"File name column found. Interpreting csv file as
+                file list"''')
+                print("CONTINUE? (y/n)")
+                yes = {'yes', 'y', 'ye', ''}
+                no = {'no', 'n'}
+                choice = input().lower()
+                if choice in yes:
+                    for row in reader:
+                        name = row['File name']
+                        csvData = {}
+                        csvDict.update({name: csvData})
+                elif choice in no:
+                    quit()
+                else:
+                    sys.stdout.write("Please respond with 'yes' or 'no'")
+                    quit()
+            else:
+                print("No matching column names found in csv file")
+        # print(csvDict)
+    except FileNotFoundError:
+        print("Issue importing csv file")
+    return csvDict
\ No newline at end of file
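For reference, a single-row inventory run through import_csv yields a dictionary shaped roughly like the sketch below; every value is a placeholder, and the Coding History entries depend entirely on generate_coding_history:

    csvDict = {
        'NUL_example_01': {
            'Accession number/Call number': 'ACC-0000-001',
            'ALMA number/Finding Aid': '990000000000',
            'Barcode': '00000000',
            'Description': 'placeholder description',
            'Record Date': '1/1/1990',
            'Container Markings': ['placeholder marking'],
            'Condition Notes': '',
            'Format': 'VHS',
            'Digitization Operator': 'placeholder operator',
            'Capture Date': '2023-09-01',
            'Coding History': ['placeholder coding history line'],
            'Sound Note': ['Stereo'],
            'Capture Notes': '',
        }
    }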
diff --git a/AV/NewVideoWIP/VideoScript/Stream_MD5_status.py b/AV/NewVideoWIP/VideoScript/Stream_MD5_status.py
new file mode 100644
index 0000000..9802258
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/Stream_MD5_status.py
@@ -0,0 +1,8 @@
+def stream_md5_status(input_streammd5, output_streammd5):
+    if output_streammd5 == input_streammd5:
+        print("Stream checksums match. Your file is lossless")
+        streamMD5status = "PASS"
+    else:
+        print("Stream checksums do not match. Your output file may not be lossless")
+        streamMD5status = "FAIL"
+    return streamMD5status
diff --git a/AV/NewVideoWIP/VideoScript/SystemFileDirectory/convertruntime.py b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/convertruntime.py
new file mode 100644
index 0000000..ce775fb
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/convertruntime.py
@@ -0,0 +1,6 @@
+import time
+
+
+def convert_runtime(duration):
+    runtime = time.strftime("%H:%M:%S", time.gmtime(float(duration)))
+    return runtime
diff --git a/AV/NewVideoWIP/VideoScript/SystemFileDirectory/generatesystemlog.py b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/generatesystemlog.py
new file mode 100644
index 0000000..34bf76f
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/generatesystemlog.py
@@ -0,0 +1,12 @@
+import platform
+
+
+def generate_system_log(ffvers, tstime, tftime):
+    osinfo = platform.platform()
+    systemInfo = {
+        "operating system": osinfo,
+        "ffmpeg version": ffvers,
+        "transcode start time": tstime,
+        "transcode end time": tftime,
+    }
+    return systemInfo
diff --git a/AV/NewVideoWIP/VideoScript/SystemFileDirectory/guessdate.py b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/guessdate.py
new file mode 100644
index 0000000..a3662f3
--- /dev/null
+++ b/AV/NewVideoWIP/VideoScript/SystemFileDirectory/guessdate.py
@@ -0,0 +1,10 @@
+import datetime
+
+
+def guess_date(string):
+    for fmt in ["%m/%d/%Y", "%d-%m-%Y", "%m/%d/%y", "%Y-%m-%d"]:
+        try:
+            return datetime.datetime.strptime(string, fmt).date()
+        except ValueError:
+            continue
+    raise ValueError(string)
\ No newline at end of file
diff --git a/GeneralInfo/StyleGuidelinesforPython.md b/GeneralInfo/StyleandFormat/StyleGuidelinesforPython.md
similarity index 89%
rename from GeneralInfo/StyleGuidelinesforPython.md
rename to GeneralInfo/StyleandFormat/StyleGuidelinesforPython.md
index 5e48f1d..ffed7a2 100644
--- a/GeneralInfo/StyleGuidelinesforPython.md
+++ b/GeneralInfo/StyleandFormat/StyleGuidelinesforPython.md
@@ -32,11 +32,11 @@ Examples from PEP8
 * No wildcard imports
 * 
 
-### Naming Conventions
+### Naming Conventions (These are ideals.)
 * Functions
   * Lowercase, separate words with underscores between words
 * Variables
-  * Lowercase, single letter (prefer not), words separated with underscore
+  * Lowercase, words separated with underscores
 * Class
   * Start with capital letter. No underscores
 * Method
@@ -48,6 +48,8 @@ Examples from PEP8
 * Package
   * Short lowercase words separated. No underscores.
 
+*However: if a name contains an abbreviation, capitalize the whole abbreviation.*
+
 #### Resources:
 
 ##### Auto PEP8 Compliance in VSCode:
diff --git a/GeneralInfo/StyleandFormat/StyleandFormatResources.md b/GeneralInfo/StyleandFormat/StyleandFormatResources.md
new file mode 100644
index 0000000..bee9421
--- /dev/null
+++ b/GeneralInfo/StyleandFormat/StyleandFormatResources.md
@@ -0,0 +1,19 @@
+Version date: 09/06/2023
+
+Document owner: SF
+
+
+### List of resources relating to Style and Formatting of Python
+
+#### Books
+* Clean Code in Python - Mariano Anaya -- available through O'Reilly
+* PEP8 Guidelines: Python Clean Coding - Martin Yanev -- available through O'Reilly
+* The Art of Clean Code - Christian Mayer -- available through O'Reilly
+
+
+#### Web Articles
+
+* How to Write Beautiful Python Code with PEP 8 -- https://realpython.com/python-pep8/
+* PEP 8 - Style Guide for Python Code -- https://peps.python.org/pep-0008/
+* PEP-8 Tutorial: Code Standards in Python -- https://www.datacamp.com/tutorial/pep8-tutorial-python-code
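To make the naming conventions in StyleGuidelinesforPython.md above concrete, a short illustrative sketch; every name is invented for the example and none comes from the scripts in this patch:

    MAX_RETRIES = 3  # constant: all caps with underscores


    class TranscodeReport:  # class: starts with a capital letter, no underscores
        def parse_JSON_summary(self, summary_path):  # abbreviation capitalized in full
            report_status = "PASS"  # functions and variables: lowercase with underscores
            return report_status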