diff --git a/nemosis/data_fetch_methods.py b/nemosis/data_fetch_methods.py
index ce785f0..81ae706 100644
--- a/nemosis/data_fetch_methods.py
+++ b/nemosis/data_fetch_methods.py
@@ -350,7 +350,7 @@ def _get_read_function(fformat, table_type, day):
             func = _read_mms_csv
         else:
             func = _read_constructed_csv
-    elif table_type == 'DAILY_REGION_SUMMARY':
+    elif table_type in ["DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD"]:
         func = _read_constructed_csv
     return func
@@ -695,7 +695,7 @@ def _determine_columns_and_read_csv(
     else:
         type = str
     if (
-        _defaults.table_types[table_name] in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY"]
+        _defaults.table_types[table_name] in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD"]
         and not read_all_columns
     ):
         headers = read_csv_func(csv_file, nrows=1).columns.tolist()
@@ -706,7 +706,7 @@
         ]
         data = read_csv_func(csv_file, usecols=columns, dtype=type)
     elif (
-        _defaults.table_types[table_name] in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY"]
+        _defaults.table_types[table_name] in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD"]
         and read_all_columns
     ):
         data = read_csv_func(csv_file, dtype=type)
@@ -864,5 +864,6 @@ def _static_table_wrapper_for_gui(
     "INTERCONNECTOR": _dynamic_data_wrapper_for_gui,
     "INTERCONNECTORCONSTRAINT": _dynamic_data_wrapper_for_gui,
     "MARKET_PRICE_THRESHOLDS": _dynamic_data_wrapper_for_gui,
-    "DAILY_REGION_SUMMARY": _dynamic_data_wrapper_for_gui
+    "DAILY_REGION_SUMMARY": _dynamic_data_wrapper_for_gui,
+    "NEXT_DAY_DISPATCHLOAD": _dynamic_data_wrapper_for_gui
 }
diff --git a/nemosis/defaults.py b/nemosis/defaults.py
index 64fddb6..a9a57ba 100644
--- a/nemosis/defaults.py
+++ b/nemosis/defaults.py
@@ -1,6 +1,7 @@
 names = {
     "FCAS Providers": "NEM Registration and Exemption List.xls",
     "DISPATCHLOAD": "PUBLIC_DVD_DISPATCHLOAD",
+    "NEXT_DAY_DISPATCHLOAD": "PUBLIC_NEXT_DAY_DISPATCHLOAD",
     "DUDETAILSUMMARY": "PUBLIC_DVD_DUDETAILSUMMARY",
     "DUDETAIL": "PUBLIC_DVD_DUDETAIL",
     "DISPATCHCONSTRAINT": "PUBLIC_DVD_DISPATCHCONSTRAINT",
@@ -38,6 +39,7 @@
 table_types = {
     "FCAS Providers": "STATICXL",
     "DISPATCHLOAD": "MMS",
+    "NEXT_DAY_DISPATCHLOAD": "NEXT_DAY_DISPATCHLOAD",
     "DUDETAILSUMMARY": "MMS",
     "DUDETAIL": "MMS",
     "DISPATCHCONSTRAINT": "MMS",
@@ -74,7 +76,7 @@
 dynamic_tables = [
     table
     for table, type in table_types.items()
-    if type in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY", "FCAS"]
+    if type in ["MMS", "BIDDING", "DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD", "FCAS"]
 ]

 return_tables = list(names.keys())
@@ -82,6 +84,7 @@
 display_as_AMEO = [
     "FCAS Providers",
     "DISPATCHLOAD",
+    "NEXT_DAY_DISPATCHLOAD",
     "DUDETAILSUMMARY",
     "DUDETAIL",
     "DISPATCHCONSTRAINT",
@@ -127,7 +130,8 @@
     "BIDDING": "Reports/Current/Bidmove_Complete/",
-    "DAILY_REGION_SUMMARY": "/Reports/Current/Daily_Reports/"
+    "DAILY_REGION_SUMMARY": "/Reports/Current/Daily_Reports/",
+    "NEXT_DAY_DISPATCHLOAD": "/Reports/Current/NEXT_DAY_DISPATCH/"
 }

 fcas_4_url = "http://www.nemweb.com.au/Reports/Current/Causer_Pays/FCAS_{}{}{}{}.zip"
@@ -234,6 +238,31 @@
         "LOWERREGENABLEMENTMAX",
         "LOWERREGENABLEMENTMIN",
     ],
+    "NEXT_DAY_DISPATCHLOAD": [
+        "SETTLEMENTDATE",
+        "DUID",
+        "INTERVENTION",
+        "DISPATCHMODE",
+        "AGCSTATUS",
+        "INITIALMW",
+        "TOTALCLEARED",
+        "RAMPDOWNRATE",
+        "RAMPUPRATE",
+        "LOWER5MIN",
+        "LOWER60SEC",
+        "LOWER6SEC",
+        "RAISE5MIN",
+        "RAISE60SEC",
+        "RAISE6SEC",
+        "LOWERREG",
+        "RAISEREG",
+        "SEMIDISPATCHCAP",
+        "AVAILABILITY",
+        "RAISEREGENABLEMENTMAX",
+        "RAISEREGENABLEMENTMIN",
+        "LOWERREGENABLEMENTMAX",
+ "LOWERREGENABLEMENTMIN", + ], "TRADINGLOAD": [ "SETTLEMENTDATE", "DUID", @@ -705,6 +734,7 @@ "SETTLEMENTDATE", ], "DISPATCHLOAD": ["SETTLEMENTDATE", "INTERVENTION", "DUID"], + "NEXT_DAY_DISPATCHLOAD": ["SETTLEMENTDATE", "INTERVENTION", "DUID"], "DISPATCH_UNIT_SCADA": ["SETTLEMENTDATE", "DUID"], "FCAS_4_SECOND": ["TIMESTAMP", "ELEMENTNUMBER", "VARIABLENUMBER"], "ELEMENTS_FCAS_4_SECOND": ["ELEMENTNUMBER"], @@ -744,6 +774,7 @@ primary_date_columns = { "DISPATCHLOAD": "SETTLEMENTDATE", + "NEXT_DAY_DISPATCHLOAD": "SETTLEMENTDATE", "TRADINGLOAD": "SETTLEMENTDATE", "TRADINGPRICE": "SETTLEMENTDATE", "TRADINGREGIONSUM": "SETTLEMENTDATE", diff --git a/nemosis/downloader.py b/nemosis/downloader.py index 9e5317b..298cb0b 100644 --- a/nemosis/downloader.py +++ b/nemosis/downloader.py @@ -34,21 +34,6 @@ def run(year, month, day, index, filename_stub, down_load_to): logger.warning(f"{filename_stub} not downloaded") -def run_bidding_tables_by_day(year, month, day, index, filename_stub, down_load_to): - """This function""" - - bid_move_complete_url = "https://nemweb.com.au/Reports/Current/Bidmove_Complete/PUBLIC_BIDMOVE_COMPLETE_{year}{month}{day}" - bid_move_complete_url = bid_move_complete_url.format(year=year, month=month, day=day) - bid_move_complete_url = _get_matching_link(url="https://nemweb.com.au/Reports/Current/Bidmove_Complete/", - stub_link=bid_move_complete_url) - - # Perform the download, unzipping saving of the file - try: - download_unzip_csv(bid_move_complete_url, down_load_to) - except Exception: - logger.warning(f"{filename_stub} not downloaded") - - def run_bid_tables(year, month, day, index, filename_stub, down_load_to): if day is None: run(year, month, day, index, filename_stub, down_load_to) @@ -78,6 +63,17 @@ def run_next_day_region_tables(year, month, day, index, filename_stub, down_load logger.warning(f"{filename_stub} not downloaded") +def run_next_dispatch_tables(year, month, day, index, filename_stub, down_load_to): + try: + filename_stub = "PUBLIC_NEXT_DAY_DISPATCH_{year}{month}{day}".format(year=year, month=month, day=day) + download_url = _get_current_url( + filename_stub, + defaults.current_data_page_urls["NEXT_DAY_DISPATCHLOAD"]) + _download_and_unpack_next_dispatch_load_files_complete_files(download_url, down_load_to) + except Exception: + logger.warning(f"{filename_stub} not downloaded") + + def _get_current_url(filename_stub, current_page_url): sub_url = _get_matching_link( url=defaults.nem_web_domain_url + current_page_url, @@ -148,6 +144,31 @@ def _download_and_unpack_next_region_tables( ), index=False, ) + + +def _download_and_unpack_next_dispatch_load_files_complete_files( + download_url, down_load_to +): + r = requests.get(download_url, headers=USR_AGENT_HEADER) + zipped_file = zipfile.ZipFile(io.BytesIO(r.content)) + + file_name = zipped_file.namelist()[ + 0 + ] # Just one file so we can pull it out of the list using 0 + start_row_second_table = _find_start_row_nth_table( + zipped_file, file_name, 2 + ) + csv_file = zipped_file.open(file_name) + NEXT_DAY_DISPATCHLOAD = pd.read_csv( + csv_file, header=1, nrows=start_row_second_table - 3, dtype=str + ) + NEXT_DAY_DISPATCHLOAD.to_csv( + os.path.join( + down_load_to, + "PUBLIC_NEXT_DAY_DISPATCHLOAD_" + file_name[25:33] + ".csv", + ), + index=False, + ) def _find_start_row_nth_table(sub_folder_zipfile, file_name, n): diff --git a/nemosis/processing_info_maps.py b/nemosis/processing_info_maps.py index e00e652..f24e63f 100644 --- a/nemosis/processing_info_maps.py +++ b/nemosis/processing_info_maps.py @@ -11,6 +11,7 @@ 
 setup = {
     "DISPATCHLOAD": None,
+    "NEXT_DAY_DISPATCHLOAD": None,
     "TRADINGLOAD": None,
     "TRADINGPRICE": None,
     "TRADINGREGIONSUM": None,
@@ -46,6 +47,7 @@
 search_type = {
     "DISPATCHLOAD": "start_to_end",
+    "NEXT_DAY_DISPATCHLOAD": "start_to_end",
     "TRADINGLOAD": "start_to_end",
     "TRADINGPRICE": "start_to_end",
     "TRADINGREGIONSUM": "start_to_end",
@@ -81,6 +83,7 @@
 date_cols = {
     "DISPATCHLOAD": ["SETTLEMENTDATE"],
+    "NEXT_DAY_DISPATCHLOAD": ["SETTLEMENTDATE"],
     "TRADINGLOAD": ["SETTLEMENTDATE"],
     "TRADINGPRICE": ["SETTLEMENTDATE"],
     "TRADINGREGIONSUM": ["SETTLEMENTDATE"],
@@ -116,6 +119,7 @@
 filter = {
     "DISPATCHLOAD": filters.filter_on_settlementdate,
+    "NEXT_DAY_DISPATCHLOAD": filters.filter_on_settlementdate,
     "TRADINGLOAD": filters.filter_on_settlementdate,
     "TRADINGPRICE": filters.filter_on_settlementdate,
     "TRADINGREGIONSUM": filters.filter_on_settlementdate,
@@ -151,6 +155,7 @@
 finalise = {
     "DISPATCHLOAD": None,
+    "NEXT_DAY_DISPATCHLOAD": None,
     "TRADINGLOAD": None,
     "TRADINGPRICE": None,
     "TRADINGREGIONSUM": None,
@@ -224,6 +229,7 @@
 date_gen = {
     "MMS": date_generators.year_and_month_gen,
+    "NEXT_DAY_DISPATCHLOAD": date_generators.current_gen,
     "BIDDING": date_generators.bid_table_gen,
     "DAILY_REGION_SUMMARY": date_generators.current_gen,
     "FCAS": date_generators.year_month_day_index_gen,
@@ -231,6 +237,7 @@
 write_filename = {
     "MMS": write_file_names.write_file_names,
+    "NEXT_DAY_DISPATCHLOAD": write_file_names.write_file_names_current,
     "BIDDING": write_file_names.write_file_names_mms_and_current,
     "DAILY_REGION_SUMMARY": write_file_names.write_file_names_mms_and_current,
     "FCAS": write_file_names.write_file_names_fcas,
@@ -238,6 +245,7 @@
 downloader = {
     "MMS": downloader.run,
+    "NEXT_DAY_DISPATCHLOAD": downloader.run_next_dispatch_tables,
     "BIDDING": downloader.run_bid_tables,
     "DAILY_REGION_SUMMARY": downloader.run_next_day_region_tables,
     "FCAS": downloader.run_fcas4s,
diff --git a/nemosis/write_file_names.py b/nemosis/write_file_names.py
index a8ac333..d21b639 100644
--- a/nemosis/write_file_names.py
+++ b/nemosis/write_file_names.py
@@ -20,6 +20,15 @@ def write_file_names_mms_and_current(name, month, year, day, index, raw_data_location):
     return filename_stub, path_and_name


+def write_file_names_current(name, month, year, day, index, raw_data_location):
+    # Add the year, month and day information to the generic AEMO file name
+    filename_stub = (
+        defaults.names[name] + "_" + str(year) + str(month) + str(day)
+    )
+    path_and_name = os.path.join(raw_data_location, filename_stub)
+    return filename_stub, path_and_name
+
+
 def write_file_names_fcas(name, month, year, day, index, raw_data_location):
     # Add the year and month information to the generic AEMO file name
     filename_stub = defaults.names[name] + "_" + str(year) + str(month) + day + index
diff --git a/setup.py b/setup.py
index ea1b4bc..ea4009e 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 setuptools.setup(
     name="nemosis",
-    version="3.3.0",
+    version="3.4.0",
     author="Nicholas Gorman, Abhijith Prakash",
     author_email="n.gorman305@gmail.com",
     description="A tool for accessing AEMO data.",
diff --git a/tests/test_data_fetch_methods.py b/tests/test_data_fetch_methods.py
index 40910a7..15a434f 100644
--- a/tests/test_data_fetch_methods.py
+++ b/tests/test_data_fetch_methods.py
@@ -5,6 +5,7 @@
 from nemosis import defaults
 import pandas as pd
 from nemosis import custom_tables
+import pytz


 class TestDynamicDataCompilerWithSettlementDateFiltering(unittest.TestCase):
@@ -802,17 +803,36 @@ def test_dispatch_tables_straddle_years(self):
         print("Passed")


-class TestDynamicDataCompilerWithSettlementDateFilteringDailyRegionSsummary(
+class TestDynamicDataCompilerWithSettlementDateFilteringNextDayTables(
     unittest.TestCase
 ):
+    def setUp(self):
+        self.table_names = ["DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD"]
+
+        self.table_filters = {
+            "DAILY_REGION_SUMMARY": ["REGIONID"],
+            "NEXT_DAY_DISPATCHLOAD": ["DUID"],
+        }
+
+        # Filter each table to a single entity (one region, one DUID) so the
+        # expected row counts in the tests below are deterministic.
+        self.filter_values = {
+            "DAILY_REGION_SUMMARY": (
+                ["NSW1"],
+            ),
+            "NEXT_DAY_DISPATCHLOAD": (
+                ["AGLHAL"],
+            )
+        }
+
     def test_dispatch_tables_start_of_month(self):
         start_time = "2022/11/01 00:00:00"
         end_time = "2022/11/01 05:15:00"
-        for table in ["DAILY_REGION_SUMMARY"]:
+        for table in self.table_names:
             print(f"Testing {table} returning values at start of month one.")
             dat_col = defaults.primary_date_columns[table]
-            expected_length = 63 * 5
+            expected_length = 63 * 1
             expected_number_of_columns = len(defaults.table_columns[table])
             expected_first_time = pd.to_datetime(
                 start_time, format="%Y/%m/%d %H:%M:%S"
@@ -825,6 +845,8 @@
             ) + timedelta(minutes=5)
             expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S")
             data = data_fetch_methods.dynamic_data_compiler(
                 start_time,
                 end_time,
                 table,
                 defaults.raw_data_cache,
                 fformat="feather",
                 keep_csv=True,
+                filter_cols=self.table_filters[table],
+                filter_values=self.filter_values[table]
             )
             data = data.reset_index(drop=True)
             self.assertEqual(expected_length, data.shape[0])
             self.assertEqual(expected_number_of_columns, data.shape[1])
             self.assertEqual(expected_first_time, data[dat_col][0])
             self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
             print("Passed")
@@ -836,10 +858,68 @@
     def test_dispatch_tables_middle_of_month_and_day(self):
         start_time = "2022/11/05 12:00:00"
         end_time = "2022/11/05 17:15:00"
-        for table in ["DAILY_REGION_SUMMARY"]:
+        for table in self.table_names:
+            print(f"Testing {table} returning values in the middle of the month.")
+            dat_col = defaults.primary_date_columns[table]
+            expected_length = 63 * 1
+            expected_number_of_columns = len(defaults.table_columns[table])
+            expected_first_time = pd.to_datetime(
+                start_time, format="%Y/%m/%d %H:%M:%S"
+            ) + timedelta(minutes=5)
+            expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S")
+            data = data_fetch_methods.dynamic_data_compiler(
+                start_time,
+                end_time,
+                table,
+                defaults.raw_data_cache,
+                fformat="feather",
+                keep_csv=True,
+                filter_cols=self.table_filters[table],
+                filter_values=self.filter_values[table]
+            )
+            data = data.reset_index(drop=True)
+            self.assertEqual(expected_length, data.shape[0])
+            self.assertEqual(expected_number_of_columns, data.shape[1])
+            self.assertEqual(expected_first_time, data[dat_col][0])
+            self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
+            print("Passed")
+
+    def test_dispatch_tables_start_market_day(self):
+        start_time = "2022/11/05 04:00:00"
+        end_time = "2022/11/05 04:05:00"
+        for table in self.table_names:
+            print(f"Testing {table} returning values at the start of the market day.")
+            dat_col = defaults.primary_date_columns[table]
+            expected_length = 1
+            expected_number_of_columns = len(defaults.table_columns[table])
+            expected_first_time = pd.to_datetime(
+                start_time, format="%Y/%m/%d %H:%M:%S"
+            ) + timedelta(minutes=5)
+            expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S")
+            data = data_fetch_methods.dynamic_data_compiler(
+                start_time,
+                end_time,
+                table,
+                defaults.raw_data_cache,
+                fformat="feather",
+                keep_csv=True,
+                filter_cols=self.table_filters[table],
+                filter_values=self.filter_values[table]
+            )
+            data = data.reset_index(drop=True)
+            self.assertEqual(expected_length, data.shape[0])
+            self.assertEqual(expected_number_of_columns, data.shape[1])
+            self.assertEqual(expected_first_time, data[dat_col][0])
+            self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
+            print("Passed")
+
+    def test_dispatch_tables_end_market_day(self):
+        start_time = "2022/11/05 03:55:00"
+        end_time = "2022/11/05 04:00:00"
+        for table in self.table_names:
             print(f"Testing {table} returning values at start of month one.")
             dat_col = defaults.primary_date_columns[table]
-            expected_length = 63 * 5
+            expected_length = 1
             expected_number_of_columns = len(defaults.table_columns[table])
             expected_first_time = pd.to_datetime(
                 start_time, format="%Y/%m/%d %H:%M:%S"
             ) + timedelta(minutes=5)
             expected_last_time = pd.to_datetime(end_time, format="%Y/%m/%d %H:%M:%S")
@@ -852,6 +932,8 @@ def test_dispatch_tables_middle_of_month_and_day(self):
                 defaults.raw_data_cache,
                 fformat="feather",
                 keep_csv=True,
+                filter_cols=self.table_filters[table],
+                filter_values=self.filter_values[table]
             )
             data = data.reset_index(drop=True)
             self.assertEqual(expected_length, data.shape[0])
diff --git a/tests/test_processing_info_maps.py b/tests/test_processing_info_maps.py
index 8894ab5..55c2442 100644
--- a/tests/test_processing_info_maps.py
+++ b/tests/test_processing_info_maps.py
@@ -24,7 +24,7 @@ def test_start_to_end_no_duplication_between_batches(self):
                 "2018/01/01 00:00:00", "%Y/%m/%d %H:%M:%S"
             )
             end_time = datetime.strptime("2018/03/01 00:00:00", "%Y/%m/%d %H:%M:%S")
-            if table_name in ["DAILY_REGION_SUMMARY"]:
+            if table_name in ["DAILY_REGION_SUMMARY", "NEXT_DAY_DISPATCHLOAD"]:
                 end_time = self.time_yesterday
                 start_time = self.time_yesterday - timedelta(days=8)
             if table_name in ["FCAS_4_SECOND"]:
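
Usage sketch (not part of the patch): the new table is fetched like any other
dynamic table, mirroring the dynamic_data_compiler calls in the tests above.
The cache directory "raw_data_cache/" is a placeholder, and the dates shown are
the ones the tests use; in practice the range must fall within the window for
which AEMO still publishes the NEXT_DAY_DISPATCH report on its Current page.

    from nemosis import data_fetch_methods

    # Compile NEXT_DAY_DISPATCHLOAD for a single unit over one morning,
    # filtering to one DUID so the result stays small.
    data = data_fetch_methods.dynamic_data_compiler(
        "2022/11/05 00:00:00",
        "2022/11/05 04:00:00",
        "NEXT_DAY_DISPATCHLOAD",
        "raw_data_cache/",
        fformat="feather",
        keep_csv=True,
        filter_cols=["DUID"],
        filter_values=(["AGLHAL"],),
    )
    print(data.head())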