From 04223a42a9a8386d753228fc284bda4d50852459 Mon Sep 17 00:00:00 2001 From: Seb James Date: Fri, 18 Oct 2024 00:40:18 +0100 Subject: [PATCH] Tools tidy up, to address #211 --- morph/tools.h | 187 +++++++++++++++----------------------------------- 1 file changed, 54 insertions(+), 133 deletions(-) diff --git a/morph/tools.h b/morph/tools.h index b989fe37..75b78ffd 100644 --- a/morph/tools.h +++ b/morph/tools.h @@ -352,9 +352,8 @@ namespace morph */ static int convertCHexCharSequences (std::string& input) { - // This converts a string containing C style hex sequences - // like "\x41\x42\x43" into the corresponding characters - // ("ABC" for the example). + // This converts a string containing C style hex sequences like + // "\x41\x42\x43" into the corresponding characters ("ABC" for the example). std::string::iterator readPos = input.begin(); std::string::iterator writePos = input.begin(); @@ -557,10 +556,9 @@ namespace morph data.insert (ptr, replaceTerm); count++; if (ptr >= stl) { - // This is a move backwards along the - // string far enough that we don't - // match a substring of the last - // replaceTerm in the next search. + // This is a move backwards along the string far enough that we + // don't match a substring of the last replaceTerm in the next + // search. pos = ptr - stl; } else { break; @@ -579,8 +577,7 @@ namespace morph } /*! - * Return the number of instances of the character c - * in line. + * Return the number of instances of the character c in line. */ static unsigned int countChars (const std::string& line, const char c) { @@ -627,8 +624,8 @@ namespace morph // 1) Replace chars which are disallowed in an XML tag std::string::size_type ptr = std::string::npos; - // We allow numeric and alpha chars, the underscore and the - // hyphen. colon strictly allowed, but best avoided. + // We allow numeric and alpha chars, the underscore and the hyphen. colon + // strictly allowed, but best avoided. while ((ptr = str.find_last_not_of (CHARS_NUMERIC_ALPHA"_-", ptr)) != std::string::npos) { // Replace the char with an underscore: str[ptr] = '_'; @@ -655,55 +652,11 @@ namespace morph } } - /*! - * split csv into a vector - */ - static std::vector csvToVector (const std::string& csvList, - const char separator = ',', - const bool ignoreTrailingEmptyVal = true); - - /*! - * split csv into a list - */ - static std::list csvToList (const std::string& csvList, - const char separator = ','); - /*! - * split csv into a set - */ - static std::set csvToSet (const std::string& csvList, - const char separator = ','); - - /*! - * Output a vector of strings as a csv string. - */ - static std::string vectorToCsv (const std::vector& vecList, - const char separator = ','); - - /*! - * Output a list of strings as a csv string. - */ - static std::string listToCsv (const std::list& listList, - const char separator = ','); - - /*! - * Output a set of strings as a csv string. - */ - static std::string setToCsv (const std::set& listList, - const char separator = ','); - - /*! - * Split a comma-separated key/value pair list into a map. - */ - static std::map csvToMap (const std::string& csvList, - const char relationship = '=', - const char separator = ','); /*! * Split a string of values into a vector using the separator string (not char) * passed in as "separator". If ignoreTrailingEmptyVal is true, then a trailing * separator with nothing after it will NOT cause an additional empty value in - * the returned vector. - * - * Similar to util::splitString but FASTER. PREFER THIS OVER splitString. + * the returned vector. See also splitStringWithEncs */ static std::vector stringToVector (const std::string& s, const std::string& separator, @@ -716,8 +669,7 @@ namespace morph std::string entry(""); std::string::size_type sepLen = separator.size(); std::string::size_type a=0, b=0; - while (a < s.size() - && (b = s.find (separator, a)) != std::string::npos) { + while (a < s.size() && (b = s.find (separator, a)) != std::string::npos) { entry = s.substr (a, b-a); theVec.push_back (entry); a=b+sepLen; @@ -742,8 +694,8 @@ namespace morph #ifdef __WIN__ /*! - * Stat a file Windows style, return true if the file exists - * and is any kind of file except a directory. + * Stat a file Windows style, return true if the file exists and is any kind of + * file except a directory. */ static bool fileExists (const std::string& path) { @@ -1278,13 +1230,11 @@ namespace morph /*! * Check read/write access for the specified file. * - * Checks whether read/write access, as indicated by - * the accessType string, is available for the - * specified file. + * Checks whether read/write access, as indicated by the accessType string, is + * available for the specified file. * - * \param filepath File to check. - * \param accessType Indicates which access type(s) to - * check. r=read, w=write. + * \param filepath File to check. \param accessType Indicates which access + * type(s) to check. r=read, w=write. */ static bool checkAccess (const std::string& filepath, const std::string& accessType) { @@ -1494,9 +1444,8 @@ namespace morph #endif /*! - * Copy from one file pointer to another. Both are - * expected to be open, neither is closed after the - * copy. + * Copy from one file pointer to another. Both are expected to be open, neither + * is closed after the copy. */ static void copyFile (FILE* from, FILE* to) { @@ -1771,7 +1720,7 @@ namespace morph try { Tools::readDirectoryTree (files, dirPath, olderThanSeconds); } catch (const std::exception& e) { - //DBG ("Failed to read dir tree: " << e.what()); + // Failed to read dir tree return; } std::vector::iterator i = files.begin(); @@ -1788,7 +1737,7 @@ namespace morph } } catch (const std::exception& e) { - //DBG ("Failed to unlink " << *i << ": " << e.what()); + // Failed to unlink *i } ++i; } @@ -1849,8 +1798,7 @@ namespace morph struct stat * buf = NULL; buf = static_cast(malloc (sizeof (struct stat))); if (!buf) { // Malloc error. - throw std::runtime_error ("Failed to malloc buf; " - "could not stat link " + fileName); + throw std::runtime_error ("Failed to malloc buf; could not stat link " + fileName); } memset (buf, 0, sizeof(struct stat)); if (stat (fileName.c_str(), buf)) { @@ -1902,17 +1850,14 @@ namespace morph if (stat (fileName.c_str(), &buf)) { // no file to stat - //DBG ("stat() error for '" << fileName << "'"); continue; } if (static_cast(time(NULL)) - buf.st_mtime <= olderThanSeconds) { - // The age of the last modification is less - // than olderThanSeconds, so skip - // (we're only returning the OLDER + // The age of the last modification is less than + // olderThanSeconds, so skip (we're only returning the OLDER // files) - //DBG ("File " << fileName << " is too new to include, continuing"); continue; } //else DBG ("File " << fileName << " is older than " << olderThanSeconds << " s"); } @@ -2016,9 +1961,8 @@ namespace morph } ++levelDirCount; - // Because we found a directory, this current - // directory ain't empty - recurse with a new - // directory in the subDir path: + // Because we found a directory, this current directory ain't empty + // - recurse with a new directory in the subDir path: std::string newSubDir; if (subDir.empty()) { newSubDir = static_cast(ep->d_name); @@ -2095,9 +2039,8 @@ namespace morph } ++levelDirCount; - // Because we found a directory, this current - // directory ain't empty - recurse with a new - // directory in the subDir path: + // Because we found a directory, this current directory ain't empty + // - recurse with a new directory in the subDir path: std::string newSubDir; if (subDir.empty()) { newSubDir = static_cast(ep->d_name); @@ -2255,10 +2198,9 @@ namespace morph } #endif /*! - * Given the month as an int, where 1==Jan, 12==Dec, - * return the month as a string. If shortFormat is true, - * return "Jan", "Dec", etc., otherwise "January", - * "December" etc. + * Given the month as an int, where 1==Jan, 12==Dec, return the month as a + * string. If shortFormat is true, return "Jan", "Dec", etc., otherwise + * "January", "December" etc. */ static std::string monthStr (const int month, const bool shortFormat=false) { @@ -2354,8 +2296,8 @@ namespace morph } /*! - * Give the number n, return the suitable (english) - * suffix. E.g. "st" for 1, "nd" for 22 etc. + * Give the number n, return the suitable (english) suffix. E.g. "st" for 1, + * "nd" for 22 etc. */ static std::string suffix (const int n) { @@ -2387,11 +2329,10 @@ namespace morph } /*! - * Convert a date of form 2009-02-16 to the unix epoch - * number. The fifth character of the string is - * examined, and if it is not a numeral, it is used as - * the separator. If the fifth character IS a numeral, - * then the date format is read in as YYYYMMDD. + * Convert a date of form 2009-02-16 to the unix epoch number. The fifth + * character of the string is examined, and if it is not a numeral, it is used + * as the separator. If the fifth character IS a numeral, then the date format + * is read in as YYYYMMDD. */ static time_t dateToNum (const std::string& dateStr) { @@ -2472,15 +2413,12 @@ namespace morph } /*! - * Convert a date/time of form 2009-02-16 14:34:34 to - * the unix epoch number. The fifth character of the - * string is examined, and if it is not a numeral, it - * is used as the date separator. If the fifth - * character IS a numeral, then the date format is - * read in as YYYYMMDD. + * Convert a date/time of form 2009-02-16 14:34:34 to the unix epoch number. The + * fifth character of the string is examined, and if it is not a numeral, it is + * used as the date separator. If the fifth character IS a numeral, then the + * date format is read in as YYYYMMDD. * - * The 3rd char after the space is read in and used as - * time separator + * The 3rd char after the space is read in and used as time separator */ static time_t dateTimeToNum (const std::string& dateTimeStr) { @@ -2582,9 +2520,8 @@ namespace morph #ifndef __WIN__ /*! - * Convert a unix epoch number to a date/time of form - * 2009-02-16 02:03:01, using dateSeparator to delimit - * the date and timeSeparator to delimit the time. + * Convert a unix epoch number to a date/time of form 2009-02-16 02:03:01, using + * dateSeparator to delimit the date and timeSeparator to delimit the time. */ static std::string numToDateTime (const time_t epochSeconds, const char dateSeparator = '\0', @@ -2638,8 +2575,8 @@ namespace morph } /*! - * Convert a unix epoch number to a date of form - * 2009-02-16, using separator to delimit the date. + * Convert a unix epoch number to a date of form 2009-02-16, using separator to + * delimit the date. */ static std::string numToDate (const time_t epochSeconds, const char separator = '\0') { @@ -2704,15 +2641,13 @@ namespace morph * * \param s The string to split up * - * \param separatorChars The chars used only to - * separate tokens (" ,;") + * \param separatorChars The chars used only to separate tokens (" ,;") * - * \param enclosureChars The characters used to - * enclose a multi-word token ("\"\'") + * \param enclosureChars The characters used to enclose a multi-word token + * ("\"\'") * - * \param the escape character. If not set to \0, then - * this is the character used to escape the enclosure - * chars. + * \param the escape character. If not set to \0, then this is the character + * used to escape the enclosure chars. */ template static std::vector splitStringWithEncs (const ST& s, @@ -2763,7 +2698,6 @@ morph::Tools::splitStringWithEncs (const ST& s, ++a; // Skip the enclosure char } else if ((separatorChars.find_first_of (static_cast(s[a]), 0)) != ST::npos) { // First char is a ',' This special case means that we insert an entry for the current ',' and step past it. - //DBG2 ("First char special case, insert entry."); theVec.push_back (""); ++a; @@ -2772,7 +2706,6 @@ morph::Tools::splitStringWithEncs (const ST& s, } else { // Not first field if ((a = s.find_first_of (sepsAndEncsAndEsc, a)) == ST::npos) { - //DBG ("No enclosure, separator or escape chars in string"); theVec.push_back (s); return theVec; } @@ -2788,12 +2721,10 @@ morph::Tools::splitStringWithEncs (const ST& s, ++a; // Skip the enclosure char } else if ((separatorChars.find_first_of (static_cast(s[a]), 0)) != ST::npos) { // It's a field separator - //DBG2 ("Field separator found at position " << a << " skipping..."); ++a; // Skip the separator if (a >= sz) { // Special case - a trailing separator character - add an empty // value to the return vector of tokens. - //DBG2 ("Adding trailing empty field due to trailing separator"); theVec.push_back (""); } else { // a < sz, so now check if we've hit an escape char @@ -2804,9 +2735,7 @@ morph::Tools::splitStringWithEncs (const ST& s, ++a; // Skip the enclosure char } } - } else { - //throw std::runtime_error ("Tools::splitStringWithEncs: Unexpected case"); - } + } // else unexpected case. } // Check we didn't over-run @@ -2815,7 +2744,6 @@ morph::Tools::splitStringWithEncs (const ST& s, // Now get the token typename ST::size_type range = ST::npos; if (nextIsEnc) { - //DBG2 ("Searching for next instances of enc chars: >" << enclosureChars << "< "); c = a; while ((b = s.find_first_of (currentEncChar, c)) != ST::npos) { // FIXME: Check we didn't find an escaped enclosureChar. @@ -2831,41 +2759,34 @@ morph::Tools::splitStringWithEncs (const ST& s, break; } } else { - //DBG2 ("Searching for next instances of sep chars: >" << separatorChars << "< from position " << a); if ((b = s.find_first_of (separatorChars, a)) != ST::npos) { // Check it wasn't an escaped separator: if (escapeChar) { c = b; --c; if (c >= 0 && c != ST::npos && c < sz && s[c] == escapeChar) { - //DBG2 ("Found escaped separator character"); c = b; ++c; continue; } } range = b - a; - //DBG2 ("On finding a separator char at position " << b - //<< " (starting from position " << a << "), have set range to " << range); } } entry = s.substr (a, range); Tools::stripChars (entry, escapeChar); - //DBG2 ("Adding entry '" << entry << "' to vector"); theVec.push_back (entry); if (range != ST::npos) { // end of field was not end of string if (nextIsEnc) { - //DBG2 ("Adding " << range + 1 << " to a (" << a << ") as nextIsEnc..."); a += range + 1; // +1 to take us past the closing enclosure. } else { - //DBG2 ("Adding " << range << " to a (" << a << ")..."); - a += range; // in new scheme, we want to find the separator, so this - // places us ON the separator. + // in new scheme, we want to find the separator, so this places us ON the separator. + a += range; + } } else { a = range; } - //DBG2 ("...a is now " << a); } return theVec;