diff --git a/bibliografie.txt b/bibliografie.txt new file mode 100644 index 0000000..9c4e6f4 --- /dev/null +++ b/bibliografie.txt @@ -0,0 +1,28 @@ + + + +BIBLIOGRAFIE + + + + 1. AGARD - Aeronautical Multilingual Dictionary London, New York, Oxford, Paris, Pergamon Press, 1960. + 2. Dictionary of Technical Terms for Aerospace Washington D.C., US Government Printing Office, 1965. + 3. Dictionnaire astronautique multilingue Prague, Academia Publishing House of the Czechoslovak Academy of Science, 1970. + 4. Dictionnaire des techniques aérospatiales français-anglais et anglais-français Paris, Gauthier-Villars, 1971. + 5. Dorian Angelo Francis & Osenton James - Elsevier’s Dictionary of Aeronautics in Six Languages Amsterdam, London, New York, Elsevier Publishing Company, 1964. + 6. Strijevskii S.J. - Franțuzko-russkii aviaționno-tehniceskii slovari Moskva, Fizmatghizd, 1963. + 7. Pantazopol D., Oprișiu C., Rodan Gh. - Dicționar de aeronautică francez-român București, Editura Tehnică, 1983. + 8. Oprișiu C., Pantazopol D., Rodan Gh., Ștefănescu D.M. - Dicționar de aeronautică englez-român București, Editura Tehnică, 1997. + 9. Multilingual Space Dictionary Paris, International Academy of Astronautics Budapest, The Last Word Foundation, 1997. + 10. Dicționar englez-român București, Editura Academiei, 1974. + 11. Dicționar poliglot de mașini și construcții de mașini București, Editura Tehnică, 1969. + 12. Dicționar tehnic englez-român București, Editura Tehnică, 1967. + 13. Dicționar tehnic francez-român București, Editura Tehnică, 1969. + 14. Dicționar tehnic rus-român București, Editura Tehnică, 1975. + 15. Dicționar de medicină aeronautică București, Institutul Național de Informare și Documentare, 1995. + 16. Oxford Dictionary of Current English Oxford, Clarendon Press. + 17. Dicționar explicativ al limbii române, DEX București, Editura Academiei. + 18. Îndreptar ortografic, ortoepic și de punctuație București, Editura Academiei. + 19. 
Publicații periodice de specialitate. + + diff --git a/caseta_tehnica.txt b/caseta_tehnica.txt new file mode 100644 index 0000000..8ee1167 --- /dev/null +++ b/caseta_tehnica.txt @@ -0,0 +1,9 @@ +Autori: Cornel Oprișiu, Dan Pantazopol, Gheorghe Rodan, Dan-Mihai Ștefănescu +Editor: - +Tehnoredactor: - +Tehnoredactare computerizată: Marilena Ghemuleț, Anca Rodan, Petre Rodan +Program calculator: Mihai Radu +Coperta: - + +București, 1999 + diff --git a/cuvint_inainte.txt b/cuvint_inainte.txt new file mode 100644 index 0000000..331e545 --- /dev/null +++ b/cuvint_inainte.txt @@ -0,0 +1,17 @@ + + + +CUVÎNT ÎNAINTE + + +Aminteam în prefața unor lucrări anterioare [7, 8] despre faptul că dorința omului de a zbura l-a împins, din momentul în care i-a descoperit secretul, la un progres extrem de rapid, cu un tempo egalat numai de electronică și informatică. Dar, am adăuga noi, toate acestea au fost dezvoltate în primul rând pentru a satisface cerințele industriei aerospațiale. Activitatea frenetică desfășurată în cercetarea, dezvoltarea, proiectarea și fabricația produselor aeronautice și spațiale presupune un efort uriaș de cooperare și colaborare între instituții cu profile variate și cu o răspândire remarcabilă. Mai mult, cerințele draconice de calitate și costurile foarte mari de dezvoltare a noilor produse impun selectarea de către proiectant numai a celor mai performante componente și tehnologii. Toate acestea conduc la analiza unui volum uriaș de informație, informație care astăzi nu mai este apanajul unei singure țări. Dacă la cele de mai sus adăugăm necesitatea cunoașterii cerințelor beneficiarilor, ajungem la concluzia că, pentru a activa astăzi în industria aerospațială (atât ca producător cât și ca beneficiar), sunt necesare bogate cunoștințe lingvistice. 
Tendințele de globalizare a economiei și culturii umane s-au făcut simțite în primul rând în domeniul aerospațial (în deceniul 90 au avut loc fuziuni spectaculoase pe de o parte între producători și pe de altă parte între utilizatori), ajungându-se la crearea unor companii multinaționale. Cel mai semnificativ exemplu îl reprezintă industria aerospațială europeană, care cuprinde producători și utilizatori ale căror instituții au unități majore aflate în Anglia, Franța, Germania, Italia, Spania și având colaboratori încă în alte câteva țări. +România, cu o industrie aerospațială dezvoltată, dacă o raportăm la mărimea țării, nu poate menține acest nivel decât prin cooperare și integrare cu marile firme aerospațiale. Apare astfel în mod natural necesitatea unui instrument rapid de sprijin al comunicării într-un mediu multinațional. Autorii speră că un dicționar poliglot, cuprinzând, pe lângă limbile cu circulație majoră în domeniul aerospațial, și limba română, poate fi util tuturor celor care au în vreun fel interese în această ramură a activității umane. +Dicționarul cuprinde termenii generici din domeniul aerospațial în șase limbi: română, engleză, franceză, germană, italiană și spaniolă. Sunt prezentați circa 3800 de termeni pe care autorii îi consideră indispensabili pentru înțelegerea unui text de specialitate, fie că se referă la cercetare, industrie, exploatare sau comerț. +Pentru a păstra legătura cu progresele necontenite din mijloacele de informare, dicționarul este însoțit și de o aplicație informatizată (două dischete), care permite o serie de facilități pentru cei care o pot utiliza. +Sperăm că lucrarea de față va aduce o contribuție importantă la menținerea poziției pe care România o are în domeniul aerospațial, într-un moment în care competența, eficiența și rapiditatea reacțiilor sunt esențiale. 
+În încheiere dorim să mulțumim tuturor celor care prin activitatea lor ne-au sprijinit să ducem la bun sfârșit o muncă dificilă, și în unele cazuri complet nouă. + + + +AUTORII + diff --git a/lib/stardict/config-custom.h b/lib/stardict/config-custom.h new file mode 100644 index 0000000..248380d --- /dev/null +++ b/lib/stardict/config-custom.h @@ -0,0 +1,79 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifndef CONFIG_CUSTOM_H +#define CONFIG_CUSTOM_H + +/* include this file at the bottom of config.h +the following config.h files should be affected: +lib/config.h +dict/config.h +dict/msvc_2008/config.h +tools/config.h +*/ + +#include +#include + +/* g_stat function is declared differently depending on the version of glib +GLIB_MICRO_VERSION version number may be incorrect, adjust it if needed. +Use stardict_stat_t in the source code to define a structure for g_stat. 
*/ +#if GLIB_CHECK_VERSION(2, 25, 0) + #if defined(G_OS_UNIX) && !defined(G_STDIO_NO_WRAP_ON_UNIX) + // int g_stat (const gchar *filename, struct stat *buf); + typedef struct stat stardict_stat_t; + #else + // int g_stat (const gchar *filename, GStatBuf *buf); + typedef GStatBuf stardict_stat_t; + #endif +#elif GLIB_CHECK_VERSION(2, 24, 0) + #if defined(G_OS_UNIX) && !defined(G_STDIO_NO_WRAP_ON_UNIX) + // #define g_stat stat + typedef struct stat stardict_stat_t; + #else /* ! G_OS_UNIX */ + #ifdef G_OS_WIN32 + #if defined (_MSC_VER) && !defined(_WIN64) + // #define _g_stat_struct _stat32 + #else + // #define _g_stat_struct stat + #endif + // int g_stat (const gchar *filename, + // struct _g_stat_struct *buf); + typedef struct _g_stat_struct stardict_stat_t; + #else + // int g_stat (const gchar *filename, + // struct stat *buf); + typedef struct stat stardict_stat_t; + #endif + #endif /* G_OS_UNIX */ +#elif GLIB_CHECK_VERSION(2, 20, 0) + #if defined(G_OS_UNIX) && !defined(G_STDIO_NO_WRAP_ON_UNIX) + // #define g_stat stat + typedef struct stat stardict_stat_t; + #else + // int g_stat (const gchar *filename, + // struct stat *buf); + typedef struct stat stardict_stat_t; + #endif +#else + // int g_stat (const gchar *filename, struct stat *buf); + typedef struct stat stardict_stat_t; +#endif + +#endif // CONFIG_CUSTOM_H diff --git a/lib/stardict/ifo_file.cpp b/lib/stardict/ifo_file.cpp new file mode 100644 index 0000000..ae15ef9 --- /dev/null +++ b/lib/stardict/ifo_file.cpp @@ -0,0 +1,522 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include "ifo_file.h" +#include "libcommon.h" + +#define NORM_DICT_MAGIC_DATA "StarDict's dict ifo file" +#define TREE_DICT_MAGIC_DATA "StarDict's treedict ifo file" +#define RES_DB_MAGIC_DATA "StarDict's storage ifo file" + +/* Skip new line (LF, CR+LF, CR), return pointer to the post- new line char. +Return NULL if no new line. */ +static const char* skip_new_line(const char *p) +{ + if(!p) + return NULL; + if(*p == '\n') + return ++p; + if(*p == '\r') { + ++p; + if(*p == '\n') + ++p; + return p; + } + return NULL; +} + +static void decode_description(const char *p, long len, std::string &description) +{ + description.clear(); + const char *p1 = p; + while (p1 - p < len) { + if (*p1 == '<') { + p1++; + if ((*p1 == 'b' || *p1 == 'B') && (*(p1+1)=='r' || *(p1+1)=='R') && *(p1+2)=='>') { + description += '\n'; + p1+=3; + } else { + description += '<'; + } + } else { + description += *p1; + p1++; + } + } +} + +/* replace new lines with "
" sequence */ +static void encode_description(const char *p, long len, std::string &description) +{ + description.clear(); + const char *p1 = p; + while(p1 - p < len) { + if(*p1 == '\r' || *p1 == '\n') { + description += "
"; + p1 = skip_new_line(p1); + } else { + description += *p1; + ++p1; + } + } +} + +/* extract key - value pair having the following format: + * key=value + * Empty lines and lines containing only spaces and tabs are skipped. + * Leading and trailing blanks, as well as blanks around the equal sign are discarded. + * + * Parameters: + * p1 - beginning of the string. + * + * Return value: + * NULL if key - value pair was not found + * != NULL otherwise. It is a pointer to the beginning of the next string. */ +const char* DictInfo::get_key_value(const char *line_beg, std::string& key, + std::string& value) +{ + key.clear(); + value.clear(); + while(true) { + const size_t n1 = strcspn(line_beg, "\r\n"); + const size_t n2 = strspn(line_beg, " \t"); + const char* const line_end = line_beg + n1; + if(*line_end == '\0') { // EOF reached + if(n1 != n2) + g_warning("%s: line %d: Last line is not terminated with new line char.", + ifo_file_name.c_str(), lineno); + return NULL; + } + // new line char found + g_assert(*line_end == '\r' || *line_end == '\n'); + if(n1 == n2) { // empty line + line_beg = skip_new_line(line_end); + ++lineno; + continue; + } + const char* const key_beg = line_beg + n2; // first non-blank char + const char *equal_sign = key_beg; + while(*equal_sign != '=' && equal_sign < line_end) + ++equal_sign; + if(*equal_sign != '=') { + g_warning("%s: line %d: '=' not found.", ifo_file_name.c_str(), lineno); + line_beg = skip_new_line(line_end); + ++lineno; + continue; + } + const char *key_end=equal_sign; + while(key_beg < key_end && (*(key_end-1) == ' ' || *(key_end-1) == '\t')) + --key_end; + key.assign(key_beg, key_end-key_beg); + const char *val_beg = equal_sign+1; + const char *val_end = line_end; + while(val_beg < line_end && (*val_beg == ' ' || *val_beg == '\t')) + ++val_beg; + while(val_beg < val_end && (*(val_end-1) == ' ' || *(val_end-1) == '\t')) + --val_end; + value.assign(val_beg, val_end-val_beg); + line_beg = skip_new_line(line_end); + // no 
++lineno; here + return line_beg; + } +} + +DictInfo::DictInfo(void) +{ + clear(); +} + +bool DictInfo::load_from_ifo_file(const std::string& ifofilename, + DictInfoType infotype) +{ + clear(); + ifo_file_name=ifofilename; + set_infotype(infotype); + glib::CharStr buffer; + glib::Error error; + if (!g_file_get_contents(ifo_file_name.c_str(), get_addr(buffer), NULL, get_addr(error))) { + g_critical("Load %s failed. Error: %s.", ifo_file_name.c_str(), error->message); + return false; + } + const gchar *p1 = get_impl(buffer); + + if(g_str_has_prefix(p1, UTF8_BOM)) + p1 += 3; + if(!g_utf8_validate(p1, -1, NULL)) { + g_critical("Load %s failed: Invalid UTF-8 encoded text.", ifo_file_name.c_str()); + return false; + } + lineno = 1; + + const gchar *magic_data = NULL; + if(infotype == DictInfoType_NormDict) + magic_data = NORM_DICT_MAGIC_DATA; + else if(infotype == DictInfoType_TreeDict) + magic_data = TREE_DICT_MAGIC_DATA; + else if(infotype == DictInfoType_ResDb) + magic_data = RES_DB_MAGIC_DATA; + else + return false; + if (!g_str_has_prefix(p1, magic_data)) { + g_critical("Load %s failed: Incorrect magic data.", ifo_file_name.c_str()); + if(g_str_has_prefix(p1, NORM_DICT_MAGIC_DATA)) + g_message("File '%s' is an index-based dictionary.", ifo_file_name.c_str()); + else if(g_str_has_prefix(p1, TREE_DICT_MAGIC_DATA)) + g_message("File '%s' is a tree dictionary.", ifo_file_name.c_str()); + else if(g_str_has_prefix(p1, RES_DB_MAGIC_DATA)) + g_message("File '%s' is a resource database.", ifo_file_name.c_str()); + else + g_message("File '%s' is not a StarDict dictionary or it's broken.", ifo_file_name.c_str()); + return false; + } + p1 += strlen(magic_data); + p1 = skip_new_line(p1); + if(!p1) { + g_critical("Load %s failed: Incorrect magic data.", ifo_file_name.c_str()); + return false; + } + + std::string key, value; + while(true) { + ++lineno; + p1 = get_key_value(p1, key, value); + if(!p1) + break; + + // version must the first option + if(!is_version()) { + if(key != 
"version") { + g_critical("Load %s failed: \"version\" must be the first option.", ifo_file_name.c_str()); + return false; + } + } + if(key == "version") { + if(!check_option_duplicate(f_version, "version")) + continue; + set_version(value); + if(infotype == DictInfoType_NormDict) { + if(version != "2.4.2" && version != "3.0.0") { + g_critical("Load %s failed: Unknown version.", ifo_file_name.c_str()); + return false; + } + } else if(infotype == DictInfoType_TreeDict) { + if(version != "2.4.2") { + g_critical("Load %s failed: Unknown version.", ifo_file_name.c_str()); + return false; + } + } else if(infotype == DictInfoType_ResDb) { + if(version != "3.0.0") { + g_critical("Load %s failed: Unknown version.", ifo_file_name.c_str()); + return false; + } + } + } else if(key == "idxoffsetbits") { + if(!check_option_duplicate(f_idxoffsetbits, "idxoffsetbits")) + continue; + if(value != "32") { + // TODO + g_critical("Load %s failed: idxoffsetbits != 32 not supported presently.", + ifo_file_name.c_str()); + return false; + } + } else if(key == "wordcount" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_wordcount, "wordcount")) + continue; + set_wordcount(atol(value.c_str())); + } else if(key == "filecount" && infotype == DictInfoType_ResDb) { + if(!check_option_duplicate(f_filecount, "filecount")) + continue; + set_filecount(atol(value.c_str())); + } else if(key == "synwordcount" && infotype == DictInfoType_NormDict) { + if(!check_option_duplicate(f_synwordcount, "synwordcount")) + continue; + set_synwordcount(atol(value.c_str())); + } else if(key == "tdxfilesize" && infotype == DictInfoType_TreeDict) { + if(!check_option_duplicate(f_index_file_size, "tdxfilesize")) + continue; + set_index_file_size(atol(value.c_str())); + } else if(key == "idxfilesize" && infotype == DictInfoType_NormDict) { + if(!check_option_duplicate(f_index_file_size, "idxfilesize")) + continue; + 
set_index_file_size(atol(value.c_str())); + } else if(key == "ridxfilesize" && infotype == DictInfoType_ResDb) { + if(!check_option_duplicate(f_index_file_size, "ridxfilesize")) + continue; + set_index_file_size(atol(value.c_str())); + } else if(key == "dicttype" && infotype == DictInfoType_NormDict) { + if(!check_option_duplicate(f_dicttype, "dicttype")) + continue; + set_dicttype(value); + } else if(key == "bookname" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_bookname, "bookname")) + continue; + set_bookname(value); + } else if(key == "author" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_author, "author")) + continue; + set_author(value); + } else if(key == "email" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_email, "email")) + continue; + set_email(value); + } else if(key == "website" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_website, "website")) + continue; + set_website(value); + } else if(key == "date" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_date, "date")) + continue; + set_date(value); + } else if(key == "description" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_description, "description")) + continue; + std::string temp; + decode_description(value.c_str(), value.length(), temp); + set_description(temp); + } else if(key == "sametypesequence" && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + if(!check_option_duplicate(f_sametypesequence, "sametypesequence")) + continue; + set_sametypesequence(value); + } else { + g_message("Load %s warning: unknown option %s.", ifo_file_name.c_str(), + key.c_str()); + } + } + + // check 
required options + if((!is_wordcount() || wordcount == 0) && ((infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict))) { + g_critical("Load %s failed: wordcount not specified or 0.", + ifo_file_name.c_str()); + return false; + } + if((!is_filecount() || filecount == 0) && infotype == DictInfoType_ResDb) { + g_critical("Load %s failed: filecount not specified or 0.", + ifo_file_name.c_str()); + return false; + } + if((!is_bookname() || bookname.empty()) && (infotype == DictInfoType_NormDict + || infotype == DictInfoType_TreeDict)) { + g_critical("Load %s failed: bookname not specified.", + ifo_file_name.c_str()); + return false; + } + if(!is_index_file_size() || index_file_size == 0) { + const char* kkey; + if(infotype == DictInfoType_NormDict) + kkey = "idxfilesize"; + else if(infotype == DictInfoType_TreeDict) + kkey = "tdxfilesize"; + else if(infotype == DictInfoType_ResDb) + kkey = "ridxfilesize"; + else + kkey = ""; + g_critical("Load %s failed: %s not specified or 0.", + ifo_file_name.c_str(), kkey); + return false; + } + + return true; +} + +bool DictInfo::save_ifo_file(void) const +{ + if(ifo_file_name.empty()) { + g_critical("Fail to save ifo file. ifo file name is not specified."); + return false; + } + std::stringstream str; + //str << UTF8_BOM; + if(!is_infotype()) { + g_critical("Fail to save ifo file. Dict info type is not specified."); + return false; + } + const gchar *magic_data = NULL; + if(infotype == DictInfoType_NormDict) + magic_data = NORM_DICT_MAGIC_DATA; + else if(infotype == DictInfoType_TreeDict) + magic_data = TREE_DICT_MAGIC_DATA; + else if(infotype == DictInfoType_ResDb) + magic_data = RES_DB_MAGIC_DATA; + else + return false; + str << magic_data << '\n'; + if(!is_version()) { + g_critical("Fail to save ifo file. 
version is not specified."); + return false; + } + str << "version=" << version << '\n'; + if(infotype == DictInfoType_NormDict || infotype == DictInfoType_TreeDict) { + if(!is_bookname()) { + g_critical("Fail to save ifo file. bookname is not specified."); + return false; + } + str << "bookname=" << bookname << '\n'; + if(!is_wordcount()) { + g_critical("Fail to save ifo file. wordcount is not specified."); + return false; + } + str << "wordcount=" << wordcount << '\n'; + } + if(infotype == DictInfoType_NormDict) { + if(is_synwordcount()) + str << "synwordcount=" << synwordcount << '\n'; + } + if(infotype == DictInfoType_ResDb) { + if(is_filecount()) + str << "filecount=" << filecount << '\n'; + } + if(infotype == DictInfoType_NormDict || infotype == DictInfoType_TreeDict + || infotype == DictInfoType_ResDb) { + if(!is_index_file_size()) { + g_critical("Fail to save ifo file. index_file_size is not specified."); + return false; + } + if(infotype == DictInfoType_NormDict) + str << "idxfilesize=" << index_file_size << '\n'; + if(infotype == DictInfoType_TreeDict) + str << "tdxfilesize=" << index_file_size << '\n'; + if(infotype == DictInfoType_ResDb) + str << "ridxfilesize=" << index_file_size << '\n'; + } + if(infotype == DictInfoType_NormDict || infotype == DictInfoType_TreeDict) { + if(is_author()) + str << "author=" << author << '\n'; + if(is_email()) + str << "email=" << email << '\n'; + if(is_website()) + str << "website=" << website << '\n'; + if(is_description()) { + std::string temp; + encode_description(description.c_str(), description.length(), temp); + str << "description=" << temp << '\n'; + } + if(is_date()) + str << "date=" << date << '\n'; + if(is_sametypesequence()) + str << "sametypesequence=" << sametypesequence << '\n'; + } + if(infotype == DictInfoType_NormDict) { + if(is_dicttype()) + str << "dicttype=" << dicttype << '\n'; + } + if(!g_file_set_contents(ifo_file_name.c_str(), str.str().c_str(), -1, NULL)) { + g_critical("Fail to save ifo file." 
open_write_file_err, ifo_file_name.c_str()); + return false; + } + return true; +} + +void DictInfo::clear(void) +{ + ifo_file_name.clear(); + wordcount = 0; + filecount = 0; + synwordcount = 0; + bookname.clear(); + author.clear(); + email.clear(); + website.clear(); + date.clear(); + description.clear(); + index_file_size = 0; + sametypesequence.clear(); + dicttype.clear(); + version.clear(); + lineno = -1; + + f_wordcount = false; + f_filecount = false; + f_synwordcount = false; + f_bookname = false; + f_author = false; + f_email = false; + f_website = false; + f_date = false; + f_description = false; + f_index_file_size = false; + f_sametypesequence = false; + f_dicttype = false; + f_version = false; + f_idxoffsetbits = false; + f_infotype = false; +} + +DictInfo& DictInfo::operator=(const DictInfo& dict_info) +{ + clear(); + ifo_file_name = dict_info.ifo_file_name; + + if(dict_info.is_wordcount()) + set_wordcount(dict_info.get_wordcount()); + if(dict_info.is_filecount()) + set_filecount(dict_info.get_filecount()); + if(dict_info.is_synwordcount()) + set_synwordcount(dict_info.get_synwordcount()); + if(dict_info.is_bookname()) + set_bookname(dict_info.get_bookname()); + if(dict_info.is_author()) + set_author(dict_info.get_author()); + if(dict_info.is_email()) + set_email(dict_info.get_email()); + if(dict_info.is_website()) + set_website(dict_info.get_website()); + if(dict_info.is_date()) + set_date(dict_info.get_date()); + if(dict_info.is_description()) + set_description(dict_info.get_description()); + if(dict_info.is_index_file_size()) + set_index_file_size(dict_info.get_index_file_size()); + if(dict_info.is_sametypesequence()) + set_sametypesequence(dict_info.get_sametypesequence()); + if(dict_info.is_dicttype()) + set_dicttype(dict_info.get_dicttype()); + if(dict_info.is_version()) + set_version(dict_info.get_version()); + if(dict_info.is_infotype()) + set_infotype(dict_info.get_infotype()); + + f_idxoffsetbits = dict_info.f_idxoffsetbits; + return *this; +} 
+ +bool DictInfo::check_option_duplicate(bool& flag, const char* option) +{ + if(flag) { + g_warning("%s: line %d: duplicate option %s.", ifo_file_name.c_str(), lineno, option); + return false; + } + flag = true; + return true; +} diff --git a/lib/stardict/ifo_file.h b/lib/stardict/ifo_file.h new file mode 100644 index 0000000..60a8a05 --- /dev/null +++ b/lib/stardict/ifo_file.h @@ -0,0 +1,138 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifndef _IFO_FILE_H_ +#define _IFO_FILE_H_ + +#include +#include +#include +#include "libcommon.h" + +#define GET_METHOD_TEMPL(type, field) \ + type get_##field(void) const \ + { \ + return field; \ + } + +#define SET_METHOD_TEMPL(type, field) \ + void set_##field(type field) \ + { \ + this->field = field; \ + this->f_##field = true; \ + } + +#define UNSET_METHOD_TEMPL(type, field, default_val) \ + void unset_##field(void) \ + { \ + this->f_##field = false; \ + this->field = default_val; \ + } + +#define IS_METHOD_TEMPL(type, field) \ + bool is_##field(void) const \ + { \ + return f_##field; \ + } + +#define ALL_METHOD_TEMPL(type, field, default_val) \ + GET_METHOD_TEMPL(type, field) \ + SET_METHOD_TEMPL(type, field) \ + UNSET_METHOD_TEMPL(type, field, default_val) \ + IS_METHOD_TEMPL(type, field) + +enum DictInfoType { + DictInfoType_NormDict, + DictInfoType_TreeDict, + DictInfoType_ResDb +}; + +// This structure contains all information about dictionary or Resource Storage +// database. 
+struct DictInfo { + /* in file name encoding */ + std::string ifo_file_name; + + DictInfo(void); + /* ifofilename in file name encoding */ + bool load_from_ifo_file(const std::string& ifofilename, DictInfoType infotype); + bool save_ifo_file(void) const; + void clear(void); + DictInfo& operator=(const DictInfo& dict_info); + + ALL_METHOD_TEMPL(guint32, wordcount, 0) + ALL_METHOD_TEMPL(guint32, filecount, 0) + ALL_METHOD_TEMPL(guint32, synwordcount, 0) + ALL_METHOD_TEMPL(const std::string&, bookname, "") + ALL_METHOD_TEMPL(const std::string&, author, "") + ALL_METHOD_TEMPL(const std::string&, email, "") + ALL_METHOD_TEMPL(const std::string&, website, "") + ALL_METHOD_TEMPL(const std::string&, date, "") + ALL_METHOD_TEMPL(const std::string&, description, "") + ALL_METHOD_TEMPL(guint32, index_file_size, 0) + ALL_METHOD_TEMPL(const std::string&, sametypesequence, "") + ALL_METHOD_TEMPL(const std::string&, dicttype, "") + ALL_METHOD_TEMPL(const std::string&, version, "") + ALL_METHOD_TEMPL(DictInfoType, infotype, DictInfoType_NormDict) +private: + const char* get_key_value(const char *p1, std::string& key, + std::string& value); + bool check_option_duplicate(bool& flag, const char* option); + int lineno; + // flags. 
true if corresponding item is set + bool f_wordcount; + bool f_filecount; + bool f_synwordcount; + bool f_bookname; + bool f_author; + bool f_email; + bool f_website; + bool f_date; + bool f_description; + bool f_index_file_size; + bool f_sametypesequence; + bool f_dicttype; + bool f_version; + bool f_idxoffsetbits; + bool f_infotype; + + /* other strings in utf-8 */ + guint32 wordcount; + guint32 filecount; + guint32 synwordcount; + std::string bookname; + std::string author; + std::string email; + std::string website; + std::string date; + std::string description; + guint32 index_file_size; + std::string sametypesequence; + std::string dicttype; + std::string version; + DictInfoType infotype; +}; + +#undef GET_METHOD_TEMPL +#undef SET_METHOD_TEMPL +#undef UNSET_METHOD_TEMPL +#undef IS_METHOD_TEMPL +#undef ALL_METHOD_TEMPL + +#endif//!_IFO_FILE_H_ diff --git a/lib/stardict/lib_binary_dict_parser.cpp b/lib/stardict/lib_binary_dict_parser.cpp new file mode 100644 index 0000000..b2e6eb3 --- /dev/null +++ b/lib/stardict/lib_binary_dict_parser.cpp @@ -0,0 +1,735 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +# ifdef min +# undef min +# endif +# ifdef max +# undef max +# endif +#else +# include +#endif + +#include "lib_res_store.h" +#include "libcommon.h" +#include "ifo_file.h" +#include "lib_binary_dict_parser.h" +#include "lib_dict_verify.h" +#include "lib_chars.h" + +/* Limit the initially reserved index size. + * .ifo file may contain incorrect, unreasonably large value of index size, + * so we'd be out of memory if we try to allocate such amount. */ +const guint32 MAX_RESERVED_INDEX_SIZE = 200*1024; + +static bool compare_worditem_by_offset(const worditem_t* left, const worditem_t* right) +{ + return left->offset < right->offset; +} + +binary_dict_parser_t::binary_dict_parser_t(void) +: + dictfilesize(0), + p_res_storage(NULL), + fix_errors(false) +{ + +} + +/* p_res_storage may be NULL */ +VerifResult binary_dict_parser_t::load(const std::string& ifofilename, + i_resource_storage* p_res_storage) +{ + this->ifofilename = ifofilename; + this->p_res_storage = p_res_storage; + VerifResult result = VERIF_RESULT_OK; + if(!is_path_end_with(ifofilename, ".ifo")) { + g_critical(unsupported_file_type_err, ifofilename.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + + basefilename.assign(ifofilename, 0, ifofilename.length()-4); + if(load_ifo_file()) + return combine_result(result, VERIF_RESULT_FATAL); + result = combine_result(result, load_idx_file()); + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + result = combine_result(result, load_syn_file()); + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + result = combine_result(result, load_dict_file()); + if((fix_errors ? 
VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + return result; +} + +int binary_dict_parser_t::get_data_fields(guint32 offset, guint32 size, data_field_vect_t& fields) const +{ + if(size == 0) + return EXIT_FAILURE; + fields.clear(); + + const char* word = "???"; + std::vector buffer(size); + + if(!dictfile) { + g_critical(dictionary_no_loaded_err); + return EXIT_FAILURE; + } + if(fseek(get_impl(dictfile), offset, SEEK_SET)) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + if(1 != fread(&buffer[0], size, 1, get_impl(dictfile))) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + + dictionary_data_block data_block; + data_block.set_resource_storage(p_res_storage); + data_block.set_fix_errors(fix_errors); + return VERIF_RESULT_FATAL <= data_block.load(&buffer[0], size, dict_info.get_sametypesequence(), word, &fields) + ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + +VerifResult binary_dict_parser_t::prepare_idx_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + const std::string index_file_name_gz = basefilename + ".idx.gz"; + const std::string index_file_name_idx = basefilename + ".idx"; + if(g_file_test(index_file_name_gz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(index_file_name_idx.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(two_index_files_msg, index_file_name_gz.c_str(), index_file_name_idx.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + } + idxfilename_orig=index_file_name_gz; + if(g_file_test(idxfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + idxfilename = idxtemp.create_temp_file(); + if(idxfilename.empty()) + return combine_result(result, VERIF_RESULT_FATAL); + if(unpack_zlib(idxfilename_orig.c_str(), idxfilename.c_str())) + return combine_result(result, VERIF_RESULT_FATAL); + } else { + idxfilename_orig = index_file_name_idx; + idxfilename = idxfilename_orig; + } + return result; +} + +VerifResult binary_dict_parser_t::prepare_dict_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + const std::string dict_file_name_dz = basefilename + ".dict.dz"; + const std::string dict_file_name_dict = basefilename + ".dict"; + if(g_file_test(dict_file_name_dz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(dict_file_name_dict.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(two_dict_files_msg, dict_file_name_dz.c_str(), dict_file_name_dict.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + } + dictfilename_orig=dict_file_name_dz; + if(g_file_test(dictfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + dictfilename = dicttemp.create_temp_file(); + if(dictfilename.empty()) + return combine_result(result, VERIF_RESULT_FATAL); + if(unpack_zlib(dictfilename_orig.c_str(), dictfilename.c_str())) + return combine_result(result, VERIF_RESULT_FATAL); + } else { + dictfilename_orig = dict_file_name_dict; + dictfilename = dictfilename_orig; + } + return result; +} + 
+int binary_dict_parser_t::load_ifo_file(void) +{ + if(!dict_info.load_from_ifo_file(ifofilename, DictInfoType_NormDict)) + return EXIT_FAILURE; + return EXIT_SUCCESS; +} + +VerifResult binary_dict_parser_t::load_idx_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + { + VerifResult res = prepare_idx_file(); + result = combine_result(result, res); + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= res) + return result; + } + + guint32 idxfilesize; + { + stardict_stat_t stats; + if (g_stat (idxfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(file_not_found_idx_err, idxfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + idxfilesize = (guint32)stats.st_size; + } + g_message(loading_idx_file_msg, idxfilename_orig.c_str()); + + if (dict_info.get_index_file_size() != idxfilesize) { + g_warning(incorrect_idx_file_size_err, + dict_info.get_index_file_size(), idxfilesize); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_index_file_size(idxfilesize); + g_message(fixed_msg); + } else + return result; + } + + index.clear(); + index.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_wordcount())); + + std::vector buf(idxfilesize+1); + gchar * const buffer_beg = &buf[0]; + gchar * const buffer_end = buffer_beg+idxfilesize; + { + FILE *idxfile = g_fopen(idxfilename.c_str(),"rb"); + if(!idxfile) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, idxfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + if(idxfilesize != fread(buffer_beg, 1, idxfilesize, idxfile)) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, idxfilename.c_str(), error.c_str()); + fclose(idxfile); + return combine_result(result, VERIF_RESULT_FATAL); + } + fclose(idxfile); + } + + const char *p=buffer_beg; + int wordlen; + gint cmpvalue; + guint wordcount=0; + worditem_t 
worditem, preworditem; + size_t size_remain; // to the end of the index file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* const word_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!word_end) { + g_warning(index_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + worditem.word = p; + wordlen = worditem.word.length(); + if (!g_utf8_validate(worditem.word.c_str(), wordlen, NULL)) { + g_warning(word_invalid_utf8_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + worditem.word = fix_utf8_str(worditem.word, 0); + wordlen = worditem.word.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + const char* const word = worditem.word.c_str(); + if(check_xml_string_chars(word, invalid_chars)) { + result = combine_result(result, VERIF_RESULT_WARNING); + g_message(word_invalid_char_value_err, + word, print_char_codes(invalid_chars).c_str()); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + fix_xml_string_chars(word, worditem.word); + wordlen = worditem.word.length(); + } + } + } + if (wordlen > 0) { + if (wordlen>=MAX_INDEX_KEY_SIZE) { + g_warning(long_word_err, worditem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + wordlen = truncate_utf8_string(worditem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1); + worditem.word.resize(wordlen); + g_message(fixed_word_truncated_msg); + } + } + bool have_spaces = false; + if (g_ascii_isspace(worditem.word[0])) { + g_message(word_begin_space_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if (g_ascii_isspace(worditem.word[wordlen-1])) { + g_message(word_end_space_err, worditem.word.c_str()); + 
result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if(have_spaces && fix_errors) { + g_message(fixed_trim_spaces); + const char* new_beg; + size_t new_len; + trim_spaces(worditem.word.c_str(), new_beg, new_len); + if(new_len == 0) + worditem.word.clear(); + else { + std::string tmp(new_beg, new_len); + worditem.word = tmp; + } + } + } + if(check_stardict_key_chars(worditem.word.c_str())) { + g_message(word_forbidden_chars_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + std::string tmp; + fix_stardict_key_chars(worditem.word.c_str(), tmp); + worditem.word = tmp; + wordlen = worditem.word.length(); + } + } + if (wordlen==0) { + g_warning(empty_word_err); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_ignore_word_msg); + } + if (!preworditem.word.empty() && !worditem.word.empty()) { + cmpvalue=stardict_strcmp(preworditem.word.c_str(), worditem.word.c_str()); + if (cmpvalue>0) { + g_warning(wrong_word_order_err, preworditem.word.c_str(), worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_words_reordered_msg); + } + } + p = word_end + 1; + size_remain = buffer_end - p; + if(size_remain < 2 * sizeof(guint32)) { + g_warning(index_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + worditem.offset = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + worditem.size = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + if (worditem.size==0) { + g_warning(empty_block_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + worditem.word.clear(); + g_message(fixed_ignore_word_msg); + } + } + preworditem = worditem; + wordcount++; + index.push_back(worditem); + } // while + + g_assert(p 
<= buffer_end); + + if (dict_info.get_wordcount() != wordcount) { + g_warning(incorrect_word_cnt_err, dict_info.get_wordcount(), wordcount); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_wordcount(wordcount); + g_message(fixed_msg); + } + } + + for(size_t i=0; i < index.size(); ++i) { + if(index[i].word.empty()) + continue; + for(size_t j=i+1; j < index.size() && index[i].word == index[j].word; ++j) { + if(index[i].offset == index[j].offset && index[i].size == index[j].size) { + g_warning(duplicate_index_item_err, + index[i].word.c_str(), index[i].offset, index[i].size); + result = combine_result(result, VERIF_RESULT_NOTE); + break; + } + } + } + + return result; +} + +VerifResult binary_dict_parser_t::load_syn_file(void) +{ + synfilename = basefilename + ".syn"; + VerifResult result = VERIF_RESULT_OK; + + if (dict_info.get_synwordcount() == 0) { + if (g_file_test(synfilename.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(syn_file_exist_msg); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_process_syn_file_msg); + } else + return result; + } else + return result; + } + + guint32 synfilesize; + { + stardict_stat_t stats; + if (g_stat (synfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_warning(syn_file_no_found_msg, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + synfilesize = stats.st_size; + } + g_message(loading_syn_file_msg, synfilename.c_str()); + + synindex.clear(); + synindex.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_synwordcount())); + + std::vector buf(synfilesize+1); + gchar *buffer_begin = &buf[0]; + gchar *buffer_end = buffer_begin+synfilesize; + { + FILE *synfile = g_fopen(synfilename.c_str(),"rb"); + if(!synfile) { + std::string error(g_strerror(errno)); + 
g_warning(open_read_file_err, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + if(synfilesize != fread (buffer_begin, 1, synfilesize, synfile)) { + std::string error(g_strerror(errno)); + g_warning(open_read_file_err, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + fclose (synfile); + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + fclose (synfile); + } + + const char *p=buffer_begin; + int wordlen; + gint cmpvalue; + guint wordcount=0; + synitem_t synitem, presynitem; + size_t size_remain; // to the end of the synonyms file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* const word_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!word_end) { + g_warning(syn_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + synitem.word = p; + wordlen = synitem.word.length(); + if (!g_utf8_validate(synitem.word.c_str(), wordlen, NULL)) { + g_warning(word_invalid_utf8_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + synitem.word = fix_utf8_str(synitem.word); + wordlen = synitem.word.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + const char* const word = synitem.word.c_str(); + if(check_xml_string_chars(word, invalid_chars)) { + result = combine_result(result, VERIF_RESULT_WARNING); + g_message(word_invalid_char_value_err, + word, print_char_codes(invalid_chars).c_str()); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + fix_xml_string_chars(word, synitem.word); + wordlen = 
synitem.word.length(); + } + } + } + if (wordlen > 0) { + if (wordlen>=MAX_INDEX_KEY_SIZE) { + g_warning(long_word_err, synitem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + wordlen = truncate_utf8_string(synitem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1); + synitem.word.resize(wordlen); + g_message(fixed_word_truncated_msg); + } + } + bool have_spaces = false; + if (g_ascii_isspace(synitem.word[0])) { + g_message(word_begin_space_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if (g_ascii_isspace(synitem.word[wordlen-1])) { + g_message(word_end_space_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if(have_spaces && fix_errors) { + g_message(fixed_trim_spaces); + const char* new_beg; + size_t new_len; + trim_spaces(synitem.word.c_str(), new_beg, new_len); + if(new_len == 0) + synitem.word.clear(); + else { + std::string tmp(new_beg, new_len); + synitem.word = tmp; + } + } + } + if (check_stardict_key_chars(synitem.word.c_str())) { + g_message(word_forbidden_chars_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + std::string tmp; + fix_stardict_key_chars(synitem.word.c_str(), tmp); + synitem.word = tmp; + wordlen = synitem.word.length(); + } + } + if (wordlen==0) { + g_warning(empty_word_err); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_ignore_word_msg); + } + if (!presynitem.word.empty() && !synitem.word.empty()) { + cmpvalue=stardict_strcmp(presynitem.word.c_str(), synitem.word.c_str()); + if (cmpvalue>0) { + g_warning(wrong_word_order_err, presynitem.word.c_str(), synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_words_reordered_msg); + } + } + p = word_end 
+1; + size_remain = buffer_end - p; + if(size_remain < sizeof(guint32)) { + g_warning(syn_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + synitem.index = g_ntohl(*reinterpret_cast(p)); + if (synitem.index>=dict_info.get_wordcount()) { + g_warning(wrong_index_err, synitem.word.c_str(), synitem.index); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + synitem.word.clear(); + g_message(fixed_ignore_word_msg); + } + } + p+=sizeof(guint32); + presynitem = synitem; + wordcount++; + synindex.push_back(synitem); + } // while + + g_assert(p <= buffer_end); + + if (wordcount != dict_info.get_synwordcount()) { + g_warning(incorrect_syn_word_cnt_err, + dict_info.get_synwordcount(), wordcount); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_synwordcount(wordcount); + g_message(fixed_msg); + } + } + + for(size_t i=0; i < synindex.size(); ++i) { + for(size_t j=i+1; j < synindex.size() && synindex[i].word == synindex[j].word; ++j) { + if(synindex[i].index == synindex[j].index) { + g_warning(duplicate_syn_item_err, + synindex[i].word.c_str(), synindex[i].index); + result = combine_result(result, VERIF_RESULT_NOTE); + break; + } + } + } + + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) { + g_warning(load_syn_file_failed_err, synfilename.c_str()); + if(fix_errors) { + dict_info.set_synwordcount(0); + synindex.clear(); + g_message(fixed_ignore_syn_file_msg); + result = VERIF_RESULT_CRITICAL; + } + } + return result; +} + +VerifResult binary_dict_parser_t::load_dict_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + { + VerifResult res = prepare_dict_file(); + result = combine_result(result, res); + if((fix_errors ? 
VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= res) + return result; + } + + { + stardict_stat_t stats; + if (g_stat (dictfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(dict_file_not_found_err, dictfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + dictfilesize = stats.st_size; + } + + g_message(loading_dict_file_err, dictfilename_orig.c_str()); + dictfile.reset(g_fopen(dictfilename.c_str(), "rb")); + if(!dictfile) { + std::string error(g_strerror(errno)); + g_critical(open_dict_file_failed_err, dictfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + + std::vector buffer; + dictionary_data_block block_verifier; + block_verifier.set_resource_storage(p_res_storage); + block_verifier.set_fix_errors(fix_errors); + for(size_t i=0; i dictfilesize) { + g_warning(record_out_of_file_err, index[i].word.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + if(index[i].offset >= dictfilesize) { + index[i].word.clear(); + g_message(fixed_ignore_word_msg); + continue; + } else { + index[i].size = dictfilesize - index[i].offset; + g_message(fixed_data_block_size_change_msg); + } + } else { + continue; + } + } + buffer.resize(index[i].size); + if(fseek(get_impl(dictfile), index[i].offset, SEEK_SET)) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + if(1 != fread(&buffer[0], index[i].size, 1, get_impl(dictfile))) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + VerifResult result2 = block_verifier.load(&buffer[0], index[i].size, + dict_info.get_sametypesequence(), index[i].word.c_str()); + if(VERIF_RESULT_FATAL <= result2) { + result = combine_result(result, VERIF_RESULT_CRITICAL); + 
if(fix_errors) { + index[i].word.clear(); + g_message(fixed_ignore_word_msg); + continue; + } + } else + result = combine_result(result, result2); + } + result = combine_result(result, verify_data_blocks_overlapping()); + return result; +} + +VerifResult binary_dict_parser_t::verify_data_blocks_overlapping(void) +{ + VerifResult result = VERIF_RESULT_OK; + std::vector sort_index(index.size(), NULL); + for(size_t i=0; i > overlapping_blocks; + ::verify_data_blocks_overlapping(sort_index, overlapping_blocks); + for(size_t i=0; i unused_regions; + verify_unused_regions(sort_index, unused_regions, dictfilesize); + if(!unused_regions.empty()) { + g_warning(unreferenced_data_blocks_msg); + result = combine_result(result, VERIF_RESULT_NOTE); + for(size_t i = 0; i + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifndef LIB_BINARY_DICT_PARSER_H_ +#define LIB_BINARY_DICT_PARSER_H_ + +#include +#include +#include "libcommon.h" +#include "ifo_file.h" +#include "lib_dict_data_block.h" +#include "lib_res_store.h" +#include "lib_dict_verify.h" + +struct worditem_t { + std::string word; + guint32 offset; + guint32 size; +}; + +struct synitem_t { + std::string word; + guint32 index; +}; + +class i_resource_storage; + +class binary_dict_parser_t +{ +public: + typedef std::vector worditem_vect_t; + typedef std::vector synitem_vect_t; + + binary_dict_parser_t(void); + VerifResult load(const std::string& ifofilename, + i_resource_storage* p_res_storage = NULL); + void set_fix_errors(bool b) + { + fix_errors = b; + } + bool get_fix_errors(void) const + { + return fix_errors; + } + const worditem_vect_t& get_worditems(void) const + { + return index; + } + const synitem_vect_t& get_synitems(void) const + { + return synindex; + } + const DictInfo& get_dict_info(void) const + { + return dict_info; + } + int get_data_fields(guint32 offset, guint32 size, data_field_vect_t& fields) const; + +private: + VerifResult prepare_idx_file(void); + VerifResult prepare_dict_file(void); + int load_ifo_file(void); + VerifResult load_idx_file(void); + VerifResult load_syn_file(void); + VerifResult load_dict_file(void); + VerifResult verify_data_blocks_overlapping(void); + + std::string basefilename; + std::string ifofilename; + std::string idxfilename; // file to read, uncompressed + std::string idxfilename_orig; // may be archive + std::string dictfilename; + std::string dictfilename_orig; + std::string synfilename; + DictInfo dict_info; + TempFile idxtemp; + TempFile dicttemp; + clib::File dictfile; + guint32 dictfilesize; + std::vector index; + std::vector synindex; + i_resource_storage* p_res_storage; + /* fix errors if possible. We never change the files we read, + * all fixes effect only in-memory data structures. 
+ * If an error is fixed, we do not return failure status, + * but an error message is printed nevertheless. */ + bool fix_errors; +}; + + +#endif /* LIB_BINARY_DICT_PARSER_H_ */ diff --git a/lib/stardict/lib_chars.cpp b/lib/stardict/lib_chars.cpp new file mode 100644 index 0000000..fc57841 --- /dev/null +++ b/lib/stardict/lib_chars.cpp @@ -0,0 +1,168 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include "lib_chars.h" + +/* + * Only chars satisfying the following production in the XML specification are allowed: + * + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * */ +inline +bool is_valid_xml_char(gunichar gch) +{ + return xmlIsCharQ(gch); +} + +/* characters allowed in StarDict dictionary (keys, synonyms, definition text). + * We allow all valid Unicode chars excluding control chars. 
*/ +inline +bool is_valid_stardict_char(gunichar gch) +{ + return gch == 0x9 || gch == 0xa || gch == 0xd + || (0x20 <= gch && gch <= 0x7e) + || gch == 0x85 + || (0xa0 <= gch && gch <= 0xff) + || (0x100 <= gch && gch <= 0xd7ff) + || (0xe000 <= gch && gch <= 0xfffd) + || (0x10000 <= gch && gch <= 0x10ffff); +} + +/* check string str for invalid chars + * str - a valid utf8 string + * + * The function returns EXIT_SUCCESS if all chars of the string are valid, + * and EXIT_FAILURE otherwise. In the later case invalid_chars list is populated with + * references to invalid chars found. invalid_chars[i] points to the first byte of + * the invalid char in the str string. + * */ +template +int check_string_chars(const char* str, const size_t len, std::list& invalid_chars, Func is_valid_char) +{ + invalid_chars.clear(); + for(const char* p = str; p < str + len; p = g_utf8_next_char(p)) { + if(!is_valid_char(g_utf8_get_char(p))) + invalid_chars.push_back(p); + } + return invalid_chars.empty() ? EXIT_SUCCESS : EXIT_FAILURE; +} + +/* copy source string str into destination string dst dropping invalid chars. + * For definition of an invalid char, see check_xml_string_chars function. + * src and dst may be the same string: + * + * std::string str; + * ... 
+ * fix_xml_string_chars(str.c_str(), str); + * */ +template +void fix_string_chars(const char* src, const size_t len, std::string& dst, Func is_valid_char) +{ + std::string temp; + temp.reserve(len); + for(const char* p = src; p < src + len; p = g_utf8_next_char(p)) { + if(is_valid_char(g_utf8_get_char(p))) { + const char* q = g_utf8_next_char(p); + temp.append(p, q-p); + } + } + std::swap(dst, temp); +} + +int check_xml_string_chars(const char* str, std::list& invalid_chars) +{ + return check_xml_string_chars(str, strlen(str), invalid_chars); +} + +int check_xml_string_chars(const char* str, const size_t len, std::list& invalid_chars) +{ + return check_string_chars(str, len, invalid_chars, is_valid_xml_char); +} + +void fix_xml_string_chars(const char* src, std::string& dst) +{ + fix_xml_string_chars(src, strlen(src), dst); +} + +void fix_xml_string_chars(const char* src, const size_t len, std::string& dst) +{ + fix_string_chars(src, len, dst, is_valid_xml_char); +} + +int check_stardict_string_chars(const char* str, std::list& invalid_chars) +{ + return check_stardict_string_chars(str, strlen(str), invalid_chars); +} + +int check_stardict_string_chars(const char* str, const size_t len, std::list& invalid_chars) +{ + return check_string_chars(str, len, invalid_chars, is_valid_stardict_char); +} + +void fix_stardict_string_chars(const char* src, std::string& dst) +{ + fix_stardict_string_chars(src, strlen(src), dst); +} + +void fix_stardict_string_chars(const char* src, const size_t len, std::string& dst) +{ + fix_string_chars(src, len, dst, is_valid_stardict_char); +} + +int check_stardict_key_chars(const char* str) +{ + return strpbrk(str, key_forbidden_chars) ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + +/* in addition to removing key_forbidden_chars + * - remove leading and trailing ' ' and '\t', + * - consecutive ' ' and '\t' transform to one ' '*/ +void fix_stardict_key_chars(const char* str, std::string& dst) +{ + dst.clear(); + dst.reserve(strlen(str)); + while(*str && strchr(key_forbidden_chars_ex, *str)) + ++str; + if(!*str) + return; + while(true) { + while(*str && !strchr(key_forbidden_chars_ex, *str)) { + dst += *str; + ++str; + } + if(!*str) + return; + while(*str && strchr(key_forbidden_chars_ex, *str)) + ++str; + if(!*str) + return; + dst += ' '; + } +} diff --git a/lib/stardict/lib_chars.h b/lib/stardict/lib_chars.h new file mode 100644 index 0000000..0461148 --- /dev/null +++ b/lib/stardict/lib_chars.h @@ -0,0 +1,44 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifndef LIB_CHARS_H_ +#define LIB_CHARS_H_ + +#include +#include + +#define key_forbidden_chars \ + "\n\r" +#define key_forbidden_chars_ex \ + key_forbidden_chars " \t" + +extern int check_xml_string_chars(const char* str, std::list& invalid_chars); +extern int check_xml_string_chars(const char* str, const size_t len, std::list& invalid_chars); +extern void fix_xml_string_chars(const char* src, std::string& dst); +extern void fix_xml_string_chars(const char* src, const size_t len, std::string& dst); + +extern int check_stardict_string_chars(const char* str, std::list& invalid_chars); +extern int check_stardict_string_chars(const char* str, const size_t len, std::list& invalid_chars); +extern void fix_stardict_string_chars(const char* src, std::string& dst); +extern void fix_stardict_string_chars(const char* src, const size_t len, std::string& dst); + +extern int check_stardict_key_chars(const char* str); +extern void fix_stardict_key_chars(const char* str, std::string& dst); + +#endif /* LIB_CHARS_H_ */ diff --git a/lib/stardict/lib_dict_data_block.cpp b/lib/stardict/lib_dict_data_block.cpp new file mode 100644 index 0000000..46cc9df --- /dev/null +++ b/lib/stardict/lib_dict_data_block.cpp @@ -0,0 +1,697 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include "lib_res_store.h" +#include "libcommon.h" +#include "ifo_file.h" +#include "lib_dict_data_block.h" +#include "lib_dict_verify.h" +#include "lib_chars.h" + + +size_t data_field_t::get_size(void) const +{ + if(g_ascii_islower(type_id)) + return data.size() - 1; + else + return data.size(); +} + +const char* data_field_t::get_data(void) const +{ + if(data.empty()) + return NULL; + else + return &data[0]; +} + +void data_field_t::set_data(const char* p, size_t size, bool add_null) +{ + if(add_null) + data.reserve(size+1); + data.assign(p, p+size); + if(add_null) + data.push_back('\0'); +} + + +/* must load at least 1 field, otherwise - error. */ +VerifResult dictionary_data_block::load(const char* const data, size_t data_size, + const std::string& sametypesequence, const char* word, + data_field_vect_t* fields) +{ + this->fields = fields; + this->word = word; + if(fields) + fields->clear(); + if(data_size == 0) { + g_warning(empty_block_err, word); + return VERIF_RESULT_FATAL; + } + field_num = 0; + VerifResult result = VERIF_RESULT_OK; + if (!sametypesequence.empty()) { + result = combine_result(result, load_sametypesequence(data, data_size, sametypesequence)); + } else { + result = combine_result(result, load_no_sametypesequence(data, data_size)); + } + if(VERIF_RESULT_FATAL <= result) { + if(fields) + fields->clear(); + return result; + } + if(field_num == 0) { + g_warning(data_block_no_fields_err, word); + return VERIF_RESULT_FATAL; + } + return result; +} + +VerifResult dictionary_data_block::load_sametypesequence(const char* const data, size_t data_size, + const std::string& sametypesequence) +{ + const char* p = data; + size_t size_remain; // to the end of the data block + VerifResult result = VERIF_RESULT_OK; + for (size_t i=0; i(p-data) <= data_size); + size_remain = 
data_size - (p - data); // 0 is OK + const char type_id = sametypesequence[i]; + ext_result_t ext_result(load_field(type_id, p, size_remain)); + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } + result = combine_result(result, ext_result.content); + } + // last item + g_assert(static_cast(p-data) <= data_size); + size_remain = data_size - (p - data); + const char type_id = sametypesequence[sametypesequence.length()-1]; + ext_result_t ext_result; + if(g_ascii_isupper(type_id)) { + ext_result = load_field_sametypesequence_last_upper(type_id, p, size_remain); + } else if(g_ascii_islower(type_id)) { + ext_result = load_field_sametypesequence_last_lower(type_id, p, size_remain); + } else { + g_warning(unknown_type_id_err, word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + } + g_warning(fields_extraction_faild_err, word); + return result; + } + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } else + result = combine_result(result, ext_result.content); + if(!strchr(known_type_ids, type_id)) { + g_warning(unknown_type_id_err, word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_accept_unknown_field_msg); + } + } + g_assert(static_cast(p-data) <= data_size); + size_remain = data_size - (p - data); + if(size_remain > 0) { + g_warning(incorrect_data_block_size_err, word); + result = combine_result(result, VERIF_RESULT_WARNING); + } + return result; +} + +VerifResult dictionary_data_block::load_no_sametypesequence(const char* const data, size_t data_size) +{ + const char* p = data; + size_t size_remain; // to the end of the data block + VerifResult result = VERIF_RESULT_OK; + while(true) { 
+ size_remain = data_size - (p - data); + if(size_remain == 0) + return result; + const char type_id = *p; + ++p; + --size_remain; + ext_result_t ext_result(load_field(type_id, p, size_remain)); + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } + result = combine_result(result, ext_result.content); + } + g_assert_not_reached(); + return VERIF_RESULT_OK; +} + +ext_result_t dictionary_data_block::load_field(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + ext_result.append(FIELD_VERIF_RES_SKIP); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + if(g_ascii_isupper(type_id)) { + ext_result.append(load_field_upper(type_id, p, size_remain)); + } else if(g_ascii_islower(type_id)) { + ext_result.append(load_field_lower(type_id, p, size_remain)); + } else { + g_warning(unknown_type_id_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + ext_result.append(FIELD_VERIF_RES_ABORT); + p += size_remain; + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + if(!strchr(known_type_ids, type_id)) { + g_warning(unknown_type_id_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_accept_unknown_field_msg); + } + } + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_upper(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain < sizeof(guint32)) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_ABORT); + g_warning(incorrect_data_block_size_err, word); + p += size_remain; + if(fix_errors) { + 
g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + guint32 size = g_ntohl(*reinterpret_cast(p)); + if(size_remain < sizeof(guint32) + size) { + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + size = size_remain - sizeof(guint32); + g_message(fixed_change_field_size_msg); + } else { + p += size_remain; + ext_result.append(FIELD_VERIF_RES_ABORT); + return ext_result; + } + } + p += sizeof(guint32); + if(size == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0); + return ext_result; + } + } + const char* data = p; + p += size; + VerifResult result = verify_field_content(type_id, data, size); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, size); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, size); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_lower(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain < 1) { // data must contain at least '\0' + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + p += size_remain; + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + const char* field_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!field_end) { + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + 
ext_result.append(FIELD_VERIF_RES_ABORT); + if(fix_errors) { + g_message(fixed_field_take_longest_str_msg); + field_end = p + size_remain; + } else { + p += size_remain; + return ext_result; + } + } + /* In case we need to apply changes to data, we'll store modified copy here. */ + std::string data_str; + const char* data = p; + int datalen = field_end - p; + p += std::min(datalen + 1, size_remain); // shift the pointer to the next field + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0, true); + return ext_result; + } + } + if (!g_utf8_validate(data, datalen, NULL)) { + g_warning(invalid_utf8_field_err, word, type_id, data); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + data_str = fix_utf8_str(std::string(data, datalen), 0); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } else { + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + if(check_xml_string_chars(data, datalen, invalid_chars)) { + std::string temp(data, datalen); + g_message(invalid_field_content_chars_err, word, type_id, temp.c_str(), + print_char_codes(invalid_chars).c_str()); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + fix_xml_string_chars(data, datalen, data_str); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + 
ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + } + } + VerifResult result = verify_field_content(type_id, data, datalen); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, datalen); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, datalen, true); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_sametypesequence_last_upper(const char type_id, + const char*& p, const size_t size_remain) +{ + guint32 size = size_remain; + ext_result_t ext_result; + if(size == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0); + return ext_result; + } + } + const char* data = p; + p += size; + VerifResult result = verify_field_content(type_id, data, size); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, size); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, size); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_sametypesequence_last_lower(const char type_id, + const char*& p, const size_t size_remain) +{ + size_t datalen = size_remain; + ext_result_t ext_result; + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + 
ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0, true); + return ext_result; + } + } + /* In case we need to apply changes to data, we'll store the modified copy here. */ + std::string data_str; + const char* data = p; + p += size_remain; // shift the pointer to the next field + const char* p2 = reinterpret_cast(memchr(data, '\0', datalen)); + if(p2) { + // '\0' found in the last record + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + datalen = p2 - data; + if(datalen == 0) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + g_message(fixed_field_take_zero_term_str_msg); + } + } + if (!g_utf8_validate(data, datalen, NULL)) { + std::string tmp(data, datalen); + g_warning(invalid_utf8_field_err, word, type_id, tmp.c_str()); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + data_str = fix_utf8_str(std::string(data, datalen), 0); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } else { + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + if(check_xml_string_chars(data, datalen, invalid_chars)) { + std::string temp(data, datalen); + g_message(invalid_field_content_chars_err, word, type_id, temp.c_str(), + print_char_codes(invalid_chars).c_str()); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + fix_xml_string_chars(data, datalen, data_str); + data = data_str.c_str(); + datalen = 
data_str.length(); + g_message(fixed_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + } + } + VerifResult result = verify_field_content(type_id, data, datalen); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, datalen); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, datalen, true); + return ext_result; +} + +/* any fatal error may be solved by ignoring this field + * So VERIF_RESULT_FATAL is counted as VERIF_RESULT_CRITICAL by caller function. */ +VerifResult dictionary_data_block::verify_field_content(const char type_id, const char* data, guint32 size) +{ + if(type_id == 'x') + return verify_field_content_x(data, size); + if(type_id == 'r') + return verify_field_content_r(data, size); + return VERIF_RESULT_OK; +} + +VerifResult dictionary_data_block::verify_field_content_x(const char* data, guint32 size) +{ + const char type_id = 'x'; + // create a '\0'-terminated string + std::string temp(data, size); + std::string key; + const char* p; + const char* tag; + VerifResult result = VERIF_RESULT_OK; + for(p = temp.c_str(); p && *p && (tag = strstr(p, "') + ++p; + else if (*p == ' ') { + p = strchr(p, '>'); + if(!p) + break; + ++p; + } else { // error + p = strchr(p, '>'); + if(!p) + break; + ++p; + continue; + } + // p points after the "" + tag = strstr(p, ""); + if(!tag) + break; + key.assign(p, tag - p); + if(p_res_storage && !p_res_storage->have_file(key)) { + g_warning(resource_not_found_msg, + word, type_id, key.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + 
if(fix_errors) { + g_message(fixed_ignore_msg); + } + } + p = tag + sizeof("") - 1; + } + return result; +} + +VerifResult dictionary_data_block::verify_field_content_r(const char* const data, guint32 size, + resitem_vect_t *items) +{ + const char type_id = 'r'; + const char* line_beg = data; + const char* line_end; + resitem_t resitem; + VerifResult result = VERIF_RESULT_OK; + size_t item_num = 0; // number of successfully extracted items + + if(items) + items->clear(); + while(true) { + const gint size_remain = static_cast(size) - (line_beg - data); + if(size_remain <= 0) + break; + line_end = (const char*)memchr(line_beg, '\n', size_remain); + if(!line_end) + line_end = data + size; + if(line_beg == line_end) { + g_warning(resource_invalid_format_empty_line_msg, + word, type_id); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + ++line_beg; + continue; + } else { + continue; + } + } + const std::string line(line_beg, line_end - line_beg); + const char* colon = (const char*)memchr(line_beg, ':', line_end - line_beg); + if(!colon) { + g_warning(resource_invalid_format_colon_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + line_beg = line_end + 1; + continue; + } else { + continue; + } + } + resitem.type.assign(line_beg, colon - line_beg); + ++colon; + resitem.key.assign(colon, line_end - colon); + line_beg = line_end + 1; + if(resitem.type.empty()) { + g_warning(resource_invalid_format_type_blank_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(resitem.key.empty()) { + g_warning(resource_invalid_format_key_blank_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + 
g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(!is_known_resource_type(resitem.type.c_str())) { + g_warning(resource_invalid_format_unknown_type_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(resitem.key.find('\\') != std::string::npos) { + g_warning(resource_invalid_format_back_spash_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(p_res_storage && !p_res_storage->have_file(resitem.key)) { + g_warning(resource_resource_nof_found_msg, + word, type_id, line.c_str(), resitem.key.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } + } + if(items) + items->push_back(resitem); + ++item_num; + } + if(item_num == 0) { + g_warning(resource_empty_list_msg, + word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + } + return result; +} + +void dictionary_data_block::add_field(char type_id, const char* data, size_t datalen, bool add_null) +{ + ++field_num; + if(fields) { + data_field_t field; + field.type_id = type_id; + field.set_data(data, datalen, add_null); + fields->push_back(field); + } +} diff --git a/lib/stardict/lib_dict_data_block.h b/lib/stardict/lib_dict_data_block.h new file mode 100644 index 0000000..906205a --- /dev/null +++ b/lib/stardict/lib_dict_data_block.h @@ -0,0 +1,188 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifndef LIB_DICT_DATA_BLOCK_H_ +#define LIB_DICT_DATA_BLOCK_H_ + +#include +#include "lib_res_store.h" +#include "lib_dict_verify.h" + +/* field verification result */ +enum FieldVerifResult { + FIELD_VERIF_RES_OK, // everything is fine, this field may be used + // skip (ignore) this field, may go to the next field + // This field is not recoverable, but the end of the field is reliable. + FIELD_VERIF_RES_SKIP, + // abort processing the sequence of fields + // Often because it's undefined where the next field starts. + FIELD_VERIF_RES_ABORT +}; + +inline +FieldVerifResult combine_result(FieldVerifResult a, FieldVerifResult b) +{ + return std::max(a, b); +} + +/* extract result */ +struct ext_result_t { + ext_result_t() + : + field(FIELD_VERIF_RES_OK), + content(VERIF_RESULT_OK) + { + } + ext_result_t(FieldVerifResult field, VerifResult content) + : + field(field), + content(content) + { + } + ext_result_t& operator=(const ext_result_t& right) + { + field = right.field; + content = right.content; + return *this; + } + ext_result_t& append(const ext_result_t& right) + { + append(right.field); + append(right.content); + return *this; + } + void append(FieldVerifResult result) + { + field = combine_result(field, result); + } + void append(VerifResult result) + { + content = combine_result(content, result); + } + VerifResult summary(void) const + { + return content; + } + FieldVerifResult field; // field extraction result. + VerifResult content; // field content result. Is content OK or not? 
+}; + +struct data_field_t +{ + data_field_t(void) + : + type_id(0) + { + } + + char type_id; + /* for string data types, return string length, + * for binary data types, return data size */ + size_t get_size(void) const; + /* for string data types, return a '\0'-terminated string. */ + const char* get_data(void) const; + void set_data(const char* p, size_t size, bool add_null = false); +private: + /* for string data types, like 'm', data ends with '\0' char, + * for binary data types, the vector contains only data. */ + std::vector data; +}; + +typedef std::vector data_field_vect_t; + +class dictionary_data_block { +public: + dictionary_data_block(void) + : + word(NULL), + p_res_storage(NULL), + fix_errors(false), + fields(NULL), + field_num(0) + { + + } + VerifResult load(const char* const data, size_t data_size, + const std::string& sametypesequence, const char* word, + data_field_vect_t* fields = NULL); + void set_resource_storage(i_resource_storage* p_res_storage) + { + this->p_res_storage = p_res_storage; + } + void set_fix_errors(bool b) + { + fix_errors = b; + } + void set_word(const char* word) + { + this->word = word; + } + /* if you use this method directly, do not forget to set_word(). NULL as argument is OK. + * any fatal error may be solved by ignoring this field */ + VerifResult verify_field_content_r(const char* const data, guint32 size, resitem_vect_t *items = NULL); +private: + VerifResult load_no_sametypesequence(const char* const data, size_t data_size); + VerifResult load_sametypesequence(const char* const data, size_t data_size, + const std::string& sametypesequence); + /* for all load_field* methods + * all method have two means to indicate processing result. + * ext_result_t.content holds the integral result of the processing the field. + * VERIF_RESULT_FATAL is counted as VERIF_RESULT_CRITICAL by caller function. + * Any fatal error may be solved by ignoring the field or entire field collection. 
+ * ext_result_t.field indicates what we can do next (switch to the next field, + * or abort processing the field collection). + * The p parameter initially points to the beginning of the data area. + * The field extraction method must move it past the processed field, + * to the beginning of the next field. + * The size of the available data is restricted by the size_remain parameter. + * An extraction function is not allowed to access data outside this region. + * The field may occupy either the full region or only part of it. + * An extraction function should read as much data as it needs but not more. + * + * fix_errors. When true, we work hard to fix all errors and extract as much data + * as possible. We perform as many tests as possible, testing the fixed data. + * + * fields. When specified, all extracted fields are added here. + * When fix_errors is specified, we add only clean fields, after all possible fixes. + * When fix_errors is not specified, we fix only errors >= VERIF_RESULT_CRITICAL. + * What should we do in that last case? We need to fix some errors anyway, + * even when fix_errors is false. Otherwise we cannot go forward. 
*/ + ext_result_t load_field(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_upper(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_lower(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_sametypesequence_last_upper(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_sametypesequence_last_lower(const char type_id, + const char*& p, size_t size_remain); + VerifResult verify_field_content(const char type_id, const char* data, guint32 size); + VerifResult verify_field_content_x(const char* data, guint32 size); + void add_field(char type_id, const char* data, size_t datalen, bool add_null = false); + + const char* word; + i_resource_storage* p_res_storage; // may be NULL + bool fix_errors; + data_field_vect_t* fields; + size_t field_num; // number of fields extracted +}; + + +#endif /* LIB_DICT_DATA_BLOCK_H_ */ diff --git a/lib/stardict/lib_dict_verify.cpp b/lib/stardict/lib_dict_verify.cpp new file mode 100644 index 0000000..bba7a57 --- /dev/null +++ b/lib/stardict/lib_dict_verify.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include "lib_dict_verify.h" +#include "libcommon.h" +#include "lib_res_store.h" +#include "lib_binary_dict_parser.h" + +/* Terminology + +Index file is a sequence of index items. +An index item consists of: +word - the key of the item; +size and offset of data block containing definition in the dictionary file. +A data block consists of a number of fields. +A field has a type specified by type identifier (one character, an ASCII letter). +*/ + +VerifResult stardict_verify(const char *ifofilename) +{ + VerifResult result = VERIF_RESULT_OK; + + g_message("Verifying dictionary '%s'...", ifofilename); + glib::CharStr cdirname(g_path_get_dirname(ifofilename)); + resource_storage res_storage; + res_storage.load(get_impl(cdirname)); + result = combine_result(result, res_storage.get_verif_result()); + + binary_dict_parser_t dict; + result = combine_result(result, dict.load(ifofilename, static_cast(&res_storage))); + + if(result == VERIF_RESULT_OK) + g_message("Dictionary '%s'. Verification result: OK.", ifofilename); + else if(result < VERIF_RESULT_CRITICAL) + g_message("Dictionary '%s'. Verification result: Non-critical problems were found. The dictionary is safe to use.", ifofilename); + else + g_message("Dictionary '%s'. Verification result: The dictionary is broken. Do not use it.", ifofilename); + return result; +} diff --git a/lib/stardict/lib_dict_verify.h b/lib/stardict/lib_dict_verify.h new file mode 100644 index 0000000..20af6cf --- /dev/null +++ b/lib/stardict/lib_dict_verify.h @@ -0,0 +1,287 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. 
+ * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifndef _LIBSTARDICTVERIFY_H_ +#define _LIBSTARDICTVERIFY_H_ + +#include +#include +#include "libcommon.h" + +#ifdef _WIN32 +# ifdef min +# undef min +# endif +# ifdef max +# undef max +# endif +#endif + +enum VerifResult { + VERIF_RESULT_OK, // no error + VERIF_RESULT_NOTE, // minor issue, save to ignore (for example, trailing spaces in key word) + VERIF_RESULT_WARNING, // important issue, maybe ignored (double keys in index, referring to the same data) + VERIF_RESULT_CRITICAL, // may be fixed, but cannot be ignored (for example, index entries are out of order) + VERIF_RESULT_FATAL // cannot be fixed (for example, .idx file is missing) +}; + +extern VerifResult stardict_verify(const char *ifofilename); + +struct region_t { + guint32 offset; + guint32 size; +}; + +/* combine two verification results = the most serious error */ +inline +VerifResult combine_result(VerifResult a, VerifResult b) +{ + return std::max(a, b); +} + +template +void verify_data_blocks_overlapping(std::vector& sort_index, + std::vector >& overlapping_blocks) +{ + for(size_t i=0; ioffset + sort_index[i]->size > sort_index[j]->offset; ++j) { + if(sort_index[i]->offset == sort_index[j]->offset + && sort_index[i]->size == sort_index[j]->size) + continue; + if(sort_index[j]->size == 0) + continue; + overlapping_blocks.push_back(std::pair(i, j)); + } + } +} + +template +void 
verify_unused_regions(std::vector& sort_index, + std::vector& unused_regions, guint32 filesize) +{ + region_t region; + guint32 low_boundary=0; + for(size_t i=0; ioffset; + const guint32 l_right = sort_index[i]->offset + sort_index[i]->size; + if(l_left < low_boundary) { + if(l_right > low_boundary) + low_boundary = l_right; + } if(l_left == low_boundary) { + low_boundary = l_right; + } else { // gap found + region.offset = low_boundary; + region.size = l_left - low_boundary; + unused_regions.push_back(region); + low_boundary = l_right; + } + } + if(low_boundary < filesize) { + region.offset = low_boundary; + region.size = filesize - low_boundary; + unused_regions.push_back(region); + } +} + +#define index_file_truncated_err \ + "Index file is truncated, last record is truncated." +#define incorrect_data_block_size_err \ + "Index item '%s'. Fields do not fit into the data block, incorrect data block size." +#define empty_field_err \ + "Index item '%s'. Empty field in definition data block. Type ID '%c'." +#define invalid_utf8_field_err \ + "Index item '%s'. Invalid field. Type id = '%c'. Invalid utf8 string: '''\n%s\n'''" +#define invalid_utf8_index_item_err \ + "Index item '%s'. Invalid field. Invalid utf8 string: '''\n%s\n'''" +#define invalid_field_content_err \ + "Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''" +#define invalid_chars_in_textual_data_msg \ + "The text contains either invalid Unicode characters " \ + "or Unicode characters not suitable for textual data (mainly control characters). " \ + "The following characters are prohibited: %s." +#define invalid_field_content_chars_err \ + "Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''\n"\ + invalid_chars_in_textual_data_msg +#define syn_file_truncated_err \ + "Synonyms file is truncated, last record is truncated." +#define unknown_type_id_err \ + "Index item '%s'. Unknown type identifier '%c'." +#define empty_word_err \ + "Blank key in index." 
+#define empty_file_name_err \ + "Blank file name in index." +#define long_word_err \ + "Index item '%s'. Key is too long. Maximum allowed length: %d, key length: %d." +#define word_begin_space_err \ + "Index item '%s'. Key begins with a space character." +#define word_end_space_err \ + "Index item '%s'. Key ends with a space character." +#define word_forbidden_chars_err \ + "Index item '''%s'''\nKey contains forbidden characters." +#define word_invalid_utf8_err \ + "Index item '%s'. Invalid utf8 string." +#define word_invalid_char_value_err \ + "Index item '%s'. Invalid item name.\n" \ + invalid_chars_in_textual_data_msg +#define wrong_word_order_err \ + "Wrong key order, first key = '%s', second key = '%s'." +#define wrong_file_order_err \ + "Wrong file order, first file name = '%s', second file name = '%s'." +#define fields_extraction_faild_err \ + "Index item '%s'. Extraction of the fields failed." +#define unsupported_file_type_err \ + "Unsupported file type. File must have 'ifo' extension. File: '%s'." +#define dictionary_no_loaded_err \ + "Dictionary is not loaded." +#define file_not_found_idx_err \ + "Unable to find index file: '%s'. Error: %s." +#define loading_idx_file_msg \ + "Loading index file: '%s'..." +#define incorrect_idx_file_size_err \ + "Incorrect size of the index file: in .ifo file, idxfilesize=%u, real file size is %u." +#define incorrect_ridx_file_size_err \ + "Incorrect size of the index file: in .rifo file, ridxfilesize=%d, real file size is %ld." +#define empty_block_err \ + "Index item '%s'. Data block size = 0." +#define incorrect_word_cnt_err \ + "Incorrect number of words: in .ifo file, wordcount=%d, while the real word count is %d." +#define incorrect_syn_word_cnt_err \ + "Incorrect number of words: in .ifo file, synwordcount=%d, while the real synwordcount is %d." +#define duplicate_index_item_err \ + "Multiple index items have the same key = '%s', offset = %d, size = %d." 
/*
 * Diagnostic message templates for synonym files, dictionary data files,
 * resource references and the resource database (rdb_* macros).
 *
 * Every macro expands to a string literal that is meant to be used as a
 * printf-style format: the '%' placeholders are filled in by the caller.
 * The expected arguments are listed next to each macro.
 */

/* ---- synonym (.syn) file ---- */

/* %s: key; %d: index. */
#define duplicate_syn_item_err \
	"Multiple synonym items with the same key = '%s', index = %d."
/* No arguments. */
#define syn_file_exist_msg \
	".syn file exists but there is no \"synwordcount=\" entry in .ifo file."
/* %s: synonyms file name; %s: error description. */
#define syn_file_no_found_msg \
	"Unable to find synonyms file '%s'. Error: %s."
/* %s: synonyms file name. */
#define loading_syn_file_msg \
	"Loading synonyms file: '%s'..."
/* %s: key; %d: index value found in the entry. */
#define wrong_index_err \
	"Index item '%s'. Wrong index of entry in the index file: %d."
/* %s: synonyms file name. */
#define load_syn_file_failed_err \
	"Loading synonyms file failed: '%s'."

/* ---- dictionary data file ---- */

/* %s: dictionary file name; %s: error description. */
#define dict_file_not_found_err \
	"Dictionary file does not exist: '%s'. Error: %s."
/* %s: dictionary file name. */
#define loading_dict_file_err \
	"Loading dictionary file: '%s'..."
/* %s: dictionary file name; %s: error description. */
#define open_dict_file_failed_err \
	"Unable to open dictionary file '%s'. Error: %s."
/* %s: key. */
#define record_out_of_file_err \
	"Index item '%s'. Incorrect size, offset parameters. Referenced data block is outside dictionary file."
/* %s, %s: the two keys; then (%u, %u) offset and size of the first block
 * and (%u, %u) offset and size of the second block. */
#define overlapping_data_blocks_msg \
	"Index item '%s' and index item '%s' refer to overlapping but not equal regions (offset, size): " \
	"(%u, %u) and (%u, %u)."
/* Heading line; no arguments. */
#define unreferenced_data_blocks_msg \
	"Dictionary contains unreferenced data blocks (offset, size):"
/* Heading line; no arguments. */
#define rdb_unreferenced_data_blocks_msg \
	"Resource database contains unreferenced data blocks (offset, size):"
/* %s: key. */
#define data_block_no_fields_err \
	"Index item '%s'. No fields were extracted."

/* ---- resource references inside data fields ---- */

/* %s: key; %c: field type id; %s: resource name. */
#define resource_not_found_msg \
	"Index item '%s'. Type id '%c'. The field refers to resource '%s', that is not found in resource storage."
/* %s: key; %c: field type id. */
#define resource_invalid_format_empty_line_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Empty resource line."
/* %s: key; %c: field type id; %s: offending line. */
#define resource_invalid_format_colon_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. ':' is not found."
/* %s: key; %c: field type id; %s: offending line. */
#define resource_invalid_format_type_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Type is blank."
/* %s: key; %c: field type id; %s: offending line. */
#define resource_invalid_format_key_blank_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key is blank."
/* %s: key; %c: field type id; %s: offending line. */
#define resource_invalid_format_unknown_type_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Unknown type."
/* %s: key; %c: field type id; %s: offending line. */
#define resource_invalid_format_back_spash_msg \
	"Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key contains '\\' char."
/* %s: key; %c: field type id; %s: line; %s: resource name. */
#define resource_resource_nof_found_msg \
	"Index item '%s'. Type id '%c'. Line '%s'. The field refers to resource '%s', that is not found in resource storage."
/* %s: key; %c: field type id. */
#define resource_empty_list_msg \
	"Index item '%s'. Type id '%c'. Empty resource list."

/* ---- compressed and uncompressed copies both present ---- */

/* %s: compressed file name; %s: uncompressed file name. */
#define two_index_files_msg \
	"Two index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."
/* %s: compressed file name; %s: uncompressed file name. */
#define two_dict_files_msg \
	"Two dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version."

/* ---- resource database ---- */

/* %s: resource database file name. */
#define rdb_filecnt_zero_err \
	"Resource database '%s'. No files. filecount = 0."
/* %s: resource database file name. */
#define rdb_ridxfilesize_zero_err \
	"Resource database '%s'. Empty index file size. ridxfilesize = 0."
/* %s: file name stored in the index. */
#define rdb_invalid_file_name_format_back_spash_err \
	"Index item '%s'. Found '\\' character. '/' must be used as directory separator."
/* %s: file name stored in the index. */
#define rdb_invalid_file_name_format_abs_path_err \
	"Index item '%s'. File name must not start with directory separator '/'."
/* %s: file name stored in the index. */
#define rdb_invalid_file_name_format_empty_dir_err \
	"Index item '%s'. Empty directory in file path: '//'."
/* %u: filecount declared in the .rifo file; %u: number of entries found.
 * Both are printed unsigned: the caller counts entries in a guint. */
#define rdb_incorrect_file_cnt \
	"Incorrect number of files: in .rifo file, filecount=%u, while the real file count is %u."
/* %s: resource dictionary file name; %s: error description. */
#define rdb_dict_file_not_found_err \
	"Unable to find resource dictionary file: '%s'. Error: %s."
/* %s: resource index file name. */
#define rdb_loading_ridx_file_msg \
	"Loading resource index file: '%s'..."
/* %s: resource dictionary file name. */
#define rdb_loading_dict_file_msg \
	"Loading resource dictionary file: '%s'..."
/* No arguments. */
#define rdb_loaded_db_msg \
	"Resource storage loaded. Type - database."
/* No arguments. */
#define rdb_load_db_failed_msg \
	"Resource storage load failed. Type - database."
+#define rdb_loaded_files_msg \ + "Resource storage loaded. Type - files." +#define rdb_load_files_failed_msg \ + "Resource storage load failed. Type - files." +#define rdb_two_index_files_msg \ + "Two resource index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." +#define rdb_two_dict_files_msg \ + "Two resource dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." + +#define fixed_ignore_field_msg \ + "The problem was fixed. Ignore the field." +#define duplicate_file_name \ + "Multiple index items with the same file name: '%s'." +#define fixed_accept_unknown_field_msg \ + "The problem was fixed. Accept unknown field type." +#define fixed_ignore_resource_line_msg \ + "The problem was fixed. Ignore the resource line." +#define fixed_ignore_file_tail_msg \ + "The problem was fixed. Ignore the tail of the file." +#define fixed_ignore_syn_file_msg \ + "The problem was fixed. Ignore the .syn file." +#define fixed_ignore_word_msg \ + "The problem was fixed. Ignore the key." +#define fixed_drop_invalid_char_msg \ + "The problem was fixed. Dropping invalid chars." +#define fixed_word_truncated_msg \ + "The problem was fixed. The key is truncated." +#define fixed_words_reordered_msg \ + "The problem was fixed. Key will be reordered." +#define fixed_process_syn_file_msg \ + "The problem was fixed. Process the .syn file." +#define fixed_data_block_size_change_msg \ + "The problem was fixed. Changed size of the data block." +#define fixed_change_field_size_msg \ + "The problem was fixed. Change field size." +#define fixed_field_take_longest_str_msg \ + "The problem was fixed. Take the longest string." +#define fixed_field_take_zero_term_str_msg \ + "The problem was fixed. Take a zero-terminated string." +#define fixed_trim_spaces \ + "The problem was fixed. Leading and trailing spaces trimmed." +#define fixed_utf8_drop_invalid_char_msg \ + "The problem was fixed. 
Dropping invalid UTF-8 characters." + +#endif + diff --git a/lib/stardict/lib_res_store.cpp b/lib/stardict/lib_res_store.cpp new file mode 100644 index 0000000..193b055 --- /dev/null +++ b/lib/stardict/lib_res_store.cpp @@ -0,0 +1,495 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +#else +# include +#endif + +#include "lib_res_store.h" +#include "libcommon.h" +#include "ifo_file.h" +#include "lib_dict_verify.h" + +struct fileitem_t { + std::string filename; + guint32 offset; + guint32 size; +}; + +static bool compare_fileitem(const fileitem_t& left, const fileitem_t& right) +{ + return 0 > strcmp(left.filename.c_str(), right.filename.c_str()); +} + +static bool compare_fileitem_by_offset(const fileitem_t* left, const fileitem_t* right) +{ + return left->offset < right->offset; +} + +class resource_database +{ +public: + resource_database() + : + verif_result(VERIF_RESULT_OK) + { + } + TLoadResult load(const std::string& dirname); + // filename uses database directory separator + bool have_file(const std::string& filename) const; + VerifResult get_verif_result(void) const { return verif_result; } + /* true if res.ridx.gz used, res.ridx otherwise */ + bool 
res_ridx_compressed(void) const + { + return ridxfilename != ridxfilename_orig; + } + /* true if res.rdic.dz used, res.rdic otherwise */ + bool res_rdic_compressed(void) const + { + return rdicfilename != rdicfilename_orig; + } +private: + int prepare_ridx_file(void); + int prepare_rdic_file(void); + int load_rifo_file(void); + int load_ridx_file(void); + VerifResult load_rdic_file(void); + void print_index(void); + VerifResult verify_data_blocks_overlapping(void); + + std::string rifofilename; + std::string ridxfilename; + std::string ridxfilename_orig; + std::string rdicfilename; + std::string rdicfilename_orig; + std::string dirname; + TempFile ridxtemp; + TempFile rdictemp; + DictInfo dict_info; + std::vector index; + guint32 rdicfilesize; + VerifResult verif_result; +}; + +TLoadResult resource_database::load(const std::string& dirname) +{ + this->dirname = dirname; + verif_result = VERIF_RESULT_OK; + + rifofilename = build_path(dirname, "res.rifo"); + if(!g_file_test(rifofilename.c_str(), G_FILE_TEST_EXISTS)) + return lrNotFound; + + if(load_rifo_file()) { + verif_result = combine_result(verif_result, VERIF_RESULT_FATAL); + return lrError; + } + if(load_ridx_file()) { + verif_result = combine_result(verif_result, VERIF_RESULT_FATAL); + return lrError; + } + verif_result = combine_result(verif_result, load_rdic_file()); + return VERIF_RESULT_CRITICAL <= verif_result ? 
lrError : lrOK; +} + +int resource_database::prepare_ridx_file(void) +{ + const std::string index_file_name_gz = build_path(dirname, "res.ridx.gz"); + const std::string index_file_name_ridx = build_path(dirname, "res.ridx"); + if(g_file_test(index_file_name_gz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(index_file_name_ridx.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(rdb_two_index_files_msg, index_file_name_gz.c_str(), index_file_name_ridx.c_str()); + verif_result = combine_result(verif_result, VERIF_RESULT_WARNING); + } + ridxfilename_orig = index_file_name_gz; + if(g_file_test(ridxfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + ridxfilename = ridxtemp.create_temp_file(); + if(ridxfilename.empty()) + return EXIT_FAILURE; + if(EXIT_FAILURE == unpack_zlib(ridxfilename_orig.c_str(), ridxfilename.c_str())) + return EXIT_FAILURE; + } else { + ridxfilename_orig = index_file_name_ridx; + ridxfilename = ridxfilename_orig; + } + return EXIT_SUCCESS; +} + +int resource_database::prepare_rdic_file(void) +{ + const std::string dict_file_name_dz = build_path(dirname, "res.rdic.dz"); + const std::string dict_file_name_rdic = build_path(dirname, "res.rdic"); + if(g_file_test(dict_file_name_dz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(dict_file_name_rdic.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(rdb_two_dict_files_msg, dict_file_name_dz.c_str(), dict_file_name_rdic.c_str()); + verif_result = combine_result(verif_result, VERIF_RESULT_WARNING); + } + rdicfilename_orig = dict_file_name_dz; + if(g_file_test(rdicfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + rdicfilename = rdictemp.create_temp_file(); + if(rdicfilename.empty()) + return EXIT_FAILURE; + if(unpack_zlib(rdicfilename_orig.c_str(), rdicfilename.c_str())) + return EXIT_FAILURE; + } else { + rdicfilename_orig = dict_file_name_rdic; + rdicfilename = rdicfilename_orig; + } + return EXIT_SUCCESS; +} + +int resource_database::load_rifo_file(void) +{ + if(!dict_info.load_from_ifo_file(rifofilename, DictInfoType_ResDb)) + 
return EXIT_FAILURE; + bool have_errors = false; + if(dict_info.get_filecount() == 0) { + g_critical(rdb_filecnt_zero_err, rifofilename.c_str()); + have_errors = true; + } + if(dict_info.get_index_file_size() == 0) { + g_critical(rdb_ridxfilesize_zero_err, rifofilename.c_str()); + have_errors = true; + } + return have_errors ? EXIT_FAILURE : EXIT_SUCCESS; +} + +int resource_database::load_ridx_file(void) +{ + if(prepare_ridx_file()) + return EXIT_FAILURE; + + stardict_stat_t stats; + if (g_stat (ridxfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(file_not_found_idx_err, ridxfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + g_message(rdb_loading_ridx_file_msg, ridxfilename_orig.c_str()); + if (dict_info.get_index_file_size()!=(guint)stats.st_size) { + g_critical(incorrect_ridx_file_size_err, + dict_info.get_index_file_size(), (long) stats.st_size); + return EXIT_FAILURE; + } + + index.clear(); + index.reserve(dict_info.get_filecount()); + + std::vector buf(stats.st_size+1); + gchar * const buffer_beg = &buf[0]; + gchar * const buffer_end = buffer_beg+stats.st_size; + { + FILE *idxfile = g_fopen(ridxfilename.c_str(),"rb"); + size_t fread_size; + fread_size = fread(buffer_beg, 1, stats.st_size, idxfile); + if (fread_size != (size_t)stats.st_size) { + g_print("fread error!\n"); + } + fclose(idxfile); + } + + gchar *p=buffer_beg; + gchar *prefilename=NULL; + int filenamelen; + guint filecount=0; + bool have_errors=false; + fileitem_t fileitem; + size_t size_remain; // to the end of the index file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* p2 = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!p2) { + g_warning(index_file_truncated_err); + have_errors=true; + break; + } + filenamelen = p2 - p; + if (filenamelen==0) { + g_warning(empty_file_name_err); + have_errors=true; + } + if (!g_utf8_validate(p, filenamelen, NULL)) { + std::string tmp(p, filenamelen); + 
g_warning(invalid_utf8_index_item_err, p, tmp.c_str()); + have_errors=true; + } + if(strchr(p, '\\')) { + g_warning(rdb_invalid_file_name_format_back_spash_err, p); + have_errors=true; + } + if(p[0] == '/') { + g_warning(rdb_invalid_file_name_format_abs_path_err, p); + have_errors=true; + } + if(strstr(p, "//")) { + g_warning(rdb_invalid_file_name_format_empty_dir_err, p); + have_errors=true; + } + if (prefilename) { + int cmpvalue=strcmp(prefilename, p); + if (cmpvalue>0) { + g_warning(wrong_file_order_err, prefilename, p); + have_errors=true; + } + if(cmpvalue==0) { + g_warning(duplicate_file_name, p); + have_errors=true; + } + } + prefilename=p; + fileitem.filename = p; + p += filenamelen + 1; + size_remain = buffer_end - p; + if(size_remain < 2 * sizeof(guint32)) { + g_warning(index_file_truncated_err); + have_errors=true; + break; + } + fileitem.offset = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + fileitem.size = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + if (fileitem.size==0) { + g_warning(empty_block_err, prefilename); + } + filecount++; + index.push_back(fileitem); + } // while + + g_assert(p <= buffer_end); + + if (filecount!=dict_info.get_filecount()) { + g_warning(rdb_incorrect_file_cnt, dict_info.get_filecount(), filecount); + have_errors=true; + } + + return have_errors ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + +VerifResult resource_database::load_rdic_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + if(prepare_rdic_file()) + return combine_result(result, VERIF_RESULT_FATAL); + + stardict_stat_t stats; + if (g_stat (rdicfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(rdb_dict_file_not_found_err, rdicfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + rdicfilesize = stats.st_size; + + g_message(rdb_loading_dict_file_msg, rdicfilename_orig.c_str()); + clib::File rdicfile(g_fopen(rdicfilename.c_str(), "rb")); + if(!rdicfile) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, rdicfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + + for(size_t i=0; i rdicfilesize) { + g_warning(record_out_of_file_err, index[i].filename.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + continue; + } + } + result = combine_result(result, verify_data_blocks_overlapping()); + return result; +} + +bool resource_database::have_file(const std::string& filename) const +{ + fileitem_t fileitem; + fileitem.filename = filename; + return std::binary_search(index.begin(), index.end(), fileitem, compare_fileitem); +} + +void resource_database::print_index(void) +{ + for(size_t i=0; i sort_index(index.size(), NULL); + for(size_t i=0; i > overlapping_blocks; + ::verify_data_blocks_overlapping(sort_index, overlapping_blocks); + for(size_t i=0; i unused_regions; + verify_unused_regions(sort_index, unused_regions, rdicfilesize); + if(!unused_regions.empty()) { + g_warning(rdb_unreferenced_data_blocks_msg); + for(size_t i = 0; idirname = dirname; + resdirname = build_path(dirname, "res"); + if(!g_file_test(resdirname.c_str(), G_FILE_TEST_IS_DIR)) + return lrNotFound; + return lrOK; +} + +bool resource_files::have_file(const std::string& filename) const +{ + const std::string 
full_fs_filename(build_path(resdirname, dir_separator_db_to_fs(filename))); + return static_cast(g_file_test(full_fs_filename.c_str(), G_FILE_TEST_IS_REGULAR)); +} + + +resource_storage::resource_storage(void) +: + db(NULL), + files(NULL), + verif_result(VERIF_RESULT_OK) +{ + +} + +resource_storage::~resource_storage(void) +{ + clear(); +} + +TLoadResult resource_storage::load(const std::string& dirname) +{ + clear(); + std::unique_ptr t_db(new resource_database); + TLoadResult res = t_db->load(dirname); + if(res == lrOK) { + g_message(rdb_loaded_db_msg); + verif_result = t_db->get_verif_result(); + db = t_db.release(); + return lrOK; + } + if(res == lrError) { + g_critical(rdb_load_db_failed_msg); + verif_result = t_db->get_verif_result(); + return lrError; + } + std::unique_ptr t_files(new resource_files); + res = t_files->load(dirname); + if(res == lrOK) { + g_message(rdb_loaded_files_msg); + verif_result = VERIF_RESULT_OK; + files = t_files.release(); + return lrOK; + } + if(res == lrError) { + g_critical(rdb_load_files_failed_msg); + verif_result = VERIF_RESULT_FATAL; + return lrError; + } + verif_result = VERIF_RESULT_OK; + return res; +} + +bool resource_storage::have_file(const std::string& filename) const +{ + if(db) + return db->have_file(filename); + if(files) + return files->have_file(filename); + return false; +} + +StorageType resource_storage::get_storage_type(void) const +{ + if(db) + return StorageType_DATABASE; + if(files) + return StorageType_FILE; + return StorageType_UNKNOWN; +} + +bool resource_storage::res_ridx_compressed(void) const +{ + if(db) + return db->res_ridx_compressed(); + return false; +} + +bool resource_storage::res_rdic_compressed(void) const +{ + if(db) + return db->res_rdic_compressed(); + return false; +} + +void resource_storage::clear(void) +{ + if(db) + delete db; + db = NULL; + if(files) + delete files; + files = NULL; + verif_result = VERIF_RESULT_OK; +} + diff --git a/lib/stardict/lib_res_store.h 
b/lib/stardict/lib_res_store.h new file mode 100644 index 0000000..a90574d --- /dev/null +++ b/lib/stardict/lib_res_store.h @@ -0,0 +1,74 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifndef LIB_RES_STORE_H_ +#define LIB_RES_STORE_H_ + +#include +#include "libcommon.h" +#include "lib_dict_verify.h" + +class i_resource_storage { +public: + virtual bool have_file(const std::string& filename) const = 0; +}; + +class resource_database; +class resource_files; + +struct resitem_t { + std::string type; + std::string key; +}; + +enum StorageType { + StorageType_UNKNOWN, + // files in res directory + StorageType_FILE, + // database consisting of files: res.rifo, res.ridx, res.rdic + StorageType_DATABASE +}; + +typedef std::vector resitem_vect_t; + + +class resource_storage: public i_resource_storage +{ +public: + resource_storage(void); + ~resource_storage(void); + TLoadResult load(const std::string& dirname); + // filename uses database directory separator + bool have_file(const std::string& filename) const; + VerifResult get_verif_result(void) const { return verif_result; } + StorageType get_storage_type(void) const; + /* true if res.ridx.gz used, res.ridx otherwise + * only when get_storage_type == StorageType_DATABASE */ + bool res_ridx_compressed(void) const; + /* true if res.rdic.dz used, res.rdic otherwise + * only when 
get_storage_type == StorageType_DATABASE */ + bool res_rdic_compressed(void) const; +private: + void clear(void); + resource_database *db; + resource_files *files; + VerifResult verif_result; +}; + +#endif /* LIB_RES_STORE_H_ */ diff --git a/lib/stardict/libcommon.cpp b/lib/stardict/libcommon.cpp new file mode 100644 index 0000000..6be0a14 --- /dev/null +++ b/lib/stardict/libcommon.cpp @@ -0,0 +1,877 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . 
+ */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include "libcommon.h" +#ifdef _WIN32 +# include +#endif + +const char* known_resource_types[] = { + "img", + "snd", + "vdo", + "att", + NULL +}; + +gint stardict_strcmp(const gchar *s1, const gchar *s2) +{ + gint a; + a = g_ascii_strcasecmp(s1, s2); + if (a == 0) + return strcmp(s1, s2); + else + return a; +} + +bool file_name_to_utf8(const std::string& str, std::string& out) +{ + size_t len = str.length(); + gsize bytes_read, bytes_written; + glib::CharStr gstr(g_filename_to_utf8(str.c_str(), len, &bytes_read, + &bytes_written, NULL)); + if(!gstr || bytes_read != len) { + g_error("Unable to convert string %s into utf-8 encoding", str.c_str()); + return false; + } + out = get_impl(gstr); + return true; +} + +bool utf8_to_file_name(const std::string& str, std::string& out) +{ + size_t len = str.length(); + gsize bytes_read, bytes_written; + glib::CharStr gstr(g_filename_from_utf8(str.c_str(), len, &bytes_read, + &bytes_written, NULL)); + if(!gstr || bytes_read != len) { + g_error("Unable to convert utf8 string %s into file name encoding", str.c_str()); + return false; + } + out = get_impl(gstr); + return true; +} + +#ifdef _WIN32 +bool utf8_to_windows(const std::string& str_utf8, std_win_string& out) +{ +#ifdef UNICODE + const int buf_size = MultiByteToWideChar( + CP_UTF8, //__in UINT CodePage, + 0, //__in DWORD dwFlags, + str_utf8.c_str(), //__in LPCSTR lpMultiByteStr, + -1, //__in int cbMultiByte, + NULL, // __out LPWSTR lpWideCharStr, + 0 //__in int cchWideChar + ); + if(buf_size == 0) { + g_warning("Unable to convert from utf-8 to windows encoding. 
String: %s", + str_utf8.c_str()); + return false; + } + std::vector buf(buf_size); + const int char_num = MultiByteToWideChar( + CP_UTF8, //__in UINT CodePage, + 0, //__in DWORD dwFlags, + str_utf8.c_str(), //__in LPCSTR lpMultiByteStr, + -1, //__in int cbMultiByte, + &buf[0], // __out LPWSTR lpWideCharStr, + buf_size //__in int cchWideChar + ); + if(char_num != buf_size) { + g_warning("Unable to convert from utf-8 to windows encoding. String: %s", + str_utf8.c_str()); + return false; + } + out = &buf[0]; + return true; +#else + glib::Error err; + gchar* tmp = g_locale_from_utf8(str_utf8.c_str(), -1, NULL, NULL, get_addr(err)); + if(!tmp) { + g_warning("Unable to convert from utf-8 to windows encoding: %s", err->message); + return false; + } + out = tmp; + g_free(tmp); + return true; +#endif +} + +bool windows_to_utf8(const std_win_string& str, std::string& out_utf8) +{ +#ifdef UNICODE + const int buf_size = WideCharToMultiByte( + CP_UTF8, // __in UINT CodePage, + 0, // __in DWORD dwFlags, + str.c_str(), // __in LPCWSTR lpWideCharStr, + -1, // __in int cchWideChar, + NULL, // __out LPSTR lpMultiByteStr, + 0, // __in int cbMultiByte, + NULL, // __in LPCSTR lpDefaultChar, + NULL //__out LPBOOL lpUsedDefaultChar + ); + if(buf_size == 0) { + g_warning("Unable to convert from windows encoding to utf-8."); + return false; + } + std::vector buf(buf_size); + const int char_num = WideCharToMultiByte( + CP_UTF8, // __in UINT CodePage, + 0, // __in DWORD dwFlags, + str.c_str(), // __in LPCWSTR lpWideCharStr, + -1, // __in int cchWideChar, + &buf[0], // __out LPSTR lpMultiByteStr, + buf_size, // __in int cbMultiByte, + NULL, // __in LPCSTR lpDefaultChar, + NULL //__out LPBOOL lpUsedDefaultChar + ); + if(char_num != buf_size) { + g_warning("Unable to convert from windows encoding to utf-8."); + return false; + } + out_utf8 = &buf[0]; + return true; +#else + glib::Error err; + gchar* tmp = g_locale_to_utf8(str.c_str(), -1, NULL, NULL, get_addr(err)); + if(!tmp) { + 
g_warning("Unable to convert from windows encoding to utf-8: %s", err->message); + return false; + } + out_utf8 = tmp; + g_free(tmp); + return true; +#endif +} + +/* Returns a pointer to the first char after the root component. +If str is like "c:\path\...", root_end points after the "c:\". +If str is like "\\server\path\...", root_end points after the "\\server\". +If str is like "\\server", root_end points after the "\\server". +If str is like "\dir\dir", root_end points after the "\". +Otherwise the str is considered to have no root element and root_end points +to the beginning of the string. +The function returns NULL if the path is invalid. +T is either "char" or "const char". */ +template +T* path_root_end_win(T* str) +{ + if(!str) + return NULL; + if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\') + return str + 3; + else if(str[0] == '\\' && str[1] == '\\') { + if(str[2] == '\0') // "\\" - invalid path + return NULL; + char* p = strchr(str+2, '\\'); + if(p) { + if(p == str+2) // "\\\..." - empty server - invalid path + return NULL; + return p + 1; + } else { // str is "\\server" + return strchr(str, '\0'); + } + } else if(str[0] == '\\' && str[1] != '\\') { + return str + 1; + } + return str; +} + +/* The same as path_root_end_win but for wide chars */ +template +T* path_root_end_winW(T* str) +{ + if(!str) + return NULL; + if(is_ascii_alpha(str[0]) && str[1] == L':' && str[2] == L'\\') + return str + 3; + else if(str[0] == L'\\' && str[1] == L'\\') { + if(str[2] == L'\0') // "\\" - invalid path + return NULL; + T* p = StrChr(str+2, L'\\'); + if(p) { + if(p == str+2) // "\\\..." - empty server - invalid path + return NULL; + return p + 1; + } else { // str is "\\server" + return StrChr(str, L'\0'); + } + } else if(str[0] == L'\\' && str[1] != L'\\') { + return str + 1; + } + return str; +} + +/* normalize path - resolve relative components in a path. +For example, path "c:\dir1\dir2\..\file" is converted to "c:\dir1\file". 
+This function accepts the following paths: +- an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ... + ("c:" is not allowed) +- an absolute path without disk: "\dir\file", ... +- UNC name: "\\server", "\\server\dir", ... +- relative path: "dir", "dir\dir\file", ... +##- a relative path starting with the current directory component ".": ".\", ".\dir", ... + +A reference to the parent of the root directory is considered an error. +For example, these paths are considered invalid: "c:\..\dir1\file", +"\\..\path\file", "\..\dir". +If the path is relative, this function may leave references to the parent directory +if they cannot be resolved in the path given. +For example, "dir\..\..\..\dir2\dir3" is converted to "..\..\dir2\dir3". +Strip "." components. + +If after all transformations we get an empty string, +replace it with the current directory reference, that is '.'. +Empty string is not a valid path. +For example, we get an empty path for "abcd\.." and "abcd\..\". +If the original path is not blank and it ends on backslash, +append backslash to the '.'. That is: +"abcd\.." -> "." +"abcd\..\' -> ".\" + +Return value: EXIT_FAILURE or EXIT_SUCCESS. */ +int norm_path_win(const std::string& path, std::string& result) +{ + result.clear(); + /* std::vector will free the allocated memory block + when this function returns. + + 3 - make sure that the buffer contains at least 3 chars, + that prevents buffer overread in the some checks. + + 1 - terminating '\0' */ + std::vector buf(path.length() + 3 + 1); + char* str = &buf[0]; + // end of string - terminating '\0' + char* str_end = g_stpcpy(str, path.c_str()); + char* root_end = path_root_end_win(str); + if(!root_end) + return EXIT_FAILURE; + /* + if(root_end == str && str[0] == '.' 
&& (str[1] == '\\' || str[1] == '\0')) { + if(str[1] == '\0') + str += 1; + else + str += 2; + root_end = str; + } + */ + // if(str == root_end) - relative path + /*p1 and p2 points to the first char of a path component, + the previous char is normally '\\'. + In each step p2 moves to the next path component. + p1 normally moves forward as well, unless a parent directory reference + is encontered, then p1 moves back. */ + char * p1 = root_end; + char * p2 = root_end; + while(p2 < str_end) { + char *p = strchr(p2, '\\'); + if(!p) + p = str_end; + // [p2, p) - path component + if(p == p2) // empty path component - error + return EXIT_FAILURE; + if(p2[0] == '.' && p2[1] == '.' && p2 + 2 == p) { // parent directory + if(p1 == root_end) { // no component to strip + if(str == root_end) { // relative path + if(p1 != p2) { + p1[0] = '.'; + p1[1] = '.'; + p1[2] = *p; + } + size_t len = p + 1 - p2; + p1 += len; + p2 += len; + } else { // absolute path + return EXIT_FAILURE; // error + } + } else { // search a component to strip + char *p3 = strrchr_len(root_end, p1 - 1 - root_end, '\\'); + if(!p3) + p3 = root_end; + else + ++p3; + // p3 - beginning of the privious to p1 path component + if(p3[0] == '.' && p3[1] == '.' && p3[2] == '\\') { + g_assert(str == root_end); + // the previous component is "..", it cannot be stripped + if(p1 != p2) { + p1[0] = '.'; + p1[1] = '.'; + p1[2] = *p; + } + size_t len = p + 1 - p2; + p1 += len; + p2 += len; + } else { + p1 = p3; + p2 = p + 1; + } + } + } else if(p2[0] == '.' && p2 + 1 == p) { // strip "." 
component + p2 = p + 1; + } else { // normal directory + if(p1 == p2) { + p1 = p2 = p + 1; + } else { + size_t len = p + 1 - p2; + strncpy(p1, p2, len); + p1 += len; + p2 += len; + } + } + } + /* p1[-1] == '\0' if the last char of the path is not '\\' */ + *p1 = '\0'; + if(str[0] == '\0') { // blank path + str[0] = '.'; + if(!path.empty() && path[path.length()-1] == '\\') { + str[1] = '\\'; + str[2] = '\0'; + } else + str[1] = '\0'; + } + result = str; + return EXIT_SUCCESS; +} + +/* returns true if the path is absolute and false otherwise, +This function does not check that the path is valid +The following paths are accepted: +- an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ... + ("c:" is not allowed) +- an "absolute" path without disk: "\dir\file", ... - this path is considered relative! +- UNC name: "\\server", "\\server\dir", ... +-*/ +bool is_absolute_path_win(const std::string& path) +{ + const char* str = path.c_str(); + if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\') + return true; + if(str[0] == '\\' && str[1] == '\\') + return true; + return false; +} + +/* applies a number of tests to the path +Returns true if all tests passed and false otherwise. */ +bool is_valid_path_win(const std::string& path) +{ + const char* str = path.c_str(); + /* End of the path prefix. + if "c:\abcd" then after "c:\" + if "\\abcd" then after "\\" + if "\abd" the after "\" + otherwise this the first char of the string. 
*/ + const char* prefix_end = str; + if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\') + prefix_end = str + 3; + else if(str[0] == '\\' && str[1] == '\\') + prefix_end = str + 2; + else if(str[0] == '\\') + prefix_end = str + 1; + if(prefix_end[0] == '\\') + return false; + if(strstr(prefix_end, "\\\\")) + return false; + if(strlen(prefix_end) != strcspn(prefix_end, "<>:\"/|?*")) + return false; + for(const char* p = prefix_end; *p; ++p) + if((unsigned char)*p < 32) + return false; + return true; +} + +/* create a relative path from directory base_dir to file or dir path +base_dir and path must have a common prefix, for example, +"c:\dir1\dir2" and "c:\dir1\dir3\dir4" -> "..\dir3\dir4" +Return value: EXIT_FAILURE or EXIT_SUCCESS. + +PathRelativePathTo fuction gives strange results: +"c:\\dir", "c:\\dir", "..\\dir", +"c:\\dir\\", "c:\\dir", "..\\dir" +"c:\\dir\\", "c:\\dir\\", "", +"\\", "\\a\\", - fails! + +That is why I've decided to provide a custom implementation. +base_dir and path must be absolute paths! +*/ +#if 0 +int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path) +{ + rel_path.clear(); + std_win_string base_dir_win; + std_win_string path_win; + if(!utf8_to_windows(base_dir, base_dir_win)) + return EXIT_FAILURE; + if(!utf8_to_windows(path, path_win)) + return EXIT_FAILURE; + if(base_dir_win.length() >= MAX_PATH) + return EXIT_FAILURE; + if(path_win.length() >= MAX_PATH) + return EXIT_FAILURE; + /* The output buffer must be at least MAX_PATH chars. + How much space do we actually need? */ + wchar_t buf[MAX_PATH * 10]; + bool is_file = !path.empty() && path[path.length()-1] != '\\'; + if(!PathRelativePathToW(buf, base_dir_win.c_str(), FILE_ATTRIBUTE_DIRECTORY, + path_win.c_str(), is_file ? 0 : FILE_ATTRIBUTE_DIRECTORY)) + return EXIT_FAILURE; + wchar_t * buf2 = buf; + if(buf[0] == L'.' && buf[1] == L'\\') + buf2 = buf + 2; + else if(buf[0] == L'.' 
&& buf[1] == L'\0') + buf2 = buf + 1; + if(!windows_to_utf8(buf2, rel_path)) + return EXIT_FAILURE; + return EXIT_SUCCESS; +} +#endif + +int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path) +{ + rel_path.clear(); + std_win_string base_dir_win; + std_win_string path_win; + if(!utf8_to_windows(base_dir, base_dir_win)) + return EXIT_FAILURE; + if(!utf8_to_windows(path, path_win)) + return EXIT_FAILURE; + if(base_dir_win.empty()) + return EXIT_FAILURE; + if(path_win.empty()) + return EXIT_FAILURE; + /* Make sure that both paths end with a backslash, that simplifies further processing. + base_dir must be a directory, so adding a backslash won't hurt. + path may be either a file or a directory */ + if(base_dir_win[base_dir_win.length()-1] != L'\\') + base_dir_win += L'\\'; + if(path_win[path_win.length()-1] != L'\\') + path_win += L'\\'; + const wchar_t* c_base_dir_win = base_dir_win.c_str(); + const wchar_t* c_path_win = path_win.c_str(); + const wchar_t* base_dir_win_root_end = path_root_end_winW(c_base_dir_win); + const wchar_t* path_win_root_end = path_root_end_winW(c_path_win); + if(!base_dir_win_root_end || base_dir_win_root_end == c_base_dir_win) + return EXIT_FAILURE; + if(!path_win_root_end || path_win_root_end == c_path_win) + return EXIT_FAILURE; + if(base_dir_win_root_end - c_base_dir_win != path_win_root_end - c_path_win) + return EXIT_FAILURE; // different roots + if(StrCmpNI(c_base_dir_win, c_path_win, base_dir_win_root_end - c_base_dir_win)) + return EXIT_FAILURE; // different roots + /* p and q points to the end of the common part in base_dir_win and path_win respectively. 
*/ + const wchar_t* p = base_dir_win_root_end; + const wchar_t* q = path_win_root_end; + while(true) + { + const wchar_t* p2 = StrChr(p, L'\\'); + const wchar_t* q2 = StrChr(q, L'\\'); + if(!p2 || !q2) + break; + p2++; + q2++; + if(p2 - p != q2 - q) + break; + if(StrCmpNI(p, q, p2-p)) + break; + p = p2; + q = q2; + } + // found the longest common part + /* calculate how many directories to strip from the base_dir + == number of backslashes after p */ + int parent_cnt = 0; + for(const wchar_t* r = StrChr(p, L'\\'); r; r = StrChr(r+1, L'\\')) + ++parent_cnt; + std_win_string rel_path_win; + rel_path_win.reserve(3 * parent_cnt + wcslen(q)); + for(int i=0; i buffer(buffer_size); + char* buf = &buffer[0]; + gulong len; + clib::File out_file(g_fopen(out_file_name, "wb")); + if(!out_file) { + g_critical(open_write_file_err, out_file_name); + return EXIT_FAILURE; + } + while(true) { + len = gzread(get_impl(in), buf, buffer_size); + if(len < 0) { + g_critical(read_file_err, arch_file_name, ""); + return EXIT_FAILURE; + } + if(len == 0) + break; + if(1 != fwrite(buf, len, 1, get_impl(out_file))) { + g_critical(write_file_err, out_file_name); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; +} + +const std::string& TempFile::create_temp_file(void) +{ + clear(); + file_name = ::create_temp_file(); + if(file_name.empty()) + g_critical(create_temp_file_no_name_err); + return file_name; +} + +void TempFile::clear(void) +{ + if(!file_name.empty()) { + if(g_remove(file_name.c_str())) + g_warning(remove_temp_file_err, file_name.c_str()); + file_name.clear(); + } +} + +std::string create_temp_file(void) +{ +#ifdef _WIN32 + /* g_file_open_tmp does not work reliably on Windows + Use platform specific API here. 
*/ + { + UINT uRetVal = 0; + DWORD dwRetVal = 0; + TCHAR szTempFileName[MAX_PATH]; + TCHAR lpTempPathBuffer[MAX_PATH]; + dwRetVal = GetTempPath(MAX_PATH, lpTempPathBuffer); + if (dwRetVal > MAX_PATH || (dwRetVal == 0)) + return ""; + + uRetVal = GetTempFileName(lpTempPathBuffer, // directory for tmp files + TEXT("temp"), // temp file name prefix + 0, // create unique name + szTempFileName); // buffer for name + if (uRetVal == 0) + return ""; + std::string tmp_url_utf8; + std::string tmp_url; + if(!windows_to_utf8(szTempFileName, tmp_url_utf8) + || !utf8_to_file_name(tmp_url_utf8, tmp_url)) + return ""; + FILE * f = g_fopen(tmp_url.c_str(), "wb"); + if(!f) + return ""; + fwrite(" ", 1, 1, f); + fclose(f); + return tmp_url; + } +#else + { + std::string tmp_url; + gchar * buf = NULL; + gint fd = g_file_open_tmp(NULL, &buf, NULL); + if(fd == -1) + return ""; + tmp_url = buf; + g_free(buf); + ssize_t write_size; + write_size = write(fd, " ", 1); + if (write_size == -1) { + g_print("write error!\n"); + } + close(fd); + return tmp_url; + } +#endif +} + +bool is_known_resource_type(const char* str) +{ + for(size_t i=0; known_resource_types[i]; ++i) + if(strcmp(str, known_resource_types[i]) == 0) + return true; + return false; +} + +/* trim string src + * new_beg is set to new beginning of the string + * new_len length of the new string in bytes + * The source string is not modified. 
*/ +void trim_spaces(const char* const src, const char*& new_beg, size_t& new_len) +{ + new_beg = src; + while(*new_beg && g_unichar_isspace(g_utf8_get_char(new_beg))) { + new_beg = g_utf8_next_char(new_beg); + } + const char* p = new_beg; + const char* end = p; + while(*p) { + if(!g_unichar_isspace(g_utf8_get_char(p))) + end = p; + p = g_utf8_next_char(p); + } + if(*end) + end = g_utf8_next_char(end); + new_len = end - new_beg; +} + +/* truncate utf8 string on char boundary (string content is not changed, + * instead desired new length is returned) + * new string length must be <= max_len + * beg - first char of the string, + * str_len - string length in bytes + * return value: length of the truncated string */ +size_t truncate_utf8_string(const char* const beg, const size_t str_len, const size_t max_len) +{ + if(str_len <= max_len) + return str_len; + if(max_len == 0) + return 0; + const char* char_end = beg+max_len; + const char* p = beg+max_len-1; + while(true) { + // find the first byte of a utf8 char + for(; beg <= p && (*p & 0xC0) == 0x80; --p) + ; + if(p& chars) { + std::stringstream buf; + bool add_splitter = false; + for(std::list::const_iterator it = chars.begin(); it != chars.end(); ++it) { + if(add_splitter) + buf << ", "; + buf << static_cast(g_utf8_get_char(*it)); + add_splitter = true; + } + return buf.str(); +} + +char* strrchr_len(char* str, size_t size, char c) +{ + for(char *p = str + size - 1; str <= p; --p) + if(*p == c) + return p; + return NULL; +} + +bool is_ascii_alpha(wchar_t ch) +{ + static const wchar_t alphabet[] = + L"abcdefghijklmnopqrstuvwxyz" + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + for(size_t i=0; i= filepath.length()) + return ""; + std::string::size_type pos2 = filepath.find_last_of('.'); + if(pos2 == std::string::npos || pos2 < pos) + return filepath.substr(pos); + return filepath.substr(pos, pos2-pos); +} + +/* remove the item at path + * if this is a regular file, removed the file; + * if this is a symbolic line, remove the link; + 
* if this is a directory, remove the directory recursively. + * Return value: EXIT_SUCCESS or EXIT_FAILURE + * */ +int remove_recursive(const std::string& path) +{ + int res = EXIT_SUCCESS; + if(g_file_test(path.c_str(),G_FILE_TEST_IS_DIR)) { + // change file mode so we can read directory and remove items from it + // If we cannot read mode or change it, go on, maybe we can remove the dir anyway. + stardict_stat_t stats; + if(!g_stat(path.c_str(), &stats)) { + // full access for everyone + g_chmod(path.c_str(), stats.st_mode | (S_IRWXU|S_IRWXG|S_IRWXO)); + } + glib::Dir dir(g_dir_open(path.c_str(), 0, NULL)); + if(!dir) + res = EXIT_FAILURE; + else { + std::string dirpath(path); // directory path ending with a dir separator + if(dirpath[dirpath.length()-1] != G_DIR_SEPARATOR) + dirpath += G_DIR_SEPARATOR; + const gchar * filename; + while((filename = g_dir_read_name(get_impl(dir)))) { + if (strcmp(filename, ".") == 0 || strcmp(filename, "..") == 0) + continue; + const std::string itempath(dirpath + filename); + if(remove_recursive(itempath.c_str())) + res = EXIT_FAILURE; + } + } + if(g_rmdir(path.c_str())) + res = EXIT_FAILURE; + return res; + } else { + if(g_remove(path.c_str())) + res = EXIT_FAILURE; + return res; + } +} diff --git a/lib/stardict/libcommon.h b/lib/stardict/libcommon.h new file mode 100644 index 0000000..1689bec --- /dev/null +++ b/lib/stardict/libcommon.h @@ -0,0 +1,260 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with StarDict. If not, see . + */ + +#ifndef _LIBCOMMON_H_ +#define _LIBCOMMON_H_ + +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +#include +#endif + +#include "config-custom.h" + +extern gint stardict_strcmp(const gchar *s1, const gchar *s2); +extern bool file_name_to_utf8(const std::string& str, std::string& out); +extern bool utf8_to_file_name(const std::string& str, std::string& out); +#ifdef _WIN32 +typedef std::basic_string std_win_string; +extern bool utf8_to_windows(const std::string& str_utf8, std_win_string& out); +extern bool windows_to_utf8(const std_win_string& str, std::string& out_utf8); +extern int norm_path_win(const std::string& path, std::string& result); +extern bool is_absolute_path_win(const std::string& path); +extern bool is_valid_path_win(const std::string& path); +extern int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path); +extern bool is_equal_paths_win(const std::string& path1, const std::string& path2); +extern bool is_path_end_with_win(const std::string& path, const std::string& suff); +#endif +inline +bool is_equal_paths(const std::string& path1, const std::string& path2) +{ +#ifdef _WIN32 + return is_equal_paths_win(path1, path2); +#else + return path1 == path2; +#endif +} +inline +bool is_path_end_with(const std::string& path, const std::string& suff) +{ +#ifdef _WIN32 + return is_path_end_with_win(path, suff); +#else + return g_str_has_suffix(path.c_str(), suff.c_str()); +#endif +} +#define DB_DIR_SEPARATOR '/' +#define DB_DIR_SEPARATOR_S "/" +/* functions to convert directory separator characters + * 1. file system separator character = G_DIR_SEPARATOR + * 2. 
database separator character = DB_DIR_SEPARATOR */ +#if DB_DIR_SEPARATOR == G_DIR_SEPARATOR +inline std::string dir_separator_fs_to_db(const std::string& path) +{ + return path; +} +inline std::string dir_separator_db_to_fs(const std::string& path) +{ + return path; +} +#else +extern std::string dir_separator_fs_to_db(const std::string& path); +extern std::string dir_separator_db_to_fs(const std::string& path); +#endif +std::string build_path(const std::string& path1, const std::string& path2); + +enum TLoadResult { lrOK, lrError, lrNotFound }; + +int unpack_zlib(const char* arch_file_name, const char* out_file_name); + +/* allows to create a temporary file, remove the temporary file when the object is destroyed. */ +class TempFile +{ +public: + TempFile(void) + { + } + ~TempFile(void) + { + clear(); + } + const std::string& create_temp_file(void); + const std::string& get_file_name(void) const + { + return file_name; + } + void clear(void); +private: + std::string file_name; +}; + +template +class auto_executor_t +{ +public: + typedef void (T::*method_t)(void); + auto_executor_t(T& obj, method_t method) + : + obj(obj), + method(method) + { + + } + ~auto_executor_t(void) + { + (obj.*method)(); + } +private: + T& obj; + method_t method; +}; + + +template +class ResourceWrapper { +public: + ResourceWrapper(T *p = NULL) : p_(p) {} + ~ResourceWrapper() { free_resource(); } + T *operator->() const { return p_; } + bool operator!() const { return p_ == NULL; } + + void reset(T *newp) { + if (p_ != newp) { + free_resource(); + p_ = newp; + } + } + + friend inline T *get_impl(const ResourceWrapper& rw) { + return rw.p_; + } + + friend inline T **get_addr(ResourceWrapper& rw) { + return &rw.p_; + } +private: + T *p_; + + void free_resource() { if (p_) unref_res(p_); } + +// Helper for enabling 'if (sp)' + struct Tester { + Tester() {} + private: + void operator delete(void*); + }; + +public: +// enable 'if (sp)' + operator const Tester*() const + { + if (!*this) return 0; + 
static Tester t; + return &t; + } +}; + +namespace glib { + typedef ResourceWrapper CharStr; + typedef ResourceWrapper Error; + typedef ResourceWrapper CharStrArr; + typedef ResourceWrapper OptionContext; + typedef ResourceWrapper Dir; +} + +namespace clib { + typedef ResourceWrapper File; +} + +namespace zip { +#if ZLIB_VERNUM > 0x1250 +typedef ResourceWrapper gzFile; +#else +typedef ResourceWrapper gzFile; +#endif +} + +/* Create a new temporary file. Return file name in file name encoding. +Return an empty string if file cannot be created. */ +std::string create_temp_file(void); + +extern const char* known_resource_types[]; + +bool is_known_resource_type(const char* str); + +void trim_spaces(const char* const src, const char*& new_beg, size_t& new_len); +size_t truncate_utf8_string(const char* const beg, const size_t str_len, const size_t max_len); +std::string fix_utf8_str(const std::string& str, char replacement_char = '?'); +std::string print_char_codes(const std::list& chars); +char* strrchr_len(char* str, size_t size, char c); +bool is_ascii_alpha(wchar_t ch); +std::string get_basename_without_extension(const std::string& filepath); +int remove_recursive(const std::string& path); + +#define UTF8_BOM "\xEF\xBB\xBF" + +#define known_type_ids \ + "mtygxkwhnr" + +#define file_not_found_err \ + "File does not exist: '%s'" +#define dir_not_found_err \ + "Directory does not exist: '%s'" +#define read_file_err \ + "Error reading file: '%s'. Error: %s." +#define write_file_err \ + "Error writing file: '%s'." +#define open_read_file_err \ + "Unable open file for reading: '%s'. Error: %s." +#define open_write_file_err \ + "Unable open file for writing: '%s'." +#define create_temp_file_err \ + "Unable to create a temporary file: '%s'." +#define create_temp_file_no_name_err \ + "Unable to create a temporary file." +#define remove_temp_file_err \ + "Unable to remove a temporary file: '%s'." +#define copy_file_err \ + "Error copying file from '%s' to '%s'. 
Error: %s" +#define create_dir_err \ + "Unable to create directory '%s'. Error: %s" +#define open_dir_err \ + "Unable to open directory '%s'. Error: %s" +#define incorrect_arg_err \ + "Incorrect argument." +#define fixed_ignore_msg \ + "The problem was fixed. Ignore the problem." +#define fixed_msg \ + "The problem was fixed." +#define fixed_msg2 \ + "The problem was fixed. " + +/* Maximum size of word in index. strlen(word) < MAX_INDEX_KEY_SIZE. + * See doc/StarDictFileFormat. */ +const int MAX_INDEX_KEY_SIZE=256; + +#endif + diff --git a/lib/tlpi-lib/Build_ename.sh b/lib/tlpi-lib/Build_ename.sh new file mode 100644 index 0000000..d2eb014 --- /dev/null +++ b/lib/tlpi-lib/Build_ename.sh @@ -0,0 +1,53 @@ +#!/bin/sh +# +# Create a new version of the file ename.c.inc by parsing symbolic +# error names defined in errno.h +# +echo '#include ' | cpp -dM | +sed -n -e '/#define *E/s/#define *//p' |sort -k2n | +awk ' +BEGIN { + entries_per_line = 4 + line_len = 68; + last = 0; + varname =" enames"; + print "static char *ename[] = {"; + line = " /* 0 */ \"\""; +} + +{ + if ($2 ~ /^E[A-Z0-9]*$/) { # These entries are sorted at top + synonym[$1] = $2; + } else { + while (last + 1 < $2) { + last++; + line = line ", "; + if (length(line ename) > line_len || last == 1) { + print line; + line = " /* " last " */ "; + line = sprintf(" /* %3d */ ", last); + } + line = line "\"" "\"" ; + } + last = $2; + ename = $1; + for (k in synonym) + if (synonym[k] == $1) ename = ename "/" k; + + line = line ", "; + if (length(line ename) > line_len || last == 1) { + print line; + line = " /* " last " */ "; + line = sprintf(" /* %3d */ ", last);; + } + line = line "\"" ename "\"" ; + } +} +END { + print line; + print "};" + print ""; + print "#define MAX_ENAME " last; +} +' + diff --git a/lib/tlpi-lib/Makefile b/lib/tlpi-lib/Makefile new file mode 100644 index 0000000..c4b81ac --- /dev/null +++ b/lib/tlpi-lib/Makefile @@ -0,0 +1,27 @@ +# Makefile to build library used by all programs +# +# This 
make file relies on the assumption that each C file in this +# directory belongs in the library +# +# This makefile is very simple so that every version of make +# should be able to handle it +# +include Makefile.inc + +# The library build is "brute force" -- we don't bother with +# dependency checking. + +allgen : ${TLPI_LIB} + +${TLPI_LIB} : *.cpp ename.c.inc + ${CXX} -c -g ${CXXFLAGS} -Wno-write-strings *.cpp + ${RM} ${TLPI_LIB} + ${AR} rs libtlpi.a *.o + +ename.c.inc : + sh Build_ename.sh > ename.c.inc + echo 1>&2 "ename.c.inc built" + +clean : + ${RM} *.o ename.c.inc ${TLPI_LIB} + diff --git a/lib/tlpi-lib/Makefile.inc b/lib/tlpi-lib/Makefile.inc new file mode 100644 index 0000000..bb0934d --- /dev/null +++ b/lib/tlpi-lib/Makefile.inc @@ -0,0 +1,49 @@ +# Makefile.inc - common definitions used by all makefiles + +TLPI_DIR = .. +TLPI_LIB = ${TLPI_DIR}/libtlpi.a +TLPI_INCL_DIR = ${TLPI_DIR}/lib + +LINUX_LIBRT = -lrt +LINUX_LIBDL = -ldl +LINUX_LIBACL = -lacl +LINUX_LIBCRYPT = -lcrypt +LINUX_LIBCAP = -lcap + +# "-Wextra" is a more descriptive synonym for "-W", but only +# available in more recent gcc versions + +# Defining _DEFAULT_SOURCE is a workaround to avoid the warnings that +# would otherwise be produced when compiling code that defines _BSD_SOURCE +# or _SVID_SOURCE against glibc headers in version 2.20 and later. +# (The alternative would be to replace each instance of "#define _SVID_SOURCE" +# or "#define _BSD_SOURCE" in the example programs with +# "#define _DEFAULT_SOURCE".) + +IMPL_CFLAGS = -D_XOPEN_SOURCE=600 \ + -D_DEFAULT_SOURCE \ + -g -I${TLPI_INCL_DIR} \ + -pedantic \ + -Wall \ + -W \ + -Wno-sign-compare \ + -Wno-unused-parameter \ + -Wno-write-strings + +# clang(1) is a little more zealous than gcc(1) with respect to some warnings. +# Suppress those warnings (which, at least in the book code, relate to code +# that is fine). 
+ +ifeq ($(CC),clang) + IMPL_CFLAGS += -Wno-uninitialized -Wno-infinite-recursion +endif + +CFLAGS = ${IMPL_CFLAGS} + +IMPL_THREAD_FLAGS = -pthread + +IMPL_LDLIBS = ${TLPI_LIB} + +LDLIBS = ${IMPL_LDLIBS} + +RM = rm -f diff --git a/lib/tlpi-lib/README b/lib/tlpi-lib/README new file mode 100644 index 0000000..1815ba2 --- /dev/null +++ b/lib/tlpi-lib/README @@ -0,0 +1,7 @@ +A small design note... Many of the library functions defined in the +source code modules in this directory handle errors from system calls +and C library functions by simply terminating the process. This +isn't acceptable design for a "real world" suite of library functions; +I did things this way to keep the source code simpler and shorter. +A properly designed function should indicate an error to its caller +using a status argument or some special function return value. diff --git a/lib/tlpi-lib/ename.c.inc b/lib/tlpi-lib/ename.c.inc new file mode 100644 index 0000000..907a01f --- /dev/null +++ b/lib/tlpi-lib/ename.c.inc @@ -0,0 +1,35 @@ +static char *ename[] = { + /* 0 */ "", + /* 1 */ "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO", "ENXIO", + /* 7 */ "E2BIG", "ENOEXEC", "EBADF", "ECHILD", + /* 11 */ "EAGAIN/EWOULDBLOCK", "ENOMEM", "EACCES", "EFAULT", + /* 15 */ "ENOTBLK", "EBUSY", "EEXIST", "EXDEV", "ENODEV", + /* 20 */ "ENOTDIR", "EISDIR", "EINVAL", "ENFILE", "EMFILE", + /* 25 */ "ENOTTY", "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE", + /* 30 */ "EROFS", "EMLINK", "EPIPE", "EDOM", "ERANGE", + /* 35 */ "EDEADLK/EDEADLOCK", "ENAMETOOLONG", "ENOLCK", "ENOSYS", + /* 39 */ "ENOTEMPTY", "ELOOP", "", "ENOMSG", "EIDRM", "ECHRNG", + /* 45 */ "EL2NSYNC", "EL3HLT", "EL3RST", "ELNRNG", "EUNATCH", + /* 50 */ "ENOCSI", "EL2HLT", "EBADE", "EBADR", "EXFULL", "ENOANO", + /* 56 */ "EBADRQC", "EBADSLT", "", "EBFONT", "ENOSTR", "ENODATA", + /* 62 */ "ETIME", "ENOSR", "ENONET", "ENOPKG", "EREMOTE", + /* 67 */ "ENOLINK", "EADV", "ESRMNT", "ECOMM", "EPROTO", + /* 72 */ "EMULTIHOP", "EDOTDOT", "EBADMSG", "EOVERFLOW", + /* 76 
*/ "ENOTUNIQ", "EBADFD", "EREMCHG", "ELIBACC", "ELIBBAD", + /* 81 */ "ELIBSCN", "ELIBMAX", "ELIBEXEC", "EILSEQ", "ERESTART", + /* 86 */ "ESTRPIPE", "EUSERS", "ENOTSOCK", "EDESTADDRREQ", + /* 90 */ "EMSGSIZE", "EPROTOTYPE", "ENOPROTOOPT", + /* 93 */ "EPROTONOSUPPORT", "ESOCKTNOSUPPORT", + /* 95 */ "EOPNOTSUPP/ENOTSUP", "EPFNOSUPPORT", "EAFNOSUPPORT", + /* 98 */ "EADDRINUSE", "EADDRNOTAVAIL", "ENETDOWN", "ENETUNREACH", + /* 102 */ "ENETRESET", "ECONNABORTED", "ECONNRESET", "ENOBUFS", + /* 106 */ "EISCONN", "ENOTCONN", "ESHUTDOWN", "ETOOMANYREFS", + /* 110 */ "ETIMEDOUT", "ECONNREFUSED", "EHOSTDOWN", "EHOSTUNREACH", + /* 114 */ "EALREADY", "EINPROGRESS", "ESTALE", "EUCLEAN", + /* 118 */ "ENOTNAM", "ENAVAIL", "EISNAM", "EREMOTEIO", "EDQUOT", + /* 123 */ "ENOMEDIUM", "EMEDIUMTYPE", "ECANCELED", "ENOKEY", + /* 127 */ "EKEYEXPIRED", "EKEYREVOKED", "EKEYREJECTED", + /* 130 */ "EOWNERDEAD", "ENOTRECOVERABLE", "ERFKILL", "EHWPOISON" +}; + +#define MAX_ENAME 133 diff --git a/lib/tlpi-lib/error_functions.cpp b/lib/tlpi-lib/error_functions.cpp new file mode 100644 index 0000000..e819c16 --- /dev/null +++ b/lib/tlpi-lib/error_functions.cpp @@ -0,0 +1,204 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. This program is distributed without any warranty. * +* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details. * +\*************************************************************************/ + +/* Listing 3-3 */ + +/* error_functions.c + + Some standard error handling routines used by various programs. 
+*/ +#include +#include "error_functions.h" +#include "tlpi_hdr.h" +#include "ename.c.inc" /* Defines ename and MAX_ENAME */ + +#ifdef __GNUC__ /* Prevent 'gcc -Wall' complaining */ +__attribute__ ((__noreturn__)) /* if we call this function as last */ +#endif /* statement in a non-void function */ +static void +terminate(Boolean useExit3) +{ + char *s; + + /* Dump core if EF_DUMPCORE environment variable is defined and + is a nonempty string; otherwise call exit(3) or _exit(2), + depending on the value of 'useExit3'. */ + + s = getenv("EF_DUMPCORE"); + + if (s != NULL && *s != '\0') + abort(); + else if (useExit3) + exit(EXIT_FAILURE); + else + _exit(EXIT_FAILURE); +} + +/* Diagnose 'errno' error by: + + * outputting a string containing the error name (if available + in 'ename' array) corresponding to the value in 'err', along + with the corresponding error message from strerror(), and + + * outputting the caller-supplied error message specified in + 'format' and 'ap'. */ + +static void +outputError(Boolean useErr, int err, Boolean flushStdout, + const char *format, va_list ap) +{ +#define BUF_SIZE 500 + char buf[BUF_SIZE], userMsg[BUF_SIZE], errText[BUF_SIZE]; + + vsnprintf(userMsg, BUF_SIZE, format, ap); + + if (useErr) + snprintf(errText, BUF_SIZE, " [%s %s]", + (err > 0 && err <= MAX_ENAME) ? + ename[err] : "?UNKNOWN?", strerror(err)); + else + snprintf(errText, BUF_SIZE, ":"); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-truncation" + snprintf(buf, BUF_SIZE, "ERROR%s %s\n", errText, userMsg); +#pragma GCC diagnostic pop + + if (flushStdout) + fflush(stdout); /* Flush any pending stdout */ + fputs(buf, stderr); + fflush(stderr); /* In case stderr is not line-buffered */ +} + +/* Display error message including 'errno' diagnostic, and + return to caller */ + +void +errMsg(const char *format, ...) 
+{ + va_list argList; + int savedErrno; + + savedErrno = errno; /* In case we change it here */ + + va_start(argList, format); + outputError(TRUE, errno, TRUE, format, argList); + va_end(argList); + + errno = savedErrno; +} + +/* Display error message including 'errno' diagnostic, and + terminate the process */ + +void +errExit(const char *format, ...) +{ + va_list argList; + + va_start(argList, format); + outputError(TRUE, errno, TRUE, format, argList); + va_end(argList); + + terminate(TRUE); +} + +/* Display error message including 'errno' diagnostic, and + terminate the process by calling _exit(). + + The relationship between this function and errExit() is analogous + to that between _exit(2) and exit(3): unlike errExit(), this + function does not flush stdout and calls _exit(2) to terminate the + process (rather than exit(3), which would cause exit handlers to be + invoked). + + These differences make this function especially useful in a library + function that creates a child process that must then terminate + because of an error: the child must terminate without flushing + stdio buffers that were partially filled by the caller and without + invoking exit handlers that were established by the caller. */ + +void +err_exit(const char *format, ...) +{ + va_list argList; + + va_start(argList, format); + outputError(TRUE, errno, FALSE, format, argList); + va_end(argList); + + terminate(FALSE); +} + +/* The following function does the same as errExit(), but expects + the error number in 'errnum' */ + +void +errExitEN(int errnum, const char *format, ...) +{ + va_list argList; + + va_start(argList, format); + outputError(TRUE, errnum, TRUE, format, argList); + va_end(argList); + + terminate(TRUE); +} + +/* Print an error message (without an 'errno' diagnostic) */ + +void +fatal(const char *format, ...) 
+{ + va_list argList; + + va_start(argList, format); + outputError(FALSE, 0, TRUE, format, argList); + va_end(argList); + + terminate(TRUE); +} + +/* Print a command usage error message and terminate the process */ + +void +usageErr(const char *format, ...) +{ + va_list argList; + + fflush(stdout); /* Flush any pending stdout */ + + fprintf(stderr, "Usage: "); + va_start(argList, format); + vfprintf(stderr, format, argList); + va_end(argList); + + fflush(stderr); /* In case stderr is not line-buffered */ + exit(EXIT_FAILURE); +} + +/* Diagnose an error in command-line arguments and + terminate the process */ + +void +cmdLineErr(const char *format, ...) +{ + va_list argList; + + fflush(stdout); /* Flush any pending stdout */ + + fprintf(stderr, "Command-line usage error: "); + va_start(argList, format); + vfprintf(stderr, format, argList); + va_end(argList); + + fflush(stderr); /* In case stderr is not line-buffered */ + exit(EXIT_FAILURE); +} diff --git a/lib/tlpi-lib/error_functions.h b/lib/tlpi-lib/error_functions.h new file mode 100644 index 0000000..8c2817b --- /dev/null +++ b/lib/tlpi-lib/error_functions.h @@ -0,0 +1,55 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. This program is distributed without any warranty. * +* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details. * +\*************************************************************************/ + +/* Listing 3-2 */ + +/* error_functions.h + + Header file for error_functions.c. 
+*/ +#ifndef ERROR_FUNCTIONS_H +#define ERROR_FUNCTIONS_H + +/* Error diagnostic routines */ + +void errMsg(const char *format, ...); + +#ifdef __GNUC__ + + /* This macro stops 'gcc -Wall' complaining that "control reaches + end of non-void function" if we use the following functions to + terminate main() or some other non-void function. */ + +#define NORETURN __attribute__ ((__noreturn__)) +#else +#define NORETURN +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +void errExit(const char *format, ...) NORETURN ; + +void err_exit(const char *format, ...) NORETURN ; + +void errExitEN(int errnum, const char *format, ...) NORETURN ; + +void fatal(const char *format, ...) NORETURN ; + +void usageErr(const char *format, ...) NORETURN ; + +void cmdLineErr(const char *format, ...) NORETURN ; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/tlpi-lib/get_num.cpp b/lib/tlpi-lib/get_num.cpp new file mode 100644 index 0000000..e8e1ad2 --- /dev/null +++ b/lib/tlpi-lib/get_num.cpp @@ -0,0 +1,103 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. This program is distributed without any warranty. * +* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details. * +\*************************************************************************/ + +/* Listing 3-6 */ + +/* get_num.c + + Functions to process numeric command-line arguments. +*/ +#include +#include +#include +#include +#include +#include "get_num.h" + +/* Print a diagnostic message that contains a function name ('fname'), + the value of a command-line argument ('arg'), the name of that + command-line argument ('name'), and a diagnostic error message ('msg'). 
*/ + +static void +gnFail(const char *fname, const char *msg, const char *arg, const char *name) +{ + fprintf(stderr, "%s error", fname); + if (name != NULL) + fprintf(stderr, " (in %s)", name); + fprintf(stderr, ": %s\n", msg); + if (arg != NULL && *arg != '\0') + fprintf(stderr, " offending text: %s\n", arg); + + exit(EXIT_FAILURE); +} + +/* Convert a numeric command-line argument ('arg') into a long integer, + returned as the function result. 'flags' is a bit mask of flags controlling + how the conversion is done and what diagnostic checks are performed on the + numeric result; see get_num.h for details. + + 'fname' is the name of our caller, and 'name' is the name associated with + the command-line argument 'arg'. 'fname' and 'name' are used to print a + diagnostic message in case an error is detected when processing 'arg'. */ + +static long +getNum(const char *fname, const char *arg, int flags, const char *name) +{ + long res; + char *endptr; + int base; + + if (arg == NULL || *arg == '\0') + gnFail(fname, "null or empty string", arg, name); + + base = (flags & GN_ANY_BASE) ? 0 : (flags & GN_BASE_8) ? 8 : + (flags & GN_BASE_16) ? 16 : 10; + + errno = 0; + res = strtol(arg, &endptr, base); + if (errno != 0) + gnFail(fname, "strtol() failed", arg, name); + + if (*endptr != '\0') + gnFail(fname, "nonnumeric characters", arg, name); + + if ((flags & GN_NONNEG) && res < 0) + gnFail(fname, "negative value not allowed", arg, name); + + if ((flags & GN_GT_0) && res <= 0) + gnFail(fname, "value must be > 0", arg, name); + + return res; +} + +/* Convert a numeric command-line argument string to a long integer. See the + comments for getNum() for a description of the arguments to this function. */ + +long +getLong(const char *arg, int flags, const char *name) +{ + return getNum("getLong", arg, flags, name); +} + +/* Convert a numeric command-line argument string to an integer. See the + comments for getNum() for a description of the arguments to this function. 
*/ + +int +getInt(const char *arg, int flags, const char *name) +{ + long res; + + res = getNum("getInt", arg, flags, name); + + if (res > INT_MAX || res < INT_MIN) + gnFail("getInt", "integer out of range", arg, name); + + return (int) res; +} diff --git a/lib/tlpi-lib/get_num.h b/lib/tlpi-lib/get_num.h new file mode 100644 index 0000000..7b5a0b1 --- /dev/null +++ b/lib/tlpi-lib/get_num.h @@ -0,0 +1,32 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. This program is distributed without any warranty. * +* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details. * +\*************************************************************************/ + +/* Listing 3-5 */ + +/* get_num.h + + Header file for get_num.c. +*/ +#ifndef GET_NUM_H +#define GET_NUM_H + +#define GN_NONNEG 01 /* Value must be >= 0 */ +#define GN_GT_0 02 /* Value must be > 0 */ + + /* By default, integers are decimal */ +#define GN_ANY_BASE 0100 /* Can use any base - like strtol(3) */ +#define GN_BASE_8 0200 /* Value is expressed in octal */ +#define GN_BASE_16 0400 /* Value is expressed in hexadecimal */ + +long getLong(const char *arg, int flags, const char *name); + +int getInt(const char *arg, int flags, const char *name); + +#endif diff --git a/lib/tlpi-lib/tlpi_hdr.h b/lib/tlpi-lib/tlpi_hdr.h new file mode 100644 index 0000000..0d81259 --- /dev/null +++ b/lib/tlpi-lib/tlpi_hdr.h @@ -0,0 +1,88 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. 
You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. This program is distributed without any warranty. * +* See the files COPYING.lgpl-v3 and COPYING.gpl-v3 for details. * +\*************************************************************************/ + +/* Listing 3-1 */ + +/* tlpi_hdr.h + + Standard header file used by nearly all of our example programs. +*/ +#ifndef TLPI_HDR_H +#define TLPI_HDR_H /* Prevent accidental double inclusion */ + +#include <sys/types.h> /* Type definitions used by many programs */ +#include <stdio.h> /* Standard I/O functions */ +#include <stdlib.h> /* Prototypes of commonly used library functions, + plus EXIT_SUCCESS and EXIT_FAILURE constants */ +#include <unistd.h> /* Prototypes for many system calls */ +#include <errno.h> /* Declares errno and defines error constants */ +#include <string.h> /* Commonly used string-handling functions */ + +#include "get_num.h" /* Declares our functions for handling numeric + arguments (getInt(), getLong()) */ + +#include "error_functions.h" /* Declares our error-handling functions */ + + + +/* Unfortunately some UNIX implementations define FALSE and TRUE - + here we'll undefine them */ + +#ifdef TRUE +#undef TRUE +#endif + +#ifdef FALSE +#undef FALSE +#endif + +typedef enum { FALSE, TRUE } Boolean; + +#if 0 +#define min(m,n) ((m) < (n) ? (m) : (n)) +#define max(m,n) ((m) > (n) ? (m) : (n)) + +/* Some systems don't define 'socklen_t' */ + +#if defined(__sgi) +typedef int socklen_t; +#endif + +#if defined(__sun) +#include <sys/file.h> /* Has definition of FASYNC */ +#endif + +#if ! defined(O_ASYNC) && defined(FASYNC) +/* Some systems define FASYNC instead of O_ASYNC */ +#define O_ASYNC FASYNC +#endif + +#if defined(MAP_ANON) && ! defined(MAP_ANONYMOUS) +/* BSD derivatives usually have MAP_ANON, not MAP_ANONYMOUS */ +#define MAP_ANONYMOUS MAP_ANON + +#endif + +#if !
defined(O_SYNC) && defined(O_FSYNC) +/* Some implementations have O_FSYNC instead of O_SYNC */ +#define O_SYNC O_FSYNC +#endif + +#if defined(__FreeBSD__) + +/* FreeBSD uses these alternate names for fields in the sigval structure */ + +#define sival_int sigval_int +#define sival_ptr sigval_ptr +#endif + +#endif + +#endif