Activitatea frenetică desfășurată în cercetarea, dezvoltarea, proiectarea și fabricația produselor aeronautice și spațiale presupune un efort uriaș de cooperare și colaborare între instituții cu profile variate și cu o răspândire remarcabilă. Mai mult, cerințele draconice de calitate și costurile foarte mari de dezvoltare a noilor produse impun selectarea de către proiectant numai a celor mai performante componente și tehnologii. Toate acestea conduc la analiza unui volum uriaș de informație, informație care astăzi nu mai este apanajul unei singure țări. Dacă la cele de mai sus adăugăm necesitatea cunoașterii cerințelor beneficiarilor, ajungem la concluzia că, pentru a activa astăzi în industria aerospațială (atât ca producător, cât și ca beneficiar), sunt necesare bogate cunoștințe lingvistice. Tendințele de globalizare a economiei și culturii umane s-au făcut simțite în primul rând în domeniul aerospațial (în anii '90 au avut loc fuziuni spectaculoase, pe de o parte între producători și pe de altă parte între utilizatori), ajungându-se la crearea unor companii multinaționale. Cel mai semnificativ exemplu îl reprezintă industria aerospațială europeană, care cuprinde producători și utilizatori ale căror instituții au unități majore aflate în Anglia, Franța, Germania, Italia, Spania și având colaboratori în încă alte câteva țări.
România, cu o industrie aerospațială dezvoltată, dacă o raportăm la mărimea țării, nu poate menține acest nivel decât prin cooperare și integrare cu marile firme aerospațiale. Apare astfel în mod natural necesitatea unui instrument rapid de sprijin al comunicării într-un mediu multinațional. Autorii speră că un dicționar poliglot, cuprinzând, pe lângă limbile cu circulație majoră în domeniul aerospațial, și limba română, poate fi util tuturor celor care au în vreun fel interese în această ramură a activității umane.
Dicționarul cuprinde termenii generici din domeniul aerospațial în șase limbi: română, engleză, franceză, germană, italiană și spaniolă. VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + result = combine_result(result, load_syn_file()); + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + result = combine_result(result, load_dict_file()); + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) + return result; + return result; +} + +int binary_dict_parser_t::get_data_fields(guint32 offset, guint32 size, data_field_vect_t& fields) const +{ + if(size == 0) + return EXIT_FAILURE; + fields.clear(); + + const char* word = "???"; + std::vector buffer(size); + + if(!dictfile) { + g_critical(dictionary_no_loaded_err); + return EXIT_FAILURE; + } + if(fseek(get_impl(dictfile), offset, SEEK_SET)) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + if(1 != fread(&buffer[0], size, 1, get_impl(dictfile))) { + std::string error(g_strerror(errno)); + g_critical(read_file_err, dictfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + + dictionary_data_block data_block; + data_block.set_resource_storage(p_res_storage); + data_block.set_fix_errors(fix_errors); + return VERIF_RESULT_FATAL <= data_block.load(&buffer[0], size, dict_info.get_sametypesequence(), word, &fields) + ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + /* prepare_idx_file: choose which index file will be read. Emits a warning and raises the result to VERIF_RESULT_WARNING when both basefilename.idx.gz and basefilename.idx exist. If the .idx.gz file exists, idxfilename is set to a temp file obtained from idxtemp and the archive is unpacked into it with unpack_zlib; an empty temp file name or a nonzero unpack_zlib return yields VERIF_RESULT_FATAL. Otherwise idxfilename and idxfilename_orig both name the plain .idx file, whose existence is not checked here. Returns the combined VerifResult. */+VerifResult binary_dict_parser_t::prepare_idx_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + const std::string index_file_name_gz = basefilename + ".idx.gz"; + const std::string index_file_name_idx = basefilename + ".idx"; + if(g_file_test(index_file_name_gz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(index_file_name_idx.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(two_index_files_msg, index_file_name_gz.c_str(), index_file_name_idx.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + } + /* The compressed file is tried first; idxfilename_orig keeps the on-disk name while idxfilename may point at the unpacked temp copy. */idxfilename_orig=index_file_name_gz; + if(g_file_test(idxfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + idxfilename = idxtemp.create_temp_file(); + if(idxfilename.empty()) + return combine_result(result, VERIF_RESULT_FATAL); + if(unpack_zlib(idxfilename_orig.c_str(), idxfilename.c_str())) + return combine_result(result, VERIF_RESULT_FATAL); + } else { + idxfilename_orig = index_file_name_idx; + idxfilename = idxfilename_orig; + } + return result; +} + /* prepare_dict_file: same selection logic as prepare_idx_file, applied to the data file. Warns (VERIF_RESULT_WARNING) when both basefilename.dict.dz and basefilename.dict exist. If the .dict.dz file exists it is unpacked with unpack_zlib into a temp file obtained from dicttemp and dictfilename names that temp file; an empty temp file name or a nonzero unpack_zlib return yields VERIF_RESULT_FATAL. Otherwise dictfilename and dictfilename_orig both name the plain .dict file, whose existence is not checked here. Returns the combined VerifResult. */+VerifResult binary_dict_parser_t::prepare_dict_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + const std::string dict_file_name_dz = basefilename + ".dict.dz"; + const std::string dict_file_name_dict = basefilename + ".dict"; + if(g_file_test(dict_file_name_dz.c_str(), G_FILE_TEST_EXISTS) + && g_file_test(dict_file_name_dict.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(two_dict_files_msg, dict_file_name_dz.c_str(), dict_file_name_dict.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + } + dictfilename_orig=dict_file_name_dz; + if(g_file_test(dictfilename_orig.c_str(), G_FILE_TEST_EXISTS)) { + dictfilename = dicttemp.create_temp_file(); + if(dictfilename.empty()) + return combine_result(result, VERIF_RESULT_FATAL); + if(unpack_zlib(dictfilename_orig.c_str(), dictfilename.c_str())) + return combine_result(result, VERIF_RESULT_FATAL); + } else { + dictfilename_orig = dict_file_name_dict; + dictfilename = dictfilename_orig; + } + return result; +} + 
/* load_ifo_file: load the dictionary metadata from ifofilename into dict_info, requesting DictInfoType_NormDict. Returns EXIT_SUCCESS when dict_info.load_from_ifo_file reports success and EXIT_FAILURE otherwise. */+int binary_dict_parser_t::load_ifo_file(void) +{ + if(!dict_info.load_from_ifo_file(ifofilename, DictInfoType_NormDict)) + return EXIT_FAILURE; + return EXIT_SUCCESS; +} + /* load_idx_file: verify the index file and load it into the index member. The part visible on this line resolves the file through prepare_idx_file, obtains its size with g_stat, compares that size with dict_info.get_index_file_size(), and reads the whole file into an in-memory buffer before entry parsing starts. Failures to stat, open or fully read the file return a result combined with VERIF_RESULT_FATAL. */+VerifResult binary_dict_parser_t::load_idx_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + { + VerifResult res = prepare_idx_file(); + result = combine_result(result, res); + /* Abort threshold depends on the mode: with fix_errors set only VERIF_RESULT_FATAL stops here, otherwise VERIF_RESULT_CRITICAL already does. NOTE(review): this relies on VerifResult values being ordered by severity - confirm against the enum. */if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= res) + return result; + } + + guint32 idxfilesize; + { + stardict_stat_t stats; + if (g_stat (idxfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(file_not_found_idx_err, idxfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + idxfilesize = (guint32)stats.st_size; + } + g_message(loading_idx_file_msg, idxfilename_orig.c_str()); + + /* A size mismatch with the value recorded in dict_info is VERIF_RESULT_CRITICAL; in fix mode the recorded size is overwritten with the real one and loading continues, otherwise the function returns here. */if (dict_info.get_index_file_size() != idxfilesize) { + g_warning(incorrect_idx_file_size_err, + dict_info.get_index_file_size(), idxfilesize); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_index_file_size(idxfilesize); + g_message(fixed_msg); + } else + return result; + } + + index.clear(); + index.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_wordcount())); + + std::vector buf(idxfilesize+1); + gchar * const buffer_beg = &buf[0]; + gchar * const buffer_end = buffer_beg+idxfilesize; + { + FILE *idxfile = g_fopen(idxfilename.c_str(),"rb"); + if(!idxfile) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, idxfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + if(idxfilesize != fread(buffer_beg, 1, idxfilesize, idxfile)) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, idxfilename.c_str(), error.c_str()); + fclose(idxfile); + return combine_result(result, VERIF_RESULT_FATAL); + } + fclose(idxfile); + } + + const char *p=buffer_beg; + int wordlen; + gint cmpvalue; + guint wordcount=0; + worditem_t 
worditem, preworditem; + size_t size_remain; // to the end of the index file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* const word_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!word_end) { + g_warning(index_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + worditem.word = p; + wordlen = worditem.word.length(); + if (!g_utf8_validate(worditem.word.c_str(), wordlen, NULL)) { + g_warning(word_invalid_utf8_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + worditem.word = fix_utf8_str(worditem.word, 0); + wordlen = worditem.word.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + const char* const word = worditem.word.c_str(); + if(check_xml_string_chars(word, invalid_chars)) { + result = combine_result(result, VERIF_RESULT_WARNING); + g_message(word_invalid_char_value_err, + word, print_char_codes(invalid_chars).c_str()); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + fix_xml_string_chars(word, worditem.word); + wordlen = worditem.word.length(); + } + } + } + if (wordlen > 0) { + if (wordlen>=MAX_INDEX_KEY_SIZE) { + g_warning(long_word_err, worditem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + wordlen = truncate_utf8_string(worditem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1); + worditem.word.resize(wordlen); + g_message(fixed_word_truncated_msg); + } + } + bool have_spaces = false; + if (g_ascii_isspace(worditem.word[0])) { + g_message(word_begin_space_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if (g_ascii_isspace(worditem.word[wordlen-1])) { + g_message(word_end_space_err, worditem.word.c_str()); + 
result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if(have_spaces && fix_errors) { + g_message(fixed_trim_spaces); + const char* new_beg; + size_t new_len; + trim_spaces(worditem.word.c_str(), new_beg, new_len); + if(new_len == 0) + worditem.word.clear(); + else { + std::string tmp(new_beg, new_len); + worditem.word = tmp; + } + } + } + if(check_stardict_key_chars(worditem.word.c_str())) { + g_message(word_forbidden_chars_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + std::string tmp; + fix_stardict_key_chars(worditem.word.c_str(), tmp); + worditem.word = tmp; + wordlen = worditem.word.length(); + } + } + if (wordlen==0) { + g_warning(empty_word_err); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_ignore_word_msg); + } + if (!preworditem.word.empty() && !worditem.word.empty()) { + cmpvalue=stardict_strcmp(preworditem.word.c_str(), worditem.word.c_str()); + if (cmpvalue>0) { + g_warning(wrong_word_order_err, preworditem.word.c_str(), worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_words_reordered_msg); + } + } + p = word_end + 1; + size_remain = buffer_end - p; + if(size_remain < 2 * sizeof(guint32)) { + g_warning(index_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + worditem.offset = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + worditem.size = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + if (worditem.size==0) { + g_warning(empty_block_err, worditem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + worditem.word.clear(); + g_message(fixed_ignore_word_msg); + } + } + preworditem = worditem; + wordcount++; + index.push_back(worditem); + } // while + + g_assert(p 
<= buffer_end); + + if (dict_info.get_wordcount() != wordcount) { + g_warning(incorrect_word_cnt_err, dict_info.get_wordcount(), wordcount); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_wordcount(wordcount); + g_message(fixed_msg); + } + } + + for(size_t i=0; i < index.size(); ++i) { + if(index[i].word.empty()) + continue; + for(size_t j=i+1; j < index.size() && index[i].word == index[j].word; ++j) { + if(index[i].offset == index[j].offset && index[i].size == index[j].size) { + g_warning(duplicate_index_item_err, + index[i].word.c_str(), index[i].offset, index[i].size); + result = combine_result(result, VERIF_RESULT_NOTE); + break; + } + } + } + + return result; +} + +VerifResult binary_dict_parser_t::load_syn_file(void) +{ + synfilename = basefilename + ".syn"; + VerifResult result = VERIF_RESULT_OK; + + if (dict_info.get_synwordcount() == 0) { + if (g_file_test(synfilename.c_str(), G_FILE_TEST_EXISTS)) { + g_warning(syn_file_exist_msg); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_process_syn_file_msg); + } else + return result; + } else + return result; + } + + guint32 synfilesize; + { + stardict_stat_t stats; + if (g_stat (synfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_warning(syn_file_no_found_msg, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + synfilesize = stats.st_size; + } + g_message(loading_syn_file_msg, synfilename.c_str()); + + synindex.clear(); + synindex.reserve(std::min(MAX_RESERVED_INDEX_SIZE, dict_info.get_synwordcount())); + + std::vector buf(synfilesize+1); + gchar *buffer_begin = &buf[0]; + gchar *buffer_end = buffer_begin+synfilesize; + { + FILE *synfile = g_fopen(synfilename.c_str(),"rb"); + if(!synfile) { + std::string error(g_strerror(errno)); + 
g_warning(open_read_file_err, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + if(synfilesize != fread (buffer_begin, 1, synfilesize, synfile)) { + std::string error(g_strerror(errno)); + g_warning(open_read_file_err, synfilename.c_str(), error.c_str()); + result = VERIF_RESULT_CRITICAL; + fclose (synfile); + if(fix_errors) { + dict_info.set_synwordcount(0); + g_message(fixed_ignore_syn_file_msg); + return result; + } else + return result; + } + fclose (synfile); + } + + const char *p=buffer_begin; + int wordlen; + gint cmpvalue; + guint wordcount=0; + synitem_t synitem, presynitem; + size_t size_remain; // to the end of the synonyms file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* const word_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!word_end) { + g_warning(syn_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + synitem.word = p; + wordlen = synitem.word.length(); + if (!g_utf8_validate(synitem.word.c_str(), wordlen, NULL)) { + g_warning(word_invalid_utf8_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + synitem.word = fix_utf8_str(synitem.word); + wordlen = synitem.word.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + const char* const word = synitem.word.c_str(); + if(check_xml_string_chars(word, invalid_chars)) { + result = combine_result(result, VERIF_RESULT_WARNING); + g_message(word_invalid_char_value_err, + word, print_char_codes(invalid_chars).c_str()); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + fix_xml_string_chars(word, synitem.word); + wordlen = 
synitem.word.length(); + } + } + } + if (wordlen > 0) { + if (wordlen>=MAX_INDEX_KEY_SIZE) { + g_warning(long_word_err, synitem.word.c_str(), MAX_INDEX_KEY_SIZE, wordlen); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + wordlen = truncate_utf8_string(synitem.word.c_str(), wordlen, MAX_INDEX_KEY_SIZE-1); + synitem.word.resize(wordlen); + g_message(fixed_word_truncated_msg); + } + } + bool have_spaces = false; + if (g_ascii_isspace(synitem.word[0])) { + g_message(word_begin_space_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if (g_ascii_isspace(synitem.word[wordlen-1])) { + g_message(word_end_space_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + have_spaces = true; + } + if(have_spaces && fix_errors) { + g_message(fixed_trim_spaces); + const char* new_beg; + size_t new_len; + trim_spaces(synitem.word.c_str(), new_beg, new_len); + if(new_len == 0) + synitem.word.clear(); + else { + std::string tmp(new_beg, new_len); + synitem.word = tmp; + } + } + } + if (check_stardict_key_chars(synitem.word.c_str())) { + g_message(word_forbidden_chars_err, synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_drop_invalid_char_msg); + std::string tmp; + fix_stardict_key_chars(synitem.word.c_str(), tmp); + synitem.word = tmp; + wordlen = synitem.word.length(); + } + } + if (wordlen==0) { + g_warning(empty_word_err); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_ignore_word_msg); + } + if (!presynitem.word.empty() && !synitem.word.empty()) { + cmpvalue=stardict_strcmp(presynitem.word.c_str(), synitem.word.c_str()); + if (cmpvalue>0) { + g_warning(wrong_word_order_err, presynitem.word.c_str(), synitem.word.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) + g_message(fixed_words_reordered_msg); + } + } + p = word_end 
+1; + size_remain = buffer_end - p; + if(size_remain < sizeof(guint32)) { + g_warning(syn_file_truncated_err); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) + g_message(fixed_ignore_file_tail_msg); + break; + } + synitem.index = g_ntohl(*reinterpret_cast(p)); + if (synitem.index>=dict_info.get_wordcount()) { + g_warning(wrong_index_err, synitem.word.c_str(), synitem.index); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + synitem.word.clear(); + g_message(fixed_ignore_word_msg); + } + } + p+=sizeof(guint32); + presynitem = synitem; + wordcount++; + synindex.push_back(synitem); + } // while + + g_assert(p <= buffer_end); + + if (wordcount != dict_info.get_synwordcount()) { + g_warning(incorrect_syn_word_cnt_err, + dict_info.get_synwordcount(), wordcount); + result = combine_result(result, VERIF_RESULT_CRITICAL); + if(fix_errors) { + dict_info.set_synwordcount(wordcount); + g_message(fixed_msg); + } + } + + for(size_t i=0; i < synindex.size(); ++i) { + for(size_t j=i+1; j < synindex.size() && synindex[i].word == synindex[j].word; ++j) { + if(synindex[i].index == synindex[j].index) { + g_warning(duplicate_syn_item_err, + synindex[i].word.c_str(), synindex[i].index); + result = combine_result(result, VERIF_RESULT_NOTE); + break; + } + } + } + + if((fix_errors ? VERIF_RESULT_FATAL : VERIF_RESULT_CRITICAL) <= result) { + g_warning(load_syn_file_failed_err, synfilename.c_str()); + if(fix_errors) { + dict_info.set_synwordcount(0); + synindex.clear(); + g_message(fixed_ignore_syn_file_msg); + result = VERIF_RESULT_CRITICAL; + } + } + return result; +} + +VerifResult binary_dict_parser_t::load_dict_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + { + VerifResult res = prepare_dict_file(); + result = combine_result(result, res); + if((fix_errors ? 
+ this->word = word; + if(fields) + fields->clear(); + if(data_size == 0) { + g_warning(empty_block_err, word); + return VERIF_RESULT_FATAL; + } + field_num = 0; + VerifResult result = VERIF_RESULT_OK; + if (!sametypesequence.empty()) { + result = combine_result(result, load_sametypesequence(data, data_size, sametypesequence)); + } else { + result = combine_result(result, load_no_sametypesequence(data, data_size)); + } + if(VERIF_RESULT_FATAL <= result) { + if(fields) + fields->clear(); + return result; + } + if(field_num == 0) { + g_warning(data_block_no_fields_err, word); + return VERIF_RESULT_FATAL; + } + return result; +} + +VerifResult dictionary_data_block::load_sametypesequence(const char* const data, size_t data_size, + const std::string& sametypesequence) +{ + const char* p = data; + size_t size_remain; // to the end of the data block + VerifResult result = VERIF_RESULT_OK; + for (size_t i=0; i(p-data) <= data_size); + size_remain = data_size - (p - data); // 0 is OK + const char type_id = sametypesequence[i]; + ext_result_t ext_result(load_field(type_id, p, size_remain)); + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } + result = combine_result(result, ext_result.content); + } + // last item + g_assert(static_cast(p-data) <= data_size); + size_remain = data_size - (p - data); + const char type_id = sametypesequence[sametypesequence.length()-1]; + ext_result_t ext_result; + if(g_ascii_isupper(type_id)) { + ext_result = load_field_sametypesequence_last_upper(type_id, p, size_remain); + } else if(g_ascii_islower(type_id)) { + ext_result = load_field_sametypesequence_last_lower(type_id, p, size_remain); + } else { + g_warning(unknown_type_id_err, word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + } + g_warning(fields_extraction_faild_err, word); + 
return result; + } + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } else + result = combine_result(result, ext_result.content); + if(!strchr(known_type_ids, type_id)) { + g_warning(unknown_type_id_err, word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_accept_unknown_field_msg); + } + } + g_assert(static_cast(p-data) <= data_size); + size_remain = data_size - (p - data); + if(size_remain > 0) { + g_warning(incorrect_data_block_size_err, word); + result = combine_result(result, VERIF_RESULT_WARNING); + } + return result; +} + +VerifResult dictionary_data_block::load_no_sametypesequence(const char* const data, size_t data_size) +{ + const char* p = data; + size_t size_remain; // to the end of the data block + VerifResult result = VERIF_RESULT_OK; + while(true) { + size_remain = data_size - (p - data); + if(size_remain == 0) + return result; + const char type_id = *p; + ++p; + --size_remain; + ext_result_t ext_result(load_field(type_id, p, size_remain)); + if(FIELD_VERIF_RES_ABORT <= ext_result.field || VERIF_RESULT_FATAL <= ext_result.content) { + g_critical(fields_extraction_faild_err, word); + return VERIF_RESULT_CRITICAL; + } + result = combine_result(result, ext_result.content); + } + g_assert_not_reached(); + return VERIF_RESULT_OK; +} + +ext_result_t dictionary_data_block::load_field(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + ext_result.append(FIELD_VERIF_RES_SKIP); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + if(g_ascii_isupper(type_id)) { + ext_result.append(load_field_upper(type_id, p, size_remain)); + } else if(g_ascii_islower(type_id)) { + 
ext_result.append(load_field_lower(type_id, p, size_remain)); + } else { + g_warning(unknown_type_id_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + ext_result.append(FIELD_VERIF_RES_ABORT); + p += size_remain; + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + if(!strchr(known_type_ids, type_id)) { + g_warning(unknown_type_id_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_accept_unknown_field_msg); + } + } + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_upper(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain < sizeof(guint32)) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_ABORT); + g_warning(incorrect_data_block_size_err, word); + p += size_remain; + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + guint32 size = g_ntohl(*reinterpret_cast(p)); + if(size_remain < sizeof(guint32) + size) { + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + size = size_remain - sizeof(guint32); + g_message(fixed_change_field_size_msg); + } else { + p += size_remain; + ext_result.append(FIELD_VERIF_RES_ABORT); + return ext_result; + } + } + p += sizeof(guint32); + if(size == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0); + return ext_result; + } + } + const char* data = p; + p += size; + VerifResult result = verify_field_content(type_id, data, size); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, size); + 
g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, size); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_lower(const char type_id, + const char*& p, const size_t size_remain) +{ + ext_result_t ext_result; + if(size_remain < 1) { // data must contain at least '\0' + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + p += size_remain; + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } + const char* field_end = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!field_end) { + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_ABORT); + if(fix_errors) { + g_message(fixed_field_take_longest_str_msg); + field_end = p + size_remain; + } else { + p += size_remain; + return ext_result; + } + } + /* In case we need to apply changes to data, we'll store modified copy here. 
*/ + std::string data_str; + const char* data = p; + int datalen = field_end - p; + p += std::min(datalen + 1, size_remain); // shift the pointer to the next field + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0, true); + return ext_result; + } + } + if (!g_utf8_validate(data, datalen, NULL)) { + g_warning(invalid_utf8_field_err, word, type_id, data); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + data_str = fix_utf8_str(std::string(data, datalen), 0); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } else { + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + if(check_xml_string_chars(data, datalen, invalid_chars)) { + std::string temp(data, datalen); + g_message(invalid_field_content_chars_err, word, type_id, temp.c_str(), + print_char_codes(invalid_chars).c_str()); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + fix_xml_string_chars(data, datalen, data_str); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + } + } + VerifResult result = verify_field_content(type_id, data, datalen); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + 
ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, datalen); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, datalen, true); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_sametypesequence_last_upper(const char type_id, + const char*& p, const size_t size_remain) +{ + guint32 size = size_remain; + ext_result_t ext_result; + if(size == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0); + return ext_result; + } + } + const char* data = p; + p += size; + VerifResult result = verify_field_content(type_id, data, size); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, size); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, size); + return ext_result; +} + +ext_result_t dictionary_data_block::load_field_sametypesequence_last_lower(const char type_id, + const char*& p, const size_t size_remain) +{ + size_t datalen = size_remain; + ext_result_t ext_result; + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } else { + add_field(type_id, NULL, 0, true); + return ext_result; + } + } + /* In case we need to apply changes to data, we'll store the modified copy here. 
*/ + std::string data_str; + const char* data = p; + p += size_remain; // shift the pointer to the next field + const char* p2 = reinterpret_cast(memchr(data, '\0', datalen)); + if(p2) { + // '\0' found in the last record + g_warning(incorrect_data_block_size_err, word); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + datalen = p2 - data; + if(datalen == 0) { + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + g_message(fixed_field_take_zero_term_str_msg); + } + } + if (!g_utf8_validate(data, datalen, NULL)) { + std::string tmp(data, datalen); + g_warning(invalid_utf8_field_err, word, type_id, tmp.c_str()); + ext_result.append(VERIF_RESULT_CRITICAL); + if(fix_errors) { + data_str = fix_utf8_str(std::string(data, datalen), 0); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_utf8_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } else { + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + { // check for invalid chars + typedef std::list str_list_t; + str_list_t invalid_chars; + if(check_xml_string_chars(data, datalen, invalid_chars)) { + std::string temp(data, datalen); + g_message(invalid_field_content_chars_err, word, type_id, temp.c_str(), + print_char_codes(invalid_chars).c_str()); + ext_result.append(VERIF_RESULT_WARNING); + if(fix_errors) { + fix_xml_string_chars(data, datalen, data_str); + data = data_str.c_str(); + datalen = data_str.length(); + g_message(fixed_drop_invalid_char_msg); + if(datalen == 0) { + g_warning(empty_field_err, word, type_id); + ext_result.append(VERIF_RESULT_WARNING); + g_message(fixed_ignore_field_msg); + ext_result.append(FIELD_VERIF_RES_SKIP); + return ext_result; + } + } + } + } + VerifResult result = 
verify_field_content(type_id, data, datalen); + if(VERIF_RESULT_FATAL <= result) { + ext_result.append(VERIF_RESULT_CRITICAL); + ext_result.append(FIELD_VERIF_RES_SKIP); + std::string temp(data, datalen); + g_warning(invalid_field_content_err, word, type_id, temp.c_str()); + if(fix_errors) { + g_message(fixed_ignore_field_msg); + return ext_result; + } else + return ext_result; + } else + ext_result.append(result); + add_field(type_id, data, datalen, true); + return ext_result; +} + +/* any fatal error may be solved by ignoring this field + * So VERIF_RESULT_FATAL is counted as VERIF_RESULT_CRITICAL by caller function. */ +VerifResult dictionary_data_block::verify_field_content(const char type_id, const char* data, guint32 size) +{ + if(type_id == 'x') + return verify_field_content_x(data, size); + if(type_id == 'r') + return verify_field_content_r(data, size); + return VERIF_RESULT_OK; +} + +VerifResult dictionary_data_block::verify_field_content_x(const char* data, guint32 size) +{ + const char type_id = 'x'; + // create a '\0'-terminated string + std::string temp(data, size); + std::string key; + const char* p; + const char* tag; + VerifResult result = VERIF_RESULT_OK; + for(p = temp.c_str(); p && *p && (tag = strstr(p, "') + ++p; + else if (*p == ' ') { + p = strchr(p, '>'); + if(!p) + break; + ++p; + } else { // error + p = strchr(p, '>'); + if(!p) + break; + ++p; + continue; + } + // p points after the "" + tag = strstr(p, ""); + if(!tag) + break; + key.assign(p, tag - p); + if(p_res_storage && !p_res_storage->have_file(key)) { + g_warning(resource_not_found_msg, + word, type_id, key.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_ignore_msg); + } + } + p = tag + sizeof("") - 1; + } + return result; +} + +VerifResult dictionary_data_block::verify_field_content_r(const char* const data, guint32 size, + resitem_vect_t *items) +{ + const char type_id = 'r'; + const char* line_beg = data; + const char* 
line_end; + resitem_t resitem; + VerifResult result = VERIF_RESULT_OK; + size_t item_num = 0; // number of successfully extracted items + + if(items) + items->clear(); + while(true) { + const gint size_remain = static_cast(size) - (line_beg - data); + if(size_remain <= 0) + break; + line_end = (const char*)memchr(line_beg, '\n', size_remain); + if(!line_end) + line_end = data + size; + if(line_beg == line_end) { + g_warning(resource_invalid_format_empty_line_msg, + word, type_id); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + ++line_beg; + continue; + } else { + continue; + } + } + const std::string line(line_beg, line_end - line_beg); + const char* colon = (const char*)memchr(line_beg, ':', line_end - line_beg); + if(!colon) { + g_warning(resource_invalid_format_colon_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + line_beg = line_end + 1; + continue; + } else { + continue; + } + } + resitem.type.assign(line_beg, colon - line_beg); + ++colon; + resitem.key.assign(colon, line_end - colon); + line_beg = line_end + 1; + if(resitem.type.empty()) { + g_warning(resource_invalid_format_type_blank_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(resitem.key.empty()) { + g_warning(resource_invalid_format_key_blank_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(!is_known_resource_type(resitem.type.c_str())) { + g_warning(resource_invalid_format_unknown_type_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + 
g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(resitem.key.find('\\') != std::string::npos) { + g_warning(resource_invalid_format_back_spash_msg, + word, type_id, line.c_str()); + result = combine_result(result, VERIF_RESULT_WARNING); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } else { + continue; + } + } + if(p_res_storage && !p_res_storage->have_file(resitem.key)) { + g_warning(resource_resource_nof_found_msg, + word, type_id, line.c_str(), resitem.key.c_str()); + result = combine_result(result, VERIF_RESULT_NOTE); + if(fix_errors) { + g_message(fixed_ignore_resource_line_msg); + continue; + } + } + if(items) + items->push_back(resitem); + ++item_num; + } + if(item_num == 0) { + g_warning(resource_empty_list_msg, + word, type_id); + result = combine_result(result, VERIF_RESULT_WARNING); + } + return result; +} + +void dictionary_data_block::add_field(char type_id, const char* data, size_t datalen, bool add_null) +{ + ++field_num; + if(fields) { + data_field_t field; Is content OK or not? +}; + +struct data_field_t +{ + data_field_t(void) + : + type_id(0) + { + } + + char type_id; + /* for string data types, return string length, + * for binary data types, return data size */ + size_t get_size(void) const; + /* for string data types, return a '\0'-terminated string. */ + const char* get_data(void) const; + void set_data(const char* p, size_t size, bool add_null = false); +private: + /* for string data types, like 'm', data ends with '\0' char, + * for binary data types, the vector contains only data. 
*/ + std::vector data; +}; + +typedef std::vector data_field_vect_t; + +class dictionary_data_block { +public: + dictionary_data_block(void) + : + word(NULL), + p_res_storage(NULL), + fix_errors(false), + fields(NULL), + field_num(0) + { + + } + VerifResult load(const char* const data, size_t data_size, + const std::string& sametypesequence, const char* word, + data_field_vect_t* fields = NULL); + void set_resource_storage(i_resource_storage* p_res_storage) + { + this->p_res_storage = p_res_storage; + } + void set_fix_errors(bool b) + { + fix_errors = b; + } + void set_word(const char* word) + { + this->word = word; + } + /* if you use this method directly, do not forget to set_word(). NULL as argument is OK. + * any fatal error may be solved by ignoring this field */ + VerifResult verify_field_content_r(const char* const data, guint32 size, resitem_vect_t *items = NULL); +private: + VerifResult load_no_sametypesequence(const char* const data, size_t data_size); + VerifResult load_sametypesequence(const char* const data, size_t data_size, + const std::string& sametypesequence); + /* for all load_field* methods + * all method have two means to indicate processing result. + * ext_result_t.content holds the integral result of the processing the field. + * VERIF_RESULT_FATAL is counted as VERIF_RESULT_CRITICAL by caller function. + * Any fatal error may be solved by ignoring the field or entire field collection. + * ext_result_t.field indicates what we can do next (switch to the next field, + * or abort processing the field collection). + * p parameter initially point to the beginning of the data area. + * Field extraction method must move it past the processed field, + * to the beginning of the next field. + * size of the available data is restricted by size_remain parameter. + * Extraction function is not allowed to access data outside this region. + * The field may occupy either full region or only part of it. 
+ * Extraction function should read as much data as it needs but not more. + * + * fix_errors. When true, we are working hard to all fix errors, extract as much data + * as possible. We performs as many tests as possible, testing fixed data. + * + * fields. When specified, all extracted fields are added here. + * When fix_errors is specified, we add only clean fields, after all possible fixes. + * When fix_errors is not specified, we fix only errors >= VERIF_RESULT_CRITICAL. + * What should we do in that last case? We need to fix some errors anyway, + * even when fix_errors is false. Otherwise we can not go forward. */ + ext_result_t load_field(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_upper(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_lower(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_sametypesequence_last_upper(const char type_id, + const char*& p, size_t size_remain); + ext_result_t load_field_sametypesequence_last_lower(const char type_id, + const char*& p, size_t size_remain); + VerifResult verify_field_content(const char type_id, const char* data, guint32 size); + VerifResult verify_field_content_x(const char* data, guint32 size); + void add_field(char type_id, const char* data, size_t datalen, bool add_null = false); + + const char* word; + i_resource_storage* p_res_storage; // may be NULL + bool fix_errors; + data_field_vect_t* fields; + size_t field_num; // number of fields extracted +}; + + +#endif /* LIB_DICT_DATA_BLOCK_H_ */ diff --git a/lib/stardict/lib_dict_verify.cpp b/lib/stardict/lib_dict_verify.cpp new file mode 100644 index 0000000..bba7a57 --- /dev/null +++ b/lib/stardict/lib_dict_verify.cpp @@ -0,0 +1,72 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. 
+ * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Verification result: OK.", ifofilename); + else if(result < VERIF_RESULT_CRITICAL) + g_message("Dictionary '%s'. Verification result: Non-critical problems were found. The dictionary is safe to use.", ifofilename); + else + g_message("Dictionary '%s'. Verification result: The dictionary is broken. Fields do not fit into the data block, incorrect data block size." +#define empty_field_err \ + "Index item '%s'. Empty field in definition data block. Type ID '%c'." +#define invalid_utf8_field_err \ + "Index item '%s'. Invalid field. Type id = '%c'. Invalid utf8 string: '''\n%s\n'''" +#define invalid_utf8_index_item_err \ + "Index item '%s'. Invalid field. Invalid utf8 string: '''\n%s\n'''" +#define invalid_field_content_err \ + "Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''" +#define invalid_chars_in_textual_data_msg \ + "The text contains either invalid Unicode characters " \ + "or Unicode characters not suitable for textual data (mainly control characters). " \ + "The following characters are prohibited: %s." +#define invalid_field_content_chars_err \ + "Index item '%s'. Type id '%c'. Invalid field content: '''\n%s\n'''\n"\ + invalid_chars_in_textual_data_msg +#define syn_file_truncated_err \ + "Synonyms file is truncated, last record is truncated." +#define unknown_type_id_err \ + "Index item '%s'. Unknown type identifier '%c'." +#define empty_word_err \ + "Blank key in index." +#define empty_file_name_err \ + "Blank file name in index." +#define long_word_err \ + "Index item '%s'. Key is too long. 
Maximum allowed length: %d, key length: %d." +#define word_begin_space_err \ + "Index item '%s'. Key begins with a space character." +#define word_end_space_err \ + "Index item '%s'. Key ends with a space character." +#define word_forbidden_chars_err \ + "Index item '''%s'''\nKey contains forbidden characters." +#define word_invalid_utf8_err \ + "Index item '%s'. Invalid utf8 string." +#define word_invalid_char_value_err \ + "Index item '%s'. Invalid item name.\n" \ + invalid_chars_in_textual_data_msg +#define wrong_word_order_err \ + "Wrong key order, first key = '%s', second key = '%s'." +#define wrong_file_order_err \ + "Wrong file order, first file name = '%s', second file name = '%s'." +#define fields_extraction_faild_err \ + "Index item '%s'. Extraction of the fields failed." +#define unsupported_file_type_err \ + "Unsupported file type. File must have 'ifo' extension. File: '%s'." +#define dictionary_no_loaded_err \ + "Dictionary is not loaded." +#define file_not_found_idx_err \ + "Unable to find index file: '%s'. Error: %s." +#define loading_idx_file_msg \ + "Loading index file: '%s'..." +#define incorrect_idx_file_size_err \ + "Incorrect size of the index file: in .ifo file, idxfilesize=%u, real file size is %u." +#define incorrect_ridx_file_size_err \ + "Incorrect size of the index file: in .rifo file, ridxfilesize=%d, real file size is %ld." +#define empty_block_err \ + "Index item '%s'. Data block size = 0." +#define incorrect_word_cnt_err \ + "Incorrect number of words: in .ifo file, wordcount=%d, while the real word count is %d." +#define incorrect_syn_word_cnt_err \ + "Incorrect number of words: in .ifo file, synwordcount=%d, while the real synwordcount is %d." +#define duplicate_index_item_err \ + "Multiple index items have the same key = '%s', offset = %d, size = %d." +#define duplicate_syn_item_err \ + "Multiple synonym items with the same key = '%s', index = %d." 
+#define syn_file_exist_msg \ + ".syn file exists but there is no \"synwordcount=\" entry in .ifo file." +#define syn_file_no_found_msg \ + "Unable to find synonyms file '%s'. Error: %s." +#define loading_syn_file_msg \ + "Loading synonyms file: '%s'..." +#define wrong_index_err \ + "Index item '%s'. Wrong index of entry in the index file: %d." +#define load_syn_file_failed_err \ + "Loading synonyms file failed: '%s'." +#define dict_file_not_found_err \ + "Dictionary file does not exist: '%s'. Error: %s." +#define loading_dict_file_err \ + "Loading dictionary file: '%s'..." +#define open_dict_file_failed_err \ + "Unable open dictionary file '%s'. Error: %s." +#define record_out_of_file_err \ + "Index item '%s'. Incorrect size, offset parameters. Referenced data block is outside dictionary file." +#define overlapping_data_blocks_msg \ + "Index item '%s' and index item '%s' refer to overlapping but not equal regions (offset, size): " \ + "(%u, %u) and (%u, %u)." +#define unreferenced_data_blocks_msg \ + "Dictionary contains unreferenced data blocks (offset, size):" +#define rdb_unreferenced_data_blocks_msg \ + "Resource database contains unreferenced data blocks (offset, size):" +#define data_block_no_fields_err \ + "Index item '%s'. No fields were extracted." +#define resource_not_found_msg \ + "Index item '%s'. Type id '%c'. The field refers to resource '%s', that is not found in resource storage." +#define resource_invalid_format_empty_line_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Empty resource line." +#define resource_invalid_format_colon_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. ':' is not found." +#define resource_invalid_format_type_blank_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Type is blank." +#define resource_invalid_format_key_blank_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key is blank." 
+#define resource_invalid_format_unknown_type_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Unknown type." +#define resource_invalid_format_back_spash_msg \ + "Index item '%s'. Type id '%c'. Invalid field format. Line: '%s'. Key contains '\\' char." +#define resource_resource_nof_found_msg \ + "Index item '%s'. Type id '%c'. Line '%s'. The field refers to resource '%s', that is not found in resource storage." +#define resource_empty_list_msg \ + "Index item '%s'. Type id '%c'. Empty resource list." +#define two_index_files_msg \ + "Two index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." +#define two_dict_files_msg \ + "Two dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." +#define rdb_filecnt_zero_err \ + "Resource database '%s'. No files. filecount = 0." +#define rdb_ridxfilesize_zero_err \ + "Resource database '%s'. Empty index file size. ridxfilesize = 0." +#define rdb_invalid_file_name_format_back_spash_err \ + "Index item '%s'. Found '\\' character. '/' must be used as directory separator." +#define rdb_invalid_file_name_format_abs_path_err \ + "Index item '%s'. File name must not start with directory separator '/'." +#define rdb_invalid_file_name_format_empty_dir_err \ + "Index item '%s'. Empty directory in file path: '//'." +#define rdb_incorrect_file_cnt \ + "Incorrect number of files: in .rifo file, filecount=%d, while the real file count is %d." +#define rdb_dict_file_not_found_err \ + "Unable to find resource dictionary file: '%s'. Error: %s." +#define rdb_loading_ridx_file_msg \ + "Loading resource index file: '%s'..." +#define rdb_loading_dict_file_msg \ + "Loading resource dictionary file: '%s'..." +#define rdb_loaded_db_msg \ + "Resource storage loaded. Type - database." +#define rdb_load_db_failed_msg \ + "Resource storage load failed. Type - database." +#define rdb_loaded_files_msg \ + "Resource storage loaded. 
Type - files." +#define rdb_load_files_failed_msg \ + "Resource storage load failed. Type - files." +#define rdb_two_index_files_msg \ + "Two resource index files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." +#define rdb_two_dict_files_msg \ + "Two resource dictionary files were found: compressed '%s' and uncompressed '%s'. We will use the compressed version." + +#define fixed_ignore_field_msg \ + "The problem was fixed. Ignore the field." +#define duplicate_file_name \ + "Multiple index items with the same file name: '%s'." +#define fixed_accept_unknown_field_msg \ + "The problem was fixed. Accept unknown field type." +#define fixed_ignore_resource_line_msg \ + "The problem was fixed. Ignore the resource line." +#define fixed_ignore_file_tail_msg \ + "The problem was fixed. Ignore the tail of the file." +#define fixed_ignore_syn_file_msg \ + "The problem was fixed. Ignore the .syn file." +#define fixed_ignore_word_msg \ + "The problem was fixed. Ignore the key." +#define fixed_drop_invalid_char_msg \ + "The problem was fixed. Dropping invalid chars." +#define fixed_word_truncated_msg \ + "The problem was fixed. The key is truncated." +#define fixed_words_reordered_msg \ + "The problem was fixed. Key will be reordered." +#define fixed_process_syn_file_msg \ + "The problem was fixed. Process the .syn file." +#define fixed_data_block_size_change_msg \ + "The problem was fixed. Changed size of the data block." +#define fixed_change_field_size_msg \ + "The problem was fixed. Change field size." +#define fixed_field_take_longest_str_msg \ + "The problem was fixed. Take the longest string." +#define fixed_field_take_zero_term_str_msg \ + "The problem was fixed. Take a zero-terminated string." +#define fixed_trim_spaces \ + "The problem was fixed. Leading and trailing spaces trimmed." +#define fixed_utf8_drop_invalid_char_msg \ + "The problem was fixed. 
EXIT_FAILURE : EXIT_SUCCESS; +} + +int resource_database::load_ridx_file(void) +{ + if(prepare_ridx_file()) + return EXIT_FAILURE; + + stardict_stat_t stats; + if (g_stat (ridxfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(file_not_found_idx_err, ridxfilename.c_str(), error.c_str()); + return EXIT_FAILURE; + } + g_message(rdb_loading_ridx_file_msg, ridxfilename_orig.c_str()); + if (dict_info.get_index_file_size()!=(guint)stats.st_size) { + g_critical(incorrect_ridx_file_size_err, + dict_info.get_index_file_size(), (long) stats.st_size); + return EXIT_FAILURE; + } + + index.clear(); + index.reserve(dict_info.get_filecount()); + + std::vector buf(stats.st_size+1); + gchar * const buffer_beg = &buf[0]; + gchar * const buffer_end = buffer_beg+stats.st_size; + { + FILE *idxfile = g_fopen(ridxfilename.c_str(),"rb"); + size_t fread_size; + fread_size = fread(buffer_beg, 1, stats.st_size, idxfile); + if (fread_size != (size_t)stats.st_size) { + g_print("fread error!\n"); + } + fclose(idxfile); + } + + gchar *p=buffer_beg; + gchar *prefilename=NULL; + int filenamelen; + guint filecount=0; + bool have_errors=false; + fileitem_t fileitem; + size_t size_remain; // to the end of the index file + + while (p < buffer_end) { + size_remain = buffer_end - p; + const char* p2 = reinterpret_cast(memchr(p, '\0', size_remain)); + if(!p2) { + g_warning(index_file_truncated_err); + have_errors=true; + break; + } + filenamelen = p2 - p; + if (filenamelen==0) { + g_warning(empty_file_name_err); + have_errors=true; + } + if (!g_utf8_validate(p, filenamelen, NULL)) { + std::string tmp(p, filenamelen); + g_warning(invalid_utf8_index_item_err, p, tmp.c_str()); + have_errors=true; + } + if(strchr(p, '\\')) { + g_warning(rdb_invalid_file_name_format_back_spash_err, p); + have_errors=true; + } + if(p[0] == '/') { + g_warning(rdb_invalid_file_name_format_abs_path_err, p); + have_errors=true; + } + if(strstr(p, "//")) { + 
g_warning(rdb_invalid_file_name_format_empty_dir_err, p); + have_errors=true; + } + if (prefilename) { + int cmpvalue=strcmp(prefilename, p); + if (cmpvalue>0) { + g_warning(wrong_file_order_err, prefilename, p); + have_errors=true; + } + if(cmpvalue==0) { + g_warning(duplicate_file_name, p); + have_errors=true; + } + } + prefilename=p; + fileitem.filename = p; + p += filenamelen + 1; + size_remain = buffer_end - p; + if(size_remain < 2 * sizeof(guint32)) { + g_warning(index_file_truncated_err); + have_errors=true; + break; + } + fileitem.offset = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + fileitem.size = g_ntohl(*reinterpret_cast(p)); + p += sizeof(guint32); + if (fileitem.size==0) { + g_warning(empty_block_err, prefilename); + } + filecount++; + index.push_back(fileitem); + } // while + + g_assert(p <= buffer_end); + + if (filecount!=dict_info.get_filecount()) { + g_warning(rdb_incorrect_file_cnt, dict_info.get_filecount(), filecount); + have_errors=true; + } + + return have_errors ? 
EXIT_FAILURE : EXIT_SUCCESS; +} + +VerifResult resource_database::load_rdic_file(void) +{ + VerifResult result = VERIF_RESULT_OK; + if(prepare_rdic_file()) + return combine_result(result, VERIF_RESULT_FATAL); + + stardict_stat_t stats; + if (g_stat (rdicfilename.c_str(), &stats) == -1) { + std::string error(g_strerror(errno)); + g_critical(rdb_dict_file_not_found_err, rdicfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + rdicfilesize = stats.st_size; + + g_message(rdb_loading_dict_file_msg, rdicfilename_orig.c_str()); + clib::File rdicfile(g_fopen(rdicfilename.c_str(), "rb")); + if(!rdicfile) { + std::string error(g_strerror(errno)); + g_critical(open_read_file_err, rdicfilename.c_str(), error.c_str()); + return combine_result(result, VERIF_RESULT_FATAL); + } + + for(size_t i=0; i rdicfilesize) { + g_warning(record_out_of_file_err, index[i].filename.c_str()); + result = combine_result(result, VERIF_RESULT_CRITICAL); + continue; + } + } + result = combine_result(result, verify_data_blocks_overlapping()); + return result; +} + +bool resource_database::have_file(const std::string& filename) const +{ + fileitem_t fileitem; + fileitem.filename = filename; + return std::binary_search(index.begin(), index.end(), fileitem, compare_fileitem); +} + +void resource_database::print_index(void) +{ + for(size_t i=0; i sort_index(index.size(), NULL); + for(size_t i=0; i > overlapping_blocks; + ::verify_data_blocks_overlapping(sort_index, overlapping_blocks); + for(size_t i=0; i unused_regions; + verify_unused_regions(sort_index, unused_regions, rdicfilesize); + if(!unused_regions.empty()) { + g_warning(rdb_unreferenced_data_blocks_msg); + for(size_t i = 0; idirname = dirname; + resdirname = build_path(dirname, "res"); + if(!g_file_test(resdirname.c_str(), G_FILE_TEST_IS_DIR)) + return lrNotFound; + return lrOK; +} + +bool resource_files::have_file(const std::string& filename) const +{ + const std::string 
full_fs_filename(build_path(resdirname, dir_separator_db_to_fs(filename))); + return static_cast(g_file_test(full_fs_filename.c_str(), G_FILE_TEST_IS_REGULAR)); +} + + +resource_storage::resource_storage(void) +: + db(NULL), + files(NULL), + verif_result(VERIF_RESULT_OK) +{ + +} + +resource_storage::~resource_storage(void) +{ + clear(); +} + +TLoadResult resource_storage::load(const std::string& dirname) +{ + clear(); + std::unique_ptr t_db(new resource_database); + TLoadResult res = t_db->load(dirname); + if(res == lrOK) { + g_message(rdb_loaded_db_msg); + verif_result = t_db->get_verif_result(); + db = t_db.release(); + return lrOK; + } + if(res == lrError) { + g_critical(rdb_load_db_failed_msg); + verif_result = t_db->get_verif_result(); + return lrError; + } + std::unique_ptr t_files(new resource_files); + res = t_files->load(dirname); + if(res == lrOK) { + g_message(rdb_loaded_files_msg); + verif_result = VERIF_RESULT_OK; + files = t_files.release(); + return lrOK; + } + if(res == lrError) { + g_critical(rdb_load_files_failed_msg); + verif_result = VERIF_RESULT_FATAL; + return lrError; + } + verif_result = VERIF_RESULT_OK; + return res; +} + +bool resource_storage::have_file(const std::string& filename) const +{ + if(db) + return db->have_file(filename); + if(files) + return files->have_file(filename); + return false; +} + +StorageType resource_storage::get_storage_type(void) const +{ + if(db) + return StorageType_DATABASE; + if(files) + return StorageType_FILE; + return StorageType_UNKNOWN; +} + +bool resource_storage::res_ridx_compressed(void) const +{ + if(db) + return db->res_ridx_compressed(); + return false; +} + +bool resource_storage::res_rdic_compressed(void) const +{ + if(db) + return db->res_rdic_compressed(); + return false; +} + +void resource_storage::clear(void) +{ + if(db) + delete db; + db = NULL; + if(files) + delete files; + files = NULL; + verif_result = VERIF_RESULT_OK; +} + diff --git a/lib/stardict/lib_res_store.h 
b/lib/stardict/lib_res_store.h new file mode 100644 index 0000000..a90574d --- /dev/null +++ b/lib/stardict/lib_res_store.h @@ -0,0 +1,74 @@ +/* + * Copyright 2011 kubtek + * + * This file is part of StarDict. + * + * StarDict is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. String: %s", + str_utf8.c_str()); + return false; + } + std::vector buf(buf_size); + const int char_num = MultiByteToWideChar( + CP_UTF8, //__in UINT CodePage, + 0, //__in DWORD dwFlags, + str_utf8.c_str(), //__in LPCSTR lpMultiByteStr, + -1, //__in int cbMultiByte, + &buf[0], // __out LPWSTR lpWideCharStr, + buf_size //__in int cchWideChar + ); + if(char_num != buf_size) { + g_warning("Unable to convert from utf-8 to windows encoding. 
String: %s", + str_utf8.c_str()); + return false; + } + out = &buf[0]; + return true; +#else + glib::Error err; + gchar* tmp = g_locale_from_utf8(str_utf8.c_str(), -1, NULL, NULL, get_addr(err)); + if(!tmp) { + g_warning("Unable to convert from utf-8 to windows encoding: %s", err->message); + return false; + } + out = tmp; + g_free(tmp); + return true; +#endif +} + +bool windows_to_utf8(const std_win_string& str, std::string& out_utf8) +{ +#ifdef UNICODE + const int buf_size = WideCharToMultiByte( + CP_UTF8, // __in UINT CodePage, + 0, // __in DWORD dwFlags, + str.c_str(), // __in LPCWSTR lpWideCharStr, + -1, // __in int cchWideChar, + NULL, // __out LPSTR lpMultiByteStr, + 0, // __in int cbMultiByte, + NULL, // __in LPCSTR lpDefaultChar, + NULL //__out LPBOOL lpUsedDefaultChar + ); + if(buf_size == 0) { + g_warning("Unable to convert from windows encoding to utf-8."); + return false; + } + std::vector buf(buf_size); + const int char_num = WideCharToMultiByte( + CP_UTF8, // __in UINT CodePage, + 0, // __in DWORD dwFlags, + str.c_str(), // __in LPCWSTR lpWideCharStr, + -1, // __in int cchWideChar, + &buf[0], // __out LPSTR lpMultiByteStr, + buf_size, // __in int cbMultiByte, + NULL, // __in LPCSTR lpDefaultChar, + NULL //__out LPBOOL lpUsedDefaultChar + ); + if(char_num != buf_size) { + g_warning("Unable to convert from windows encoding to utf-8."); + return false; + } + out_utf8 = &buf[0]; + return true; +#else + glib::Error err; + gchar* tmp = g_locale_to_utf8(str.c_str(), -1, NULL, NULL, get_addr(err)); + if(!tmp) { + g_warning("Unable to convert from windows encoding to utf-8: %s", err->message); + return false; + } + out_utf8 = tmp; + g_free(tmp); + return true; +#endif +} + +/* Returns a pointer to the first char after the root component. +If str is like "c:\path\...", root_end points after the "c:\". +If str is like "\\server\path\...", root_end points after the "\\server\". +If str is like "\\server", root_end points after the "\\server". 
+If str is like "\dir\dir", root_end points after the "\". +Otherwise the str is considered to have no root element and root_end points +to the beginning of the string. +The function returns NULL if the path is invalid. +T is either "char" or "const char". */ +template +T* path_root_end_win(T* str) +{ + if(!str) + return NULL; + if(g_ascii_isalpha(str[0]) && str[1] == ':' && str[2] == '\\') + return str + 3; + else if(str[0] == '\\' && str[1] == '\\') { + if(str[2] == '\0') // "\\" - invalid path + return NULL; + char* p = strchr(str+2, '\\'); + if(p) { + if(p == str+2) // "\\\..." - empty server - invalid path + return NULL; + return p + 1; + } else { // str is "\\server" + return strchr(str, '\0'); + } + } else if(str[0] == '\\' && str[1] != '\\') { + return str + 1; + } + return str; +} + +/* The same as path_root_end_win but for wide chars */ +template +T* path_root_end_winW(T* str) +{ + if(!str) + return NULL; + if(is_ascii_alpha(str[0]) && str[1] == L':' && str[2] == L'\\') + return str + 3; + else if(str[0] == L'\\' && str[1] == L'\\') { + if(str[2] == L'\0') // "\\" - invalid path + return NULL; + T* p = StrChr(str+2, L'\\'); + if(p) { + if(p == str+2) // "\\\..." - empty server - invalid path + return NULL; + return p + 1; + } else { // str is "\\server" + return StrChr(str, L'\0'); + } + } else if(str[0] == L'\\' && str[1] != L'\\') { + return str + 1; + } + return str; +} + +/* normalize path - resolve relative components in a path. +For example, path "c:\dir1\dir2\..\file" is converted to "c:\dir1\file". +This function accepts the following paths: +- an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ... + ("c:" is not allowed) +- an absolute path without disk: "\dir\file", ... +- UNC name: "\\server", "\\server\dir", ... +- relative path: "dir", "dir\dir\file", ... +##- a relative path starting with the current directory component ".": ".\", ".\dir", ... + +A reference to the parent of the root directory is considered an error. 
+For example, these paths are considered invalid: "c:\..\dir1\file",
+"\\..\path\file", "\..\dir".
+If the path is relative, this function may leave references to the parent directory
+if they cannot be resolved in the path given.
+For example, "dir\..\..\..\dir2\dir3" is converted to "..\..\dir2\dir3".
+Strip "." components.
+
+If after all transformations we get an empty string,
+replace it with the current directory reference, that is '.'.
+Empty string is not a valid path.
+For example, we get an empty path for "abcd\.." and "abcd\..\".
+If the original path is not blank and it ends on backslash,
+append backslash to the '.'. That is:
+"abcd\.." -> "."
+"abcd\..\" -> ".\"
+
+An empty path component (two backslashes in a row after the root) is an error.
+
+path - the path to normalize
+result - receives the normalized path; it is cleared first and stays empty on failure
+Return value: EXIT_FAILURE or EXIT_SUCCESS. */
+int norm_path_win(const std::string& path, std::string& result)
+{
+    result.clear();
+    /* std::vector will free the allocated memory block
+    when this function returns.
+    + 3 - make sure that the buffer contains at least 3 chars,
+    that prevents buffer overread in some checks.
+    + 1 - terminating '\0' */
+    std::vector<char> buf(path.length() + 3 + 1);
+    char* str = &buf[0];
+    // end of string - terminating '\0'
+    char* str_end = g_stpcpy(str, path.c_str());
+    char* root_end = path_root_end_win(str);
+    if(!root_end)
+        return EXIT_FAILURE;
+    /*
+    if(root_end == str && str[0] == '.' && (str[1] == '\\' || str[1] == '\0')) {
+        if(str[1] == '\0')
+            str += 1;
+        else
+            str += 2;
+        root_end = str;
+    }
+    */
+    // if(str == root_end) - relative path
+    /* p1 and p2 point to the first char of a path component,
+    the previous char is normally '\\'.
+    In each step p2 moves to the next path component.
+    p1 normally moves forward as well, unless a parent directory reference
+    is encountered, then p1 moves back.
+    p2 is the read position, p1 is the write position, p1 <= p2. */
+    char * p1 = root_end;
+    char * p2 = root_end;
+    while(p2 < str_end) {
+        char *p = strchr(p2, '\\');
+        if(!p)
+            p = str_end;
+        // [p2, p) - path component
+        if(p == p2) // empty path component - error
+            return EXIT_FAILURE;
+        if(p2[0] == '.' && p2[1] == '.' && p2 + 2 == p) { // parent directory
+            if(p1 == root_end) { // no component to strip
+                if(str == root_end) { // relative path
+                    // keep the ".." in the output
+                    if(p1 != p2) {
+                        p1[0] = '.';
+                        p1[1] = '.';
+                        p1[2] = *p;
+                    }
+                    size_t len = p + 1 - p2;
+                    p1 += len;
+                    p2 += len;
+                } else { // absolute path
+                    return EXIT_FAILURE; // error
+                }
+            } else { // search a component to strip
+                char *p3 = strrchr_len(root_end, p1 - 1 - root_end, '\\');
+                if(!p3)
+                    p3 = root_end;
+                else
+                    ++p3;
+                // p3 - beginning of the path component previous to p1
+                if(p3[0] == '.' && p3[1] == '.' && p3[2] == '\\') {
+                    g_assert(str == root_end);
+                    // the previous component is "..", it cannot be stripped
+                    if(p1 != p2) {
+                        p1[0] = '.';
+                        p1[1] = '.';
+                        p1[2] = *p;
+                    }
+                    size_t len = p + 1 - p2;
+                    p1 += len;
+                    p2 += len;
+                } else {
+                    // drop the previous component together with this ".."
+                    p1 = p3;
+                    p2 = p + 1;
+                }
+            }
+        } else if(p2[0] == '.' && p2 + 1 == p) { // strip "." component
+            p2 = p + 1;
+        } else { // normal directory
+            if(p1 == p2) {
+                p1 = p2 = p + 1;
+            } else {
+                size_t len = p + 1 - p2;
+                /* Source and destination are parts of the same buffer and
+                overlap when the gap p2 - p1 is shorter than len
+                (for example "a\..\longname"), so memmove is required;
+                strncpy must not be used on overlapping ranges.
+                [p2, p) contains no '\0', hence exactly the same bytes are copied. */
+                memmove(p1, p2, len);
+                p1 += len;
+                p2 += len;
+            }
+        }
+    }
+    /* p1[-1] == '\0' if the last char of the path is not '\\' */
+    *p1 = '\0';
+    if(str[0] == '\0') { // blank path
+        str[0] = '.';
+        if(!path.empty() && path[path.length()-1] == '\\') {
+            str[1] = '\\';
+            str[2] = '\0';
+        } else
+            str[1] = '\0';
+    }
+    result = str;
+    return EXIT_SUCCESS;
+}
+
+/* returns true if the path is absolute and false otherwise,
+This function does not check that the path is valid
+The following paths are accepted:
+- an absolute path starting with disk name: "c:\", "c:\file", "c:\dir\file", ...
+  ("c:" is not allowed)
+- an "absolute" path without disk: "\dir\file", ... - this path is considered relative!
+- UNC name: "\\server", "\\server\dir", ...
+-*/
+bool is_absolute_path_win(const std::string& path)
+{
+    const char* const s = path.c_str();
+    // "c:\..." - disk name followed by a backslash
+    const bool has_disk_root = g_ascii_isalpha(s[0]) && s[1] == ':' && s[2] == '\\';
+    // "\\..." - UNC name
+    const bool is_unc_name = s[0] == '\\' && s[1] == '\\';
+    return has_disk_root || is_unc_name;
+}
+
+/* Run a series of checks on the path.
+The part after the path prefix must not start with a backslash,
+must not contain two backslashes in a row,
+must not contain any of the chars < > : " / | ? *
+and must not contain chars with codes below 32.
+Returns true if all tests passed and false otherwise. */
+bool is_valid_path_win(const std::string& path)
+{
+    const char* const start = path.c_str();
+    /* Length of the path prefix:
+    "c:\abcd" - 3, that is "c:\"
+    "\\abcd" - 2, that is "\\"
+    "\abd" - 1, that is "\"
+    otherwise 0, the checks start at the first char of the string. */
+    size_t prefix_len = 0;
+    if(g_ascii_isalpha(start[0]) && start[1] == ':' && start[2] == '\\')
+        prefix_len = 3;
+    else if(start[0] == '\\')
+        prefix_len = (start[1] == '\\') ? 2 : 1;
+    const char* const rest = start + prefix_len;
+    // a backslash right after the prefix or a doubled backslash anywhere
+    if(rest[0] == '\\' || strstr(rest, "\\\\"))
+        return false;
+    // strcspn stops at the first forbidden char; a valid path is scanned up to '\0'
+    if(rest[strcspn(rest, "<>:\"/|?*")] != '\0')
+        return false;
+    for(const char* c = rest; *c != '\0'; ++c) {
+        if(static_cast<unsigned char>(*c) < 32)
+            return false;
+    }
+    return true;
+}
+
+/* create a relative path from directory base_dir to file or dir path
+base_dir and path must have a common prefix, for example,
+"c:\dir1\dir2" and "c:\dir1\dir3\dir4" -> "..\dir3\dir4"
+Return value: EXIT_FAILURE or EXIT_SUCCESS.
+
+PathRelativePathTo function gives strange results:
+"c:\\dir", "c:\\dir", "..\\dir",
+"c:\\dir\\", "c:\\dir", "..\\dir"
+"c:\\dir\\", "c:\\dir\\", "",
+"\\", "\\a\\", - fails!
+
+That is why I've decided to provide a custom implementation.
+base_dir and path must be absolute paths!
+*/ +#if 0 +int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path) +{ + rel_path.clear(); + std_win_string base_dir_win; + std_win_string path_win; + if(!utf8_to_windows(base_dir, base_dir_win)) + return EXIT_FAILURE; + if(!utf8_to_windows(path, path_win)) + return EXIT_FAILURE; + if(base_dir_win.length() >= MAX_PATH) + return EXIT_FAILURE; + if(path_win.length() >= MAX_PATH) + return EXIT_FAILURE; + /* The output buffer must be at least MAX_PATH chars. + How much space do we actually need? */ + wchar_t buf[MAX_PATH * 10]; + bool is_file = !path.empty() && path[path.length()-1] != '\\'; + if(!PathRelativePathToW(buf, base_dir_win.c_str(), FILE_ATTRIBUTE_DIRECTORY, + path_win.c_str(), is_file ? 0 : FILE_ATTRIBUTE_DIRECTORY)) + return EXIT_FAILURE; + wchar_t * buf2 = buf; + if(buf[0] == L'.' && buf[1] == L'\\') + buf2 = buf + 2; + else if(buf[0] == L'.' && buf[1] == L'\0') + buf2 = buf + 1; + if(!windows_to_utf8(buf2, rel_path)) + return EXIT_FAILURE; + return EXIT_SUCCESS; +} +#endif + +int build_relative_path(const std::string& base_dir, const std::string& path, std::string& rel_path) +{ + rel_path.clear(); + std_win_string base_dir_win; + std_win_string path_win; + if(!utf8_to_windows(base_dir, base_dir_win)) + return EXIT_FAILURE; + if(!utf8_to_windows(path, path_win)) + return EXIT_FAILURE; + if(base_dir_win.empty()) + return EXIT_FAILURE; + if(path_win.empty()) + return EXIT_FAILURE; + /* Make sure that both paths end with a backslash, that simplifies further processing. + base_dir must be a directory, so adding a backslash won't hurt. 
+ path may be either a file or a directory */ + if(base_dir_win[base_dir_win.length()-1] != L'\\') + base_dir_win += L'\\'; + if(path_win[path_win.length()-1] != L'\\') + path_win += L'\\'; + const wchar_t* c_base_dir_win = base_dir_win.c_str(); + const wchar_t* c_path_win = path_win.c_str(); + const wchar_t* base_dir_win_root_end = path_root_end_winW(c_base_dir_win); + const wchar_t* path_win_root_end = path_root_end_winW(c_path_win); + if(!base_dir_win_root_end || base_dir_win_root_end == c_base_dir_win) + return EXIT_FAILURE; + if(!path_win_root_end || path_win_root_end == c_path_win) + return EXIT_FAILURE; + if(base_dir_win_root_end - c_base_dir_win != path_win_root_end - c_path_win) + return EXIT_FAILURE; // different roots + if(StrCmpNI(c_base_dir_win, c_path_win, base_dir_win_root_end - c_base_dir_win)) + return EXIT_FAILURE; // different roots + /* p and q points to the end of the common part in base_dir_win and path_win respectively. */ + const wchar_t* p = base_dir_win_root_end; + const wchar_t* q = path_win_root_end; + while(true) + { + const wchar_t* p2 = StrChr(p, L'\\'); + const wchar_t* q2 = StrChr(q, L'\\'); + if(!p2 || !q2) + break; + p2++; + q2++; + if(p2 - p != q2 - q) + break; + if(StrCmpNI(p, q, p2-p)) + break; + p = p2; + q = q2; + } + // found the longest common part + /* calculate how many directories to strip from the base_dir + == number of backslashes after p */ + int parent_cnt = 0; + for(const wchar_t* r = StrChr(p, L'\\'); r; r = StrChr(r+1, L'\\')) + ++parent_cnt; + std_win_string rel_path_win; + rel_path_win.reserve(3 * parent_cnt + wcslen(q)); + for(int i=0; i buffer(buffer_size); + char* buf = &buffer[0]; + gulong len; + clib::File out_file(g_fopen(out_file_name, "wb")); + if(!out_file) { + g_critical(open_write_file_err, out_file_name); + return EXIT_FAILURE; + } + while(true) { + len = gzread(get_impl(in), buf, buffer_size); + if(len < 0) { + g_critical(read_file_err, arch_file_name, ""); + return EXIT_FAILURE; + } + if(len == 0) 
+            break;
+        if(1 != fwrite(buf, len, 1, get_impl(out_file))) {
+            g_critical(write_file_err, out_file_name);
+            return EXIT_FAILURE;
+        }
+    }
+    return EXIT_SUCCESS;
+}
+
+/* Replace the temporary file held by this object with a new one.
+The previously held file, if any, is removed first (see clear()).
+A critical message is logged when no new file could be created.
+Return value: reference to the stored file name, empty on failure. */
+const std::string& TempFile::create_temp_file(void)
+{
+    clear();
+    file_name = ::create_temp_file();
+    if(file_name.empty())
+        g_critical(create_temp_file_no_name_err);
+    return file_name;
+}
+
+/* Remove the temporary file held by this object, if any.
+A failure to remove the file only produces a warning;
+the stored file name is reset in either case. */
+void TempFile::clear(void)
+{
+    if(!file_name.empty()) {
+        if(g_remove(file_name.c_str()))
+            g_warning(remove_temp_file_err, file_name.c_str());
+        file_name.clear();
+    }
+}
+
+/* Create a temporary file, write a single space into it and return its name.
+Return an empty string if the file cannot be created. */
+std::string create_temp_file(void)
+{
+#ifdef _WIN32
+    /* g_file_open_tmp does not work reliably on Windows
+    Use platform specific API here. */
+    {
+        UINT uRetVal = 0;
+        DWORD dwRetVal = 0;
+        TCHAR szTempFileName[MAX_PATH];
+        TCHAR lpTempPathBuffer[MAX_PATH];
+        dwRetVal = GetTempPath(MAX_PATH, lpTempPathBuffer);
+        // 0 - the call failed, > MAX_PATH - the path does not fit into the buffer
+        if (dwRetVal > MAX_PATH || (dwRetVal == 0))
+            return "";
+
+        uRetVal = GetTempFileName(lpTempPathBuffer, // directory for tmp files
+            TEXT("temp"), // temp file name prefix
+            0, // create unique name
+            szTempFileName); // buffer for name
+        if (uRetVal == 0)
+            return "";
+        // convert the name: Windows string -> utf8 -> file name encoding
+        std::string tmp_url_utf8;
+        std::string tmp_url;
+        if(!windows_to_utf8(szTempFileName, tmp_url_utf8)
+            || !utf8_to_file_name(tmp_url_utf8, tmp_url))
+            return "";
+        FILE * f = g_fopen(tmp_url.c_str(), "wb");
+        if(!f)
+            return "";
+        // the result of fwrite is not checked
+        fwrite(" ", 1, 1, f);
+        fclose(f);
+        return tmp_url;
+    }
+#else
+    {
+        std::string tmp_url;
+        gchar * buf = NULL;
+        gint fd = g_file_open_tmp(NULL, &buf, NULL);
+        if(fd == -1)
+            return "";
+        // keep a copy of the name, then release the buffer allocated by glib
+        tmp_url = buf;
+        g_free(buf);
+        ssize_t write_size;
+        write_size = write(fd, " ", 1);
+        // a write error is only reported, the file name is returned anyway
+        if (write_size == -1) {
+            g_print("write error!\n");
+        }
+        close(fd);
+        return tmp_url;
+    }
+#endif
+}
+
+/* Check whether str is one of the known resource types.
+known_resource_types is scanned up to its terminating NULL entry,
+the comparison is exact (strcmp). */
+bool is_known_resource_type(const char* str)
+{
+    for(size_t i=0; known_resource_types[i]; ++i)
+        if(strcmp(str, known_resource_types[i]) == 0)
+            return true;
+    return false;
+}
+
+/* trim string src
+ * new_beg is set to new beginning of the string
+ * 
new_len length of the new string in bytes + * The source string is not modified. */ +void trim_spaces(const char* const src, const char*& new_beg, size_t& new_len) +{ + new_beg = src; + while(*new_beg && g_unichar_isspace(g_utf8_get_char(new_beg))) { + new_beg = g_utf8_next_char(new_beg); + } + const char* p = new_beg; + const char* end = p; + while(*p) { + if(!g_unichar_isspace(g_utf8_get_char(p))) + end = p; + p = g_utf8_next_char(p); + } + if(*end) + end = g_utf8_next_char(end); + new_len = end - new_beg; +} + +/* truncate utf8 string on char boundary (string content is not changed, + * instead desired new length is returned) + * new string length must be <= max_len + * beg - first char of the string, + * str_len - string length in bytes + * return value: length of the truncated string */ +size_t truncate_utf8_string(const char* const beg, const size_t str_len, const size_t max_len) +{ + if(str_len <= max_len) + return str_len; + if(max_len == 0) + return 0; + const char* char_end = beg+max_len; + const char* p = beg+max_len-1; + while(true) { + // find the first byte of a utf8 char + for(; beg <= p && (*p & 0xC0) == 0x80; --p) + ; + if(p& chars) { + std::stringstream buf; + bool add_splitter = false; + for(std::list::const_iterator it = chars.begin(); it != chars.end(); ++it) { + if(add_splitter) + buf << ", "; + buf << static_cast(g_utf8_get_char(*it)); + add_splitter = true; + } + return buf.str(); +} + +char* strrchr_len(char* str, size_t size, char c) +{ + for(char *p = str + size - 1; str <= p; --p) + if(*p == c) + return p; + return NULL; +} + +bool is_ascii_alpha(wchar_t ch) +{ + static const wchar_t alphabet[] = + L"abcdefghijklmnopqrstuvwxyz" + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + for(size_t i=0; i= filepath.length()) + return ""; + std::string::size_type pos2 = filepath.find_last_of('.'); + if(pos2 == std::string::npos || pos2 < pos) + return filepath.substr(pos); + return filepath.substr(pos, pos2-pos); +} + +/* remove the item at path + * if this is a 
regular file, remove the file;
+ * if this is a symbolic link, remove the link;
+ * if this is a directory, remove the directory recursively.
+ * A failure to remove one item does not stop the processing of the other items.
+ * Return value: EXIT_SUCCESS or EXIT_FAILURE
+ * */
+int remove_recursive(const std::string& path)
+{
+    const char* const target = path.c_str();
+    // not a directory: a single g_remove call decides the result
+    if(!g_file_test(target, G_FILE_TEST_IS_DIR))
+        return g_remove(target) ? EXIT_FAILURE : EXIT_SUCCESS;
+
+    int status = EXIT_SUCCESS;
+    /* Grant full access for everyone, so that the directory can be read
+    and items can be removed from it. If the mode cannot be read or changed,
+    go on, maybe the directory can be removed anyway. */
+    stardict_stat_t st;
+    if(g_stat(target, &st) == 0)
+        g_chmod(target, st.st_mode | (S_IRWXU|S_IRWXG|S_IRWXO));
+    glib::Dir handle(g_dir_open(target, 0, NULL));
+    if(!handle) {
+        status = EXIT_FAILURE;
+    } else {
+        // directory path ending with a dir separator
+        std::string prefix(path);
+        if(prefix[prefix.length()-1] != G_DIR_SEPARATOR)
+            prefix += G_DIR_SEPARATOR;
+        for(const gchar* entry = g_dir_read_name(get_impl(handle)); entry;
+            entry = g_dir_read_name(get_impl(handle))) {
+            if(strcmp(entry, ".") == 0 || strcmp(entry, "..") == 0)
+                continue;
+            if(remove_recursive(prefix + entry))
+                status = EXIT_FAILURE;
+        }
+    }
+    // try to remove the directory itself even if some items could not be removed
+    if(g_rmdir(target))
+        status = EXIT_FAILURE;
+    return status;
+}
diff --git a/lib/stardict/libcommon.h b/lib/stardict/libcommon.h
new file mode 100644
index 0000000..1689bec
--- /dev/null
+++ b/lib/stardict/libcommon.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright 2011 kubtek
+ *
+ * This file is part of StarDict.
+ *
+ * StarDict is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * StarDict is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Return file name in file name encoding. +Return an empty string if file cannot be created. */ +std::string create_temp_file(void); + +extern const char* known_resource_types[]; + +bool is_known_resource_type(const char* str); + +void trim_spaces(const char* const src, const char*& new_beg, size_t& new_len); +size_t truncate_utf8_string(const char* const beg, const size_t str_len, const size_t max_len); +std::string fix_utf8_str(const std::string& str, char replacement_char = '?'); +std::string print_char_codes(const std::list& chars); +char* strrchr_len(char* str, size_t size, char c); +bool is_ascii_alpha(wchar_t ch); +std::string get_basename_without_extension(const std::string& filepath); +int remove_recursive(const std::string& path); + +#define UTF8_BOM "\xEF\xBB\xBF" + +#define known_type_ids \ + "mtygxkwhnr" + +#define file_not_found_err \ + "File does not exist: '%s'" +#define dir_not_found_err \ + "Directory does not exist: '%s'" +#define read_file_err \ + "Error reading file: '%s'. Error: %s." +#define write_file_err \ + "Error writing file: '%s'." +#define open_read_file_err \ + "Unable open file for reading: '%s'. Error: %s." +#define open_write_file_err \ + "Unable open file for writing: '%s'." +#define create_temp_file_err \ + "Unable to create a temporary file: '%s'." +#define create_temp_file_no_name_err \ + "Unable to create a temporary file." +#define remove_temp_file_err \ + "Unable to remove a temporary file: '%s'." +#define copy_file_err \ + "Error copying file from '%s' to '%s'. Error: %s" +#define create_dir_err \ + "Unable to create directory '%s'. Error: %s" +#define open_dir_err \ + "Unable to open directory '%s'. Error: %s" +#define incorrect_arg_err \ + "Incorrect argument." +#define fixed_ignore_msg \ + "The problem was fixed. Ignore the problem." 
+#define fixed_msg \ + "The problem was fixed." +#define fixed_msg2 \ + "The problem was fixed. " + +/* Maximum size of word in index. strlen(word) < MAX_INDEX_KEY_SIZE. + * See doc/StarDictFileFormat. */ +const int MAX_INDEX_KEY_SIZE=256; + +#endif + diff --git a/lib/tlpi-lib/Build_ename.sh b/lib/tlpi-lib/Build_ename.sh new file mode 100644 index 0000000..d2eb014 --- /dev/null +++ b/lib/tlpi-lib/Build_ename.sh @@ -0,0 +1,53 @@ +#!/bin/sh +# +# Create a new version of the file ename.c.inc by parsing symbolic +# error names defined in errno.h +# +echo '#include ' | cpp -dM | +sed -n -e '/#define *E/s/#define *//p' |sort -k2n | +awk ' +BEGIN { + entries_per_line = 4 + line_len = 68; + last = 0; + varname =" enames"; + print "static char *ename[] = {"; + line = " /* 0 */ \"\""; +} + +{ + if ($2 ~ /^E[A-Z0-9]*$/) { # These entries are sorted at top + synonym[$1] = $2; + } else { + while (last + 1 < $2) { + last++; + line = line ", "; + if (length(line ename) > line_len || last == 1) { + print line; + line = " /* " last " */ "; + line = sprintf(" /* %3d */ ", last); + } + line = line "\"" "\"" ; + } + last = $2; + ename = $1; + for (k in synonym) + if (synonym[k] == $1) ename = ename "/" k; + + line = line ", "; + if (length(line ename) > line_len || last == 1) { + print line; + line = " /* " last " */ "; + line = sprintf(" /* %3d */ ", last);; + } + line = line "\"" ename "\"" ; + } +} +END { + print line; + print "};" + print ""; + print "#define MAX_ENAME " last; +} +' + diff --git a/lib/tlpi-lib/Makefile b/lib/tlpi-lib/Makefile new file mode 100644 index 0000000..c4b81ac --- /dev/null +++ b/lib/tlpi-lib/Makefile @@ -0,0 +1,27 @@ +# Makefile to build library used by all programs +# +# This make file relies on the assumption that each C file in this +# directory belongs in the library +# +# This makefile is very simple so that every version of make +# should be able to handle it +# +include Makefile.inc + +# The library build is "brute force" -- we don't bother with 
+# dependency checking. + +allgen : ${TLPI_LIB} + +${TLPI_LIB} : *.cpp ename.c.inc + ${CXX} -c -g ${CXXFLAGS} -Wno-write-strings *.cpp + ${RM} ${TLPI_LIB} + ${AR} rs libtlpi.a *.o + +ename.c.inc : + sh Build_ename.sh > ename.c.inc + echo 1>&2 "ename.c.inc built" + +clean : + ${RM} *.o ename.c.inc ${TLPI_LIB} + diff --git a/lib/tlpi-lib/Makefile.inc b/lib/tlpi-lib/Makefile.inc new file mode 100644 index 0000000..bb0934d --- /dev/null +++ b/lib/tlpi-lib/Makefile.inc @@ -0,0 +1,49 @@ +# Makefile.inc - common definitions used by all makefiles + +TLPI_DIR = .. +TLPI_LIB = ${TLPI_DIR}/libtlpi.a +TLPI_INCL_DIR = ${TLPI_DIR}/lib + +LINUX_LIBRT = -lrt +LINUX_LIBDL = -ldl +LINUX_LIBACL = -lacl +LINUX_LIBCRYPT = -lcrypt +LINUX_LIBCAP = -lcap + +# "-Wextra" is a more descriptive synonym for "-W", but only +# available in more recent gcc versions + +# Defining _DEFAULT_SOURCE is a workaround to avoid the warnings that +# would otherwise be produced when compiling code that defines _BSD_SOURCE +# or _SVID_SOURCE against glibc headers in version 2.20 and later. +# (The alternative would be to replace each instance of "#define _SVID_SOURCE" +# or "#define _BSD_SOURCE" in the example programs with +# "#define _DEFAULT_SOURCE".) + +IMPL_CFLAGS = -D_XOPEN_SOURCE=600 \ + -D_DEFAULT_SOURCE \ + -g -I${TLPI_INCL_DIR} \ + -pedantic \ + -Wall \ + -W \ + -Wno-sign-compare \ + -Wno-unused-parameter \ + -Wno-write-strings + +# clang(1) is a little more zealous than gcc(1) with respect to some warnings. +# Suppress those warnings (which, at least in the book code, relate to code +# that is fine). + +ifeq ($(CC),clang) + IMPL_CFLAGS += -Wno-uninitialized -Wno-infinite-recursion +endif + +CFLAGS = ${IMPL_CFLAGS} + +IMPL_THREAD_FLAGS = -pthread + +IMPL_LDLIBS = ${TLPI_LIB} + +LDLIBS = ${IMPL_LDLIBS} + +RM = rm -f diff --git a/lib/tlpi-lib/README b/lib/tlpi-lib/README new file mode 100644 index 0000000..1815ba2 --- /dev/null +++ b/lib/tlpi-lib/README @@ -0,0 +1,7 @@ +A small design note... 
Many of the library functions defined in the +source code modules in this directory handle errors from system calls +and C library functions by simply terminating the process. This +isn't acceptable design for a "real world" suite of library functions; +I did things this way to keep the source code simpler and shorter. +A properly designed function should indicate an error to its caller +using a status argument or some special function return value. diff --git a/lib/tlpi-lib/ename.c.inc b/lib/tlpi-lib/ename.c.inc new file mode 100644 index 0000000..907a01f --- /dev/null +++ b/lib/tlpi-lib/ename.c.inc @@ -0,0 +1,35 @@ +static char *ename[] = { + /* 0 */ "", + /* 1 */ "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO", "ENXIO", + /* 7 */ "E2BIG", "ENOEXEC", "EBADF", "ECHILD", + /* 11 */ "EAGAIN/EWOULDBLOCK", "ENOMEM", "EACCES", "EFAULT", + /* 15 */ "ENOTBLK", "EBUSY", "EEXIST", "EXDEV", "ENODEV", + /* 20 */ "ENOTDIR", "EISDIR", "EINVAL", "ENFILE", "EMFILE", + /* 25 */ "ENOTTY", "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE", + /* 30 */ "EROFS", "EMLINK", "EPIPE", "EDOM", "ERANGE", + /* 35 */ "EDEADLK/EDEADLOCK", "ENAMETOOLONG", "ENOLCK", "ENOSYS", + /* 39 */ "ENOTEMPTY", "ELOOP", "", "ENOMSG", "EIDRM", "ECHRNG", + /* 45 */ "EL2NSYNC", "EL3HLT", "EL3RST", "ELNRNG", "EUNATCH", + /* 50 */ "ENOCSI", "EL2HLT", "EBADE", "EBADR", "EXFULL", "ENOANO", + /* 56 */ "EBADRQC", "EBADSLT", "", "EBFONT", "ENOSTR", "ENODATA", + /* 62 */ "ETIME", "ENOSR", "ENONET", "ENOPKG", "EREMOTE", + /* 67 */ "ENOLINK", "EADV", "ESRMNT", "ECOMM", "EPROTO", + /* 72 */ "EMULTIHOP", "EDOTDOT", "EBADMSG", "EOVERFLOW", + /* 76 */ "ENOTUNIQ", "EBADFD", "EREMCHG", "ELIBACC", "ELIBBAD", + /* 81 */ "ELIBSCN", "ELIBMAX", "ELIBEXEC", "EILSEQ", "ERESTART", + /* 86 */ "ESTRPIPE", "EUSERS", "ENOTSOCK", "EDESTADDRREQ", + /* 90 */ "EMSGSIZE", "EPROTOTYPE", "ENOPROTOOPT", + /* 93 */ "EPROTONOSUPPORT", "ESOCKTNOSUPPORT", + /* 95 */ "EOPNOTSUPP/ENOTSUP", "EPFNOSUPPORT", "EAFNOSUPPORT", + /* 98 */ "EADDRINUSE", "EADDRNOTAVAIL", 
"ENETDOWN", "ENETUNREACH", + /* 102 */ "ENETRESET", "ECONNABORTED", "ECONNRESET", "ENOBUFS", + /* 106 */ "EISCONN", "ENOTCONN", "ESHUTDOWN", "ETOOMANYREFS", + /* 110 */ "ETIMEDOUT", "ECONNREFUSED", "EHOSTDOWN", "EHOSTUNREACH", + /* 114 */ "EALREADY", "EINPROGRESS", "ESTALE", "EUCLEAN", + /* 118 */ "ENOTNAM", "ENAVAIL", "EISNAM", "EREMOTEIO", "EDQUOT", + /* 123 */ "ENOMEDIUM", "EMEDIUMTYPE", "ECANCELED", "ENOKEY", + /* 127 */ "EKEYEXPIRED", "EKEYREVOKED", "EKEYREJECTED", + /* 130 */ "EOWNERDEAD", "ENOTRECOVERABLE", "ERFKILL", "EHWPOISON" +}; + +#define MAX_ENAME 133 diff --git a/lib/tlpi-lib/error_functions.cpp b/lib/tlpi-lib/error_functions.cpp new file mode 100644 index 0000000..e819c16 --- /dev/null +++ b/lib/tlpi-lib/error_functions.cpp @@ -0,0 +1,204 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. + ename[err] : "?UNKNOWN?", strerror(err)); + else + snprintf(errText, BUF_SIZE, ":"); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-truncation" + snprintf(buf, BUF_SIZE, "ERROR%s %s\n", errText, userMsg); +#pragma GCC diagnostic pop + + if (flushStdout) + fflush(stdout); /* Flush any pending stdout */ + fputs(buf, stderr); + fflush(stderr); /* In case stderr is not line-buffered */ +} + +/* Display error message including 'errno' diagnostic, and + return to caller */ + +void +errMsg(const char *format, ...) +{ + va_list argList; + int savedErrno; + + savedErrno = errno; /* In case we change it here */ + + va_start(argList, format); + outputError(TRUE, errno, TRUE, format, argList); + va_end(argList); + + errno = savedErrno; +} + +/* Display error message including 'errno' diagnostic, and + terminate the process */ + +void +errExit(const char *format, ...) 
+{
+    va_list argList;
+
+    va_start(argList, format);
+    outputError(TRUE, errno, TRUE, format, argList);
+    va_end(argList);
+
+    terminate(TRUE);
+}
+
+/* Display error message including 'errno' diagnostic, and
+   terminate the process by calling _exit().
+
+   The relationship between this function and errExit() is analogous
+   to that between _exit(2) and exit(3): unlike errExit(), this
+   function does not flush stdout and calls _exit(2) to terminate the
+   process (rather than exit(3), which would cause exit handlers to be
+   invoked).
+
+   These differences make this function especially useful in a library
+   function that creates a child process that must then terminate
+   because of an error: the child must terminate without flushing
+   stdio buffers that were partially filled by the caller and without
+   invoking exit handlers that were established by the caller. */
+
+void
+err_exit(const char *format, ...)
+{
+    va_list argList;
+
+    va_start(argList, format);
+    /* NOTE(review): the FALSE arguments here and in terminate() are
+       presumably what selects "no stdout flush" and _exit(2); the
+       signatures of both helpers are outside this chunk - confirm */
+    outputError(TRUE, errno, FALSE, format, argList);
+    va_end(argList);
+
+    terminate(FALSE);
+}
+
+/* The following function does the same as errExit(), but expects
+   the error number in 'errnum' */
+
+void
+errExitEN(int errnum, const char *format, ...)
+{
+    va_list argList;
+
+    va_start(argList, format);
+    /* The caller-supplied 'errnum' is passed where errExit() passes 'errno' */
+    outputError(TRUE, errnum, TRUE, format, argList);
+    va_end(argList);
+
+    terminate(TRUE);
+}
+
+/* Print an error message (without an 'errno' diagnostic), and
+   then call terminate(TRUE), as errExit() does */
+
+void
+fatal(const char *format, ...)
+{
+    va_list argList;
+
+    va_start(argList, format);
+    outputError(FALSE, 0, TRUE, format, argList);
+    va_end(argList);
+
+    terminate(TRUE);
+}
+
+/* Print a command usage error message and terminate the process */
+
+void
+usageErr(const char *format, ...)
+{ + va_list argList; + + fflush(stdout); /* Flush any pending stdout */ + + fprintf(stderr, "Usage: "); + va_start(argList, format); + vfprintf(stderr, format, argList); + va_end(argList); + + fflush(stderr); /* In case stderr is not line-buffered */ + exit(EXIT_FAILURE); +} + +/* Diagnose an error in command-line arguments and + terminate the process */ + +void +cmdLineErr(const char *format, ...) +{ + va_list argList; + + fflush(stdout); /* Flush any pending stdout */ + + fprintf(stderr, "Command-line usage error: "); + va_start(argList, format); + vfprintf(stderr, format, argList); + va_end(argList); + + fflush(stderr); /* In case stderr is not line-buffered */ + exit(EXIT_FAILURE); +} diff --git a/lib/tlpi-lib/error_functions.h b/lib/tlpi-lib/error_functions.h new file mode 100644 index 0000000..8c2817b --- /dev/null +++ b/lib/tlpi-lib/error_functions.h @@ -0,0 +1,55 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. NORETURN ; + +void usageErr(const char *format, ...) NORETURN ; + +void cmdLineErr(const char *format, ...) NORETURN ; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/tlpi-lib/get_num.cpp b/lib/tlpi-lib/get_num.cpp new file mode 100644 index 0000000..e8e1ad2 --- /dev/null +++ b/lib/tlpi-lib/get_num.cpp @@ -0,0 +1,103 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. You may use, modify, and redistribute it * +* under the terms of the GNU Lesser General Public License as published * +* by the Free Software Foundation, either version 3 or (at your option) * +* any later version. 'flags' is a bit mask of flags controlling + how the conversion is done and what diagnostic checks are performed on the + numeric result; see get_num.h for details. 
+ + 'fname' is the name of our caller, and 'name' is the name associated with + the command-line argument 'arg'. See the + comments for getNum() for a description of the arguments to this function. */ + +long +getLong(const char *arg, int flags, const char *name) +{ + return getNum("getLong", arg, flags, name); +} + +/* Convert a numeric command-line argument string to an integer. See the + comments for getNum() for a description of the arguments to this function. */ + +int +getInt(const char *arg, int flags, const char *name) +{ + long res; + + res = getNum("getInt", arg, flags, name); + + if (res > INT_MAX || res < INT_MIN) + gnFail("getInt", "integer out of range", arg, name); + + return (int) res; +} diff --git a/lib/tlpi-lib/get_num.h b/lib/tlpi-lib/get_num.h new file mode 100644 index 0000000..7b5a0b1 --- /dev/null +++ b/lib/tlpi-lib/get_num.h @@ -0,0 +1,32 @@ +/*************************************************************************\ +* Copyright (C) Michael Kerrisk, 2018. * +* * +* This program is free software. (m) : (n)) +#define max(m,n) ((m) > (n) ? (m) : (n)) + +/* Some systems don't define 'socklen_t' */ + +#if defined(__sgi) +typedef int socklen_t; +#endif + +#if defined(__sun) +#include /* Has definition of FASYNC */ +#endif + +#if ! defined(O_ASYNC) && defined(FASYNC) +/* Some systems define FASYNC instead of O_ASYNC */ +#define O_ASYNC FASYNC +#endif + +#if defined(MAP_ANON) && ! defined(MAP_ANONYMOUS) +/* BSD derivatives usually have MAP_ANON, not MAP_ANONYMOUS */ +#define MAP_ANONYMOUS MAP_ANON + +#endif + +#if ! defined(O_SYNC) && defined(O_FSYNC) +/* Some implementations have O_FSYNC instead of O_SYNC */ +#define O_SYNC O_FSYNC +#endif + +#if defined(__FreeBSD__) + +/* FreeBSD uses these alternate names for fields in the sigval structure */ + +#define sival_int sigval_int +#define sival_ptr sigval_ptr +#endif + +#endif + +#endif