This function is about the fact that a std::wstring was used in another cpp file in order to be able to read strings with German umlauts from the console. Since it is difficult to get wstrings into a text file when a std::ofstream is already accessing the text file, this wstring was converted into a normal std::string using utf8.h. The 16-bit characters that represented umlauts are now 2 cryptic characters (which is logical, I know). A ß becomes ß, an ü becomes ü, as you often see it in everyday life. This is corrected with this .h and .cpp files.
My question is: Could you please review this function and say what you think of the code? I'm asking because the code is copying one vector to a second one a lot, and, as you see in the last lines, I need to get rid of ‘left-over’ null characters. I want to include the header and cpp file more often, so I want the two to be good.
handle_German_umlauts.cpp
#include "handle_German_umlauts.h" Umlaute_korrigieren::Umlaute_korrigieren() { } Umlaute_korrigieren::~Umlaute_korrigieren() { } std::vector<char> Umlaute_korrigieren::_std__String_to_std__vectorChar_for_ANSI(std::string stdstring) { std::vector<char> CString(stdstring.c_str(), stdstring.c_str() + stdstring.size() + 1); std::vector<char> copy(stdstring.c_str(), stdstring.c_str() + stdstring.size() + 1); for (size_t i = (size_t)0; i < CString.size() - (size_t)1; i++) { if (CString[i] == -61 && CString[i + 1] == -97) // Pseudo-ß gefunden { copy[i] = '\xDF'; //ß ist DF(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i+(size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -68) // Pseudo-ü gefunden { copy[i] = '\xFC'; //ü ist FC(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -92) // Pseudo-ä gefunden { copy[i] = '\xE4'; //ä ist E4(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -74) // Pseudo-ö gefunden { copy[i] = '\xF6'; //ö ist F6(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -124) // Pseudo-Ä gefunden { copy[i] = '\xC4'; //Ä ist C4(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -106) // Pseudo-Ö gefunden { copy[i] = '\xD6'; //Ö ist D6(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } if (CString[i] == -61 && CString[i + 1] == -100) // Pseudo-Ü gefunden { copy[i] = '\xDC'; //Ü ist DC(hex) in ANSI for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren { copy[j - 1] = CString[j]; } CString = copy; } } // crop unnecessary ‘\0’s size_t _0Counter = 0; for (size_t i = (size_t)0; i < CString.size(); i++) { if (CString[i] == '\0') { _0Counter += (size_t)1; } } size_t original = CString.size() - (size_t)1; // because the vector gets smaller due to the deletion and the for loop is always reevaluating size_t wie_weit = CString.size() - _0Counter; for (size_t i = original; i > wie_weit; i--) { CString.erase(CString.begin() + i - 1); } return CString; } The handle_German_umlauts.h
#ifndef HANDLE_GERMAN_UMLAUTS_H_ #define HANDLE_GERMAN_UMLAUTS_H_ #include <vector> #include <string> class Umlaute_korrigieren { public: Umlaute_korrigieren(); ~Umlaute_korrigieren(); std::vector<char> _std__String_to_std__vectorChar_for_ANSI(std::string); private: }; #endif // !HANDLE_GERMAN_UMLAUTS_H_ The function is called as follows:
std::string Strasse_als_stdstring; utf8::utf16to8(physical_address.street.begin(), physical_address.street.end(), back_inserter(Strasse_als_stdstring)); std::vector<char> korrigierte_Strasse = Uk._std__String_to_std__vectorChar_for_ANSI(Strasse_als_stdstring); for (size_t h = (size_t)0; h < korrigierte_Strasse.size() - (size_t)3; h++) // write to txt. -3, so that \r\n\0 aren't printed. { fs8 << korrigierte_Strasse[h]; } fs8 << " " << physical_address.house_number << std::endl; where physical_address.street is the std::wstring (mentioned above), and the for loop serves to write the chars in the textfile (std::ofstream fs8).
