/** pahooNormalizeText.cpp
 * Text normalization class: C++ source
 *
 * @copyright	(c)studio pahoo
 * @author		Papa Pahoo
 * @environment	MinGW C++ + Boost C++ Libraries + MeCab
 * @reference	https://www.pahoo.org/e-soul/webtech/cpp01-18-01.shtm
 */
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <tchar.h>
#include <vector>
#include <string>
#include <locale>
#include <regex>
#include <sstream>
#include <cmath>
#include <winsock2.h>
#include <windows.h>
#include <commctrl.h>
#include <algorithm>
#include <locale>
#ifdef MECAB
	#include <mecab.h>
#endif
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include "pahooNormalizeText.hpp"

using namespace std;

// Character-code conversion ===================================================
/**
 * Character-code conversion: ANSI code page text -> wstring (Windows API).
 * The input is decoded with CP_ACP; judging by the function name this is
 * expected to be Shift_JIS on a Japanese system.
 * @param  string src Text encoded in the ANSI code page
 * @return wstring Converted text (cut at the first NUL character)
 * @throws std::system_error when MultiByteToWideChar reports failure
*/
std::wstring __sjis_wstring(std::string src) {
	// Pass 1: query the required length. The input length is -1, so the
	// count includes the terminating NUL.
	const int needed = ::MultiByteToWideChar(CP_ACP, 0U, src.data(), -1, nullptr, 0);
	std::wstring wide(needed, L'\0');

	// Pass 2: convert straight into the result string's buffer.
	const int written = ::MultiByteToWideChar(CP_ACP, 0U, src.data(), -1,
			&wide[0], static_cast<int>(wide.size()));
	if (written == 0) {
		throw std::system_error{static_cast<int>(::GetLastError()), std::system_category()};
	}

	// Drop the terminator (and anything after the first NUL).
	wide.resize(std::char_traits<wchar_t>::length(wide.c_str()));
	return wide;
}

/**
 * Character-code conversion: wstring -> ANSI code page text (Windows API).
 * The output is encoded with CP_ACP; judging by the function name this is
 * expected to be Shift_JIS on a Japanese system.
 * @param  wstring src Wide-character text
 * @return string Converted text (cut at the first NUL character)
 * @throws std::system_error when WideCharToMultiByte reports failure
*/
std::string __wstring_sjis(std::wstring src) {
	// Pass 1: query the required byte count (terminating NUL included).
	const int needed = ::WideCharToMultiByte(CP_ACP, 0U, src.data(), -1, nullptr, 0, nullptr, nullptr);
	std::string narrow(needed, '\0');

	// Pass 2: convert straight into the result string's buffer.
	const int written = ::WideCharToMultiByte(CP_ACP, 0U, src.data(), -1,
			&narrow[0], static_cast<int>(narrow.size()), nullptr, nullptr);
	if (written == 0) {
		throw std::system_error{static_cast<int>(::GetLastError()), std::system_category()};
	}

	// Drop the terminator (and anything after the first NUL).
	narrow.resize(std::char_traits<char>::length(narrow.c_str()));
	return narrow;
}

/**
 * Character-code conversion: UTF-8 -> wstring (Windows API).
 * @param  string src UTF-8 text
 * @return wstring Converted text (cut at the first NUL character)
 * @throws std::system_error when MultiByteToWideChar reports failure
*/
std::wstring __utf8_wstring(std::string src) {
	// Pass 1: query the required length (terminating NUL included).
	const int needed = ::MultiByteToWideChar(CP_UTF8, 0U, src.data(), -1, nullptr, 0);
	std::wstring wide(needed, L'\0');

	// Pass 2: convert straight into the result string's buffer.
	const int written = ::MultiByteToWideChar(CP_UTF8, 0U, src.data(), -1,
			&wide[0], static_cast<int>(wide.size()));
	if (written == 0) {
		throw std::system_error{static_cast<int>(::GetLastError()), std::system_category()};
	}

	// Drop the terminator (and anything after the first NUL).
	wide.resize(std::char_traits<wchar_t>::length(wide.c_str()));
	return wide;
}

/**
 * Character-code conversion: wstring -> UTF-8 (Windows API).
 * @param  wstring src Wide-character text
 * @return string Converted text in UTF-8 (cut at the first NUL character)
 * @throws std::system_error when WideCharToMultiByte reports failure
*/
std::string __wstring_utf8(std::wstring src) {
	// Pass 1: query the required byte count (terminating NUL included).
	const int needed = ::WideCharToMultiByte(CP_UTF8, 0U, src.data(), -1, nullptr, 0, nullptr, nullptr);
	std::string narrow(needed, '\0');

	// Pass 2: convert straight into the result string's buffer.
	const int written = ::WideCharToMultiByte(CP_UTF8, 0U, src.data(), -1,
			&narrow[0], static_cast<int>(narrow.size()), nullptr, nullptr);
	if (written == 0) {
		throw std::system_error{static_cast<int>(::GetLastError()), std::system_category()};
	}

	// Drop the terminator (and anything after the first NUL).
	narrow.resize(std::char_traits<char>::length(narrow.c_str()));
	return narrow;
}

/**
 * eLXgER[hϊFSJISUTF-8iWindows APIgpj
 * @param  wstring src SJISeLXg
 * @return string ϊeLXgiUTF-8j
*/
std::string sjis_utf8(std::string src) {
	wstring const wide = __sjis_wstring(src);
	return __wstring_utf8(wide);
}

/**
 * eLXgER[hϊFUTF-8SJISiWindows APIgpj
 * @param  wstring src UTF-8eLXg
 * @return string ϊeLXgiUTF-8j
*/
std::string utf8_sjis(std::string src) {
	wstring const wide = __utf8_wstring(src);
	return __wstring_sjis(wide);
}

/**
 * Convert text with the Win32 LCMapStringW API (e.g. half-width <-> full-width).
 * @param	wstring wsour  Text to convert
 * @param	DWORD   method Mapping flags passed to LCMapStringW (LCMAP_*)
 * @return	wstring Converted text; empty when the input is empty. The API's
 *					return value is not inspected, so on failure whatever is
 *					in the zero-initialised buffer is returned.
 */
wstring wconvString(wstring wsour, DWORD method) {
	if (wsour.empty()) {
		return wstring();
	}

	// One input character can expand to two output characters (for example a
	// voiced kana split into base + mark), so reserve twice the input length
	// plus a terminator. The buffer lives on the heap: a variable-length
	// array is non-standard C++ and can overflow the stack on long input.
	const size_t capacity = wsour.length() * 2 + 1;
	std::vector<wchar_t> wdest(capacity, L'\0');

	// The source length is passed explicitly, so the API does not write a
	// terminator; the zero-filled buffer supplies it.
	LCMapStringW(LOCALE_SYSTEM_DEFAULT, method, wsour.c_str(),
			static_cast<int>(wsour.length()), wdest.data(),
			static_cast<int>(capacity - 1));

	return wstring(wdest.data());
}

// Text normalization class ====================================================
/**
 * Constructor.
 * When built with MECAB defined, verifies that the MeCab tagger member is
 * available (recording an error message otherwise) and empties the
 * notation-variant dictionary. Without MECAB it does nothing.
 */
pahooNormalizeText::pahooNormalizeText() {
#ifdef MECAB
	// MeCab check: a null tagger means MeCab could not be set up, so store
	// MeCab's own error text for later retrieval through getError().
	if (! this->tagger) {
		const char *msg = MeCab::getTaggerError();
		this->setError(_SW("MeCabCXg[Ă\n\n") + _SW((string)msg));
	}
	// Start with an empty notation-variant dictionary; toHankaku() loads it
	// on first use.
	VariableDict.clear();
#endif
}

// Destructor: nothing is released explicitly here.
pahooNormalizeText::~pahooNormalizeText() = default;

/**
 * Report whether an error is currently recorded.
 * @return	bool TRUE: an error message is stored / FALSE: no error
*/
bool pahooNormalizeText::isError(void) {
	return ! this->errmsg.empty();
}

/**
 * Get the current error message.
 * @return	wstring The stored error message (empty when there is none)
*/
wstring pahooNormalizeText::getError(void) {
	const wstring& current = this->errmsg;
	return current;
}

/**
 * Clear the stored error message.
 * @return	none
*/
void pahooNormalizeText::resetError(void) {
	this->errmsg.clear();
}

/**
 * Store an error message (replacing any previous one).
 * @param	wstring msg Error message
 * @return	none
*/
void pahooNormalizeText::setError(wstring msg) {
	this->errmsg.assign(msg);
}

/**
 * Front-end replacement: apply user-defined regex substitutions to the input
 * before any other processing.
 *
 * The rules are read from the file FRONT_REPLACE_CSV, one rule per line in
 * the form  <regex pattern> TAB <replacement>.  Rules are applied in file
 * order, each to the result of the previous one. Fields after the second
 * are ignored. A line with no replacement field deletes the matches. Lines
 * whose pattern is empty (e.g. blank lines) are skipped.
 *
 * @param	wstring wsour Input text
 * @return	wstring Text after all replacements; the input unchanged when the
 *					rule file cannot be opened
 * @throws	std::regex_error when a pattern in the file is not a valid regex
*/
wstring pahooNormalizeText::frontend_replace(wstring wsour) {
	// Open the rule file; without it there is nothing to do.
	ifstream ifs(FRONT_REPLACE_CSV);
	if (! ifs) {
		return wsour;
	}

	const string delim = "\t";
	string line;
	vector<string> fields;
	while (getline(ifs, line)) {
		fields.clear();
		boost::split(fields, line, boost::is_any_of(delim));

		// An empty pattern would match at every position and insert the
		// replacement between all characters, so such lines are ignored.
		if (fields.empty() || fields[0].empty()) {
			continue;
		}
		const wstring pattern = _SW(fields[0]);
		if (pattern.empty()) {
			continue;
		}
		// Only the first two fields are used. The replacement is taken
		// from this line alone and never carried over from a previous rule.
		wstring replacement;
		if (fields.size() >= 2) {
			replacement = _SW(fields[1]);
		}

		wregex re(pattern);
		wsour = regex_replace(wsour, re, replacement);
	}

	return wsour;
}

/**
 * Convert a half-width number to kanji numerals with positional units
 * (e.g. "1234" -> U+5343 U+4E8C U+767E U+4E09 U+5341 U+56DB).
 *
 * - Commas are removed before conversion.
 * - Zero digits produce no output, so "0" converts to an empty string.
 * - '.' and '-' are mapped to their full-width forms, but they are still
 *   treated as occupying a digit position.
 *   NOTE(review): decimals such as "1.5" therefore do not convert sensibly;
 *   confirm the intended handling.
 * - Numbers longer than the unit table covers (more than 20 digits) are
 *   transliterated digit by digit instead, with zero written as U+3007.
 *
 * @param	wstring instr Half-width number
 * @return	wstring Kanji numerals
*/
wstring pahooNormalizeText::num2kanji(wstring instr) {
	static const wchar_t kantbl1[] =
		{ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
			L'8', L'9', L'.', L'-' };
	// Kanji for 1..9 (zero has no glyph here), then full-width '.' and '-'.
	static const wchar_t kantbl2[] =
		{ 0x0000, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D,
			0x4E03, 0x516B, 0x4E5D, 0xFF0E, 0xFF0D };
	// Units inside a 4-digit group: (none), ten, hundred, thousand.
	static const wchar_t kantbl3[] = { 0x0000, 0x5341, 0x767E, 0x5343 };
	// Units per 4-digit group: (none), 10^4, 10^8, 10^12, 10^16.
	static const wchar_t kantbl4[] = { 0x0000, 0x4E07, 0x5104, 0x5146, 0x4EAC };
	const int glyphCount = static_cast<int>(sizeof(kantbl1) / sizeof(kantbl1[0]));
	const int scaleCount = static_cast<int>(sizeof(kantbl4) / sizeof(kantbl4[0]));

	// Map one input character to its kanji / full-width form (0 = none).
	auto lookup = [glyphCount](wchar_t ch) -> wchar_t {
		for (int k = 0; k < glyphCount; k++) {
			if (ch == kantbl1[k]) {
				return kantbl2[k];
			}
		}
		return 0x0000;
	};

	// Strip thousands separators first so they do not occupy a digit slot.
	wstring digits;
	digits.reserve(instr.length());
	for (wchar_t ch : instr) {
		if (ch != L',') {
			digits += ch;
		}
	}
	const int len = static_cast<int>(digits.length());

	// Too long for the unit table: fall back to digit-by-digit output
	// rather than indexing past the end of kantbl4.
	if (len > 4 * scaleCount) {
		wstring plain;
		for (wchar_t ch : digits) {
			if (ch == L'0') {
				plain += static_cast<wchar_t>(0x3007);
			} else {
				const wchar_t glyph = lookup(ch);
				plain += (glyph != 0x0000) ? glyph : ch;
			}
		}
		return plain;
	}

	wstring outstr;
	// Walk the 4-digit groups from the least significant one.
	for (int i = 0; i * 4 < len; i++) {
		wstring ws2;
		// Inside a group: ones, tens, hundreds, thousands.
		for (int j = 0; j < 4; j++) {
			const int pos = len - i * 4 - j - 1;
			if (pos < 0) {
				break;
			}
			const wchar_t wch1 = lookup(digits[pos]);
			if (wch1 == 0x0000) {
				continue;		// zero or unsupported character: no output
			}
			const wchar_t wch2 = kantbl3[j];
			if (wch2 == 0x0000) {
				ws2.insert(0, 1, wch1);
			} else {
				ws2.insert(0, 1, wch2);
				// "one" is omitted in front of ten / hundred / thousand.
				if (wch1 != 0x4E00) {
					ws2.insert(0, 1, wch1);
				}
			}
		}
		if (! ws2.empty()) {
			if (kantbl4[i] != 0x0000) {
				ws2 += kantbl4[i];
			}
			outstr = ws2 + outstr;
		}
	}

	return outstr;
}

/**
 * Convert a number to "unit notation": the digits stay as they are and a
 * kanji unit (10^4, 10^8, 10^12, 10^16) is appended to each 4-digit group,
 * e.g. "12340000" -> "1234" + U+4E07.
 *
 * - Groups are counted from the right. Leading zeros of a group (half-width
 *   '0' or full-width U+FF10) are removed, and a group that is entirely
 *   zero is omitted together with its unit.
 * - A value whose digits are all zero yields a single zero digit.
 * - Input longer than 20 characters is returned unchanged (beyond the
 *   largest unit).
 *
 * @param	wstring instr Number (exponent notation is not supported)
 * @return	wstring Number in unit notation
*/
wstring pahooNormalizeText::num2scale(wstring instr) {
	// Units per 4-digit group: (none), 10^4, 10^8, 10^12, 10^16.
	static const wchar_t kantbl[] = { 0x0000, 0x4E07, 0x5104, 0x5146, 0x4EAC };
	const size_t groupWidth = 4;

	// Too many digits for the unit table.
	if (instr.length() > 20) {
		return instr;
	}

	auto isZero = [](wchar_t ch) {
		return (ch == L'0') || (ch == static_cast<wchar_t>(0xFF10));
	};

	wstring outstr;
	size_t end = instr.length();
	// At most 20 characters means at most 5 groups, so `unit` stays within
	// kantbl.
	for (size_t unit = 0; end > 0; unit++) {
		const size_t begin = (end > groupWidth) ? end - groupWidth : 0;

		// Skip this group's leading zeros only, never zeros in the middle
		// of it.
		size_t first = begin;
		while ((first < end) && isZero(instr[first])) {
			first++;
		}
		if (first < end) {
			wstring part = instr.substr(first, end - first);
			if (kantbl[unit] != 0x0000) {
				part += kantbl[unit];
			}
			outstr = part + outstr;
		}
		end = begin;
	}

	// Everything was zero: keep one zero digit in the input's own width.
	if (outstr.empty() && ! instr.empty()) {
		outstr = instr.substr(instr.length() - 1, 1);
	}

	return outstr;
}

/**
 * Convert every half-width number in a text to kanji numerals.
 * Each run of the characters 0-9 and '.' is passed to num2kanji() and
 * replaced in place; all other text is copied through unchanged.
 * @param	wstring wsour Text containing half-width numbers
 * @return	wstring Text with the numbers converted
*/
wstring pahooNormalizeText::num2kan(wstring wsour) {
	// Pattern for a half-width number.
	const wregex re1(_SW("[0-9\\.]+"));
	wsmatch mt;

	// Single left-to-right pass. The matched text is never reused as a
	// regex: it can contain '.', which would match any character.
	wstring wdest;
	wstring::const_iterator cursor = wsour.cbegin();
	const wstring::const_iterator last = wsour.cend();
	while (std::regex_search(cursor, last, mt, re1)) {
		wdest.append(cursor, mt[0].first);
		wdest += this->num2kanji(mt[0].str());
		cursor = mt[0].second;
	}
	wdest.append(cursor, last);

	return wdest;
}

/**
 * Convert half-width digits to kanji numerals one character at a time
 * (simple transliteration, no positional units). '0' becomes U+3007, '.'
 * and '-' become their full-width forms; any other character is copied.
 * @param	wstring wsour Text containing half-width numbers
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::num2kanSimple(wstring wsour) {
	static wchar_t halfGlyphs[] =
		{ L'0', L'1', L'2', L'3', L'4', L'5', L'6', L'7',
			L'8', L'9', L'.', L'-' };
	static wchar_t kanjiGlyphs[] =
		{ 0x3007, 0x4E00, 0x4E8C, 0x4E09, 0x56DB, 0x4E94, 0x516D,
			0x4E03, 0x516B, 0x4E5D, 0xFF0E, 0xFF0D };
	const int tableSize = (int)(sizeof(halfGlyphs) / sizeof(halfGlyphs[0]));

	wstring converted = L"";
	for (size_t idx = 0; idx < wsour.length(); ++idx) {
		const wchar_t current = wsour[idx];

		// Find the character in the half-width table.
		int hit = -1;
		for (int k = 0; (k < tableSize) && (hit < 0); ++k) {
			if (current == halfGlyphs[k]) {
				hit = k;
			}
		}

		// Table hit -> mapped glyph, otherwise keep the character as is.
		converted += (hit >= 0) ? kanjiGlyphs[hit] : current;
	}
	return converted;
}

/**
 * Convert large half-width numbers in a text to unit notation.
 * A run of digits is converted with num2scale() only when the whole run has
 * at least five digits and ends in "0000". Shorter runs, and runs whose
 * last four digits are not all zero, are copied unchanged.
 * @param	wstring wsour Text containing half-width numbers
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::bignum2scale(wstring wsour) {
	// Match complete digit runs, so that a prefix of a longer number
	// (e.g. "10000" inside "100001") is never converted on its own.
	const wregex re1(_SW("[0-9]+"));
	const wstring tail = L"0000";
	wsmatch mt;

	// Single left-to-right pass: every run is visited exactly once, so the
	// loop terminates even when num2scale() returns its input unchanged.
	wstring wdest;
	wstring::const_iterator cursor = wsour.cbegin();
	const wstring::const_iterator last = wsour.cend();
	while (std::regex_search(cursor, last, mt, re1)) {
		wdest.append(cursor, mt[0].first);

		const wstring run = mt[0].str();
		const bool convertible = (run.length() > tail.length())
			&& (run.compare(run.length() - tail.length(), tail.length(), tail) == 0);
		if (convertible) {
			wdest += this->num2scale(run);
		} else {
			wdest += run;
		}
		cursor = mt[0].second;
	}
	wdest.append(cursor, last);

	return wdest;
}

/**
 * Convert a kanji / full-width number to a half-width decimal number.
 * The string is scanned from right to left: digit characters are collected,
 * a small unit (ten / hundred / thousand class) closes the current digit
 * run, and a large unit (10^4 class) closes the current 4-digit group.
 * Text that already contains half-width digits is returned unchanged.
 *
 * @param	wstring kanji Kanji number
 * @param	int mode Output format selector. It is accepted for interface
 *					compatibility but not used: the result is always plain
 *					digits.
 * @return	wstring Half-width number
 * @throws	std::out_of_range from std::stoll when a digit run does not fit
 *			in long long
*/
wstring pahooNormalizeText::kan2num(wstring kanji, int mode) {
	(void)mode;
	wsmatch mt1;

	// Digit characters and, at the same index, the half-width digit each
	// one stands for.
	const wstring kan_num1 = _SW("OZPQROQSlT܂UZVWX");
	const wstring kan_num2 = _SW("000111222333445566778899");

	// Unit characters: index i in kan_deci_sub means 10^(i+1), index i in
	// kan_deci means 10000^(i+1).
	const wstring kan_deci_sub = _SW("\\S");
	const wstring kan_deci = _SW("");

	// Leave the text alone when it already contains half-width digits.
	wregex re1(_SW("[0-9]+"));
	if (regex_search(kanji, mt1, re1)) {
		return kanji;
	}

	// Exact integer power. pow() works in floating point and its result may
	// be truncated when converted back to an integer.
	auto ipow = [](long long base, size_t exponent) {
		long long value = 1;
		for (size_t e = 0; e < exponent; e++) {
			value *= base;
		}
		return value;
	};

	wstring a = L"";				// digits collected for the current run
	long long deci = 1;				// weight of the current 4-digit group
	long long deci_sub = 1;			// weight inside the current group
	long long m = 0;				// value of the current group
	long long n = 0;				// total of the groups already closed

	// Add the pending digit run (or a bare unit, meaning "1 x unit") to the
	// current group.
	auto flush = [&]() {
		if (a != L"") {
			// stoll rather than stol: long is only 32 bits on Windows.
			m = m + stoll(a) * deci_sub;
		} else if (deci_sub != 1) {
			m = m + deci_sub;
		}
	};

	// Scan from the rightmost character.
	for (int pos = (int)kanji.length() - 1; pos >= 0; pos--) {
		wstring c = kanji.substr(pos, 1);
		size_t ps1 = kan_num1.find(c);
		size_t ps2 = kan_deci_sub.find(c);
		size_t ps3 = kan_deci.find(c);
		if (ps1 != wstring::npos) {
			a = kan_num2.substr(ps1, 1) + a;
		} else if (ps2 != wstring::npos) {
			flush();
			a = L"";
			deci_sub = ipow(10, ps2 + 1);
		} else if (ps3 != wstring::npos) {
			flush();
			n = m * deci + n;
			m = 0;
			a = L"";
			deci_sub = 1;
			deci = ipow(10000, ps3 + 1);
		}
	}

	// Close the leftmost run and group.
	flush();
	n = m * deci + n;

	return to_wstring(n);
}

/**
 * Convert the numeral characters inside a word to half-width digits, one
 * character at a time. Characters that are not in the numeral table are
 * copied unchanged; no positional (unit) arithmetic is performed.
 * @param	wstring kanji Word containing kanji / full-width numerals
 * @return	wstring Word with the numerals replaced by half-width digits
*/
wstring pahooNormalizeText::kanword2num(wstring kanji) {
	// Numeral characters and, at the same index, the half-width digit each
	// one stands for.
	const wstring kan_num1 = _SW("OZPQROQSlT܂UZVWX");
	const wstring kan_num2 = _SW("000111222333445566778899");

	// Conversion result
	wstring dest = L"";

	for (size_t pos = 0; pos < kanji.length(); pos++) {
		wstring c = kanji.substr(pos, 1);
		size_t ps = kan_num1.find(c);
		if (ps != wstring::npos) {
			// Known numeral: emit the digit at the same table position.
			dest += kan_num2.substr(ps, 1);
		} else {
			dest += c;
		}
	}

	return dest;
}

/**
 * Convert the decimal point of a decimal number to a half-width '.'.
 * Every occurrence of <half-width digits><separator><half-width digits> is
 * rewritten as "<digits>.<digits>".
 * NOTE(review): the separator inside the pattern is presumably the
 * full-width period; its bytes are not legible in this copy of the file.
 * @param	wstring wsour Text
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::hanfloat(wstring wsour) {
	wregex re(_SW("([0-9]+)D([0-9]+)"));

	return regex_replace(wsour, re, L"$1.$2");
}

#ifdef MECAB
/**
 * Unify Japanese text to half-width characters.
 * The text is split into morphemes with MeCab. Runs of tokens that look
 * like kanji numbers are collected and converted to half-width digits, the
 * remaining tokens are copied through, and the assembled text is finally
 * passed to wconvString() with LCMAP_HALFWIDTH.
 * @param	wstring wsour    Text to convert
 * @param	bool    trim     Whether to strip leading / trailing whitespace
 * @param	bool    variable Whether to unify notation variants
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::toHankaku(wstring wsour, bool trim, bool variable) {
	// Field separator of one MeCab output line (tab or comma)
	regex sep{"\\t|,"};
	wsmatch mt1, mt2, mt3;
	// Pattern for a token made of numerals
	wregex pat_kannum(_SW("^[^]*[0123456789OPQRSTUVWXZOlܘZ\\S疜]+$"));
	// MeCab part-of-speech pattern
	wregex re1(_SW("[]"));
	// Special-case pattern that also starts a number run
	wregex re19(_SW(""));
	// Pattern handled first: captured numerals followed by a captured suffix
	wregex re2(_SW("([OlܘZ\\]+)()"));
	// Part of speech after which a collected number is left as it is
	wregex re3(_SW("ڑ"));
	// Leading-whitespace pattern
	wregex re4(_SW("^[ @\\t\\n\\r]+"));
	// Trailing-whitespace pattern
	wregex re5(_SW("[ @\\t\\n\\r]+$"));
	// No-conversion pattern, part 1
	wregex re71(_SW(".+"));
	wregex re72(_SW("ڔ|i"));
	// No-conversion pattern, part 2
	wregex re8(_SW("^[疜]+$"));
	// No-conversion pattern, part 3
	wregex re9(_SW("ꐶ"));

	// Replace commas first, because MeCab's output is itself
	// comma-separated
	wregex re6(_SW(","));
	wsour = regex_replace(wsour, re6, _SW("C"));

	// Split into morphemes
	string input = _WS(wsour);
	const char *words = tagger->parse(input.c_str());

	// State of the conversion:
	//   flag   - a number run is being collected in numstr
	//   adverb - the run was started by the re71 branch and is treated as
	//            part of a word rather than as a quantity
	bool flag = FALSE;
	bool adverb = FALSE;
	wstring dest = L"";
	wstring numstr = L"";
	wstring surface, pos;

	// Read MeCab's output one line (one token) at a time
	string ss0;
	stringstream ss;
	ss << words;
	while(ss && getline(ss, ss0)) {
		// Field 0 is the surface form, field 2 is used as the
		// part-of-speech detail
		int cnt = 0;
		for (std::cregex_token_iterator end,
			ite{ss0.c_str(), ss0.c_str() + strlen(ss0.c_str()), sep, -1};
			ite != end; ++ite) {
			if (cnt == 0)		surface = _SW((*ite).str().c_str());
			else if (cnt == 2)	pos = _SW((*ite).str().c_str());
			cnt++;
		}
//		clog << _WS(surface) << " : " <<  _WS(pos) << endl;

		// Unify notation variants for tokens whose part of speech matches
		if (variable) {
			wregex reNoun(_SW(""));
			if (regex_search(pos, mt1, reNoun)) {
				// Load the dictionary on first use
				if (VariableDict.empty()) {
					VariableDict = this->loadVariableDictionary(VARIABILITY_REPLACE_CSV);
				}
				surface = this->variable2standard(surface);
			}
		}

		// End of MeCab output
		if (surface == _SW("EOS")) {
			break;
		// Numerals followed by the suffix captured by re2
		} else if (regex_search(surface, mt1, re2)) {
			dest += this->kan2num(mt1[1].str(), 2) + mt1[2].str();
		// No conversion
		// NOTE(review): re71 is tested on `pos` here, so the identical test
		// in the next branch can never succeed - confirm which is intended.
		} else if (regex_search(pos, mt1, re71) || regex_search(surface, mt1, re9)) {
//			clog << _WS(surface) << endl;
			dest += surface;
		} else if (flag == FALSE) {
			// Token that starts a word-like number run
			if (regex_search(pos, mt2, re71)) {
				numstr = surface;
				flag = TRUE;
				adverb = TRUE;
			// First token of a number
			 } else if (regex_search(surface, mt1, pat_kannum) && regex_search(pos, mt2, re1)) {
				numstr = surface;
				flag = TRUE;
				adverb = FALSE;
			// Special case matched by re19
			 } else if (regex_search(surface, mt2, re19)) {
				numstr = surface;
				flag = TRUE;
				adverb = FALSE;
			// Not a number
			} else {
				dest += surface;
			}
		} else {
			// No conversion (word-like run followed by a re72 token)
			if (adverb && regex_search(pos, mt2, re72)) {
				dest += (numstr + surface);
				numstr = L"";
				flag = FALSE;
				adverb = FALSE;
			// Second and later tokens of a number
			} else if (regex_search(surface, mt1, pat_kannum)) {
				numstr += surface;
				flag = TRUE;
			// Anything else ends the run
			} else {
				// Followed by a re3 part of speech: keep as it is
				if (regex_search(pos, mt2, re3)) {
					dest += (numstr + surface);
				// No-conversion pattern
				} else if (regex_search(numstr, mt1, re8)) {
					dest += (numstr + surface);
				// Word-like run: convert character by character
				} else if (adverb) {
					dest += (this->kanword2num(numstr) + surface);
				// Convert the collected kanji number to half-width digits
				} else {
					dest += (this->kan2num(numstr, 2) + surface);
				}
				numstr = L"";
				flag = FALSE;
				adverb = FALSE;
			}
		}
	}

	// Flush a number run still pending at the end of the text
	if (flag == TRUE) {
		if (adverb == TRUE) {
			dest += numstr;
		} else {
			dest += this->kan2num(numstr, 2);
		}
	}

	// Strip leading / trailing whitespace
	// NOTE(review): L"\\n" appends a backslash followed by 'n', not a
	// newline character - confirm that this is what the caller expects.
	if (trim) {
		wstring wss = regex_replace(dest, re4, L"");
		wss = regex_replace(wss, re5, L"");
		dest = wss + L"\\n";
	}

	// Special case: the text matched by re11 is rewritten to start with "18"
	wregex re11(_SW("ꔪ([^]+)"));
	dest = regex_replace(dest, re11, L"18$1");
	// Separator between two digit runs becomes a half-width '.'
	wregex re12(_SW("([0-9]+)E([0-9]+)"));
	dest = regex_replace(dest, re12, L"$1.$2");

	return wconvString(dest, LCMAP_HALFWIDTH);
}
#endif

// Predicate callbacks for han2zen()
// Half-width decimal digit ('0'..'9')
bool _decimal(wchar_t wch) {
	return ! ((wch < L'0') || (wch > L'9'));
}
// Half-width Latin letter ('A'..'Z' or 'a'..'z')
bool _alphabet(wchar_t wch) {
	if ((wch >= L'A') && (wch <= L'Z')) {
		return true;
	}
	return (wch >= L'a') && (wch <= L'z');
}
// Half-width katakana (U+FF66..U+FF9F)
bool _katakana(wchar_t wch) {
	const bool below = (wch < 0xFF66);
	const bool above = (wch > 0xFF9F);
	return ! (below || above);
}
// Punctuation / symbols: the ASCII symbol ranges plus U+FF3B..U+FF40 and
// U+FF5B..U+FF65
bool _yakumono(wchar_t wch) {
	if ((wch >= L'!') && (wch <= L'/'))		return true;
	if ((wch >= L':') && (wch <= L'@'))		return true;
	if ((wch >= L'[') && (wch <= L'`'))		return true;
	if ((wch >= L'{') && (wch <= L'~'))		return true;
	if ((wch >= 0xFF3B) && (wch <= 0xFF40))	return true;
	return (wch >= 0xFF5B) && (wch <= 0xFF65);
}

/**
 * Convert selected half-width characters to full-width.
 * Consecutive characters accepted by `func` are collected into a run, and
 * each run is converted with wconvString(..., LCMAP_FULLWIDTH). All other
 * characters are copied unchanged.
 * @param	wstring wsour Text to convert
 * @param	bool (*func)  Predicate that selects the characters to convert;
 *						when it is null the text is returned unchanged
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::han2zen(wstring wsour, bool (*func)(wchar_t wch)) {
	if (func == nullptr) {
		return wsour;
	}

	wstring pending;		// current run of characters accepted by func
	wstring wdest;

	// Characters are read directly from wsour. Taking a pointer into the
	// temporary returned by substr() would leave it dangling.
	for (wchar_t wch : wsour) {
		if (func(wch)) {
			pending += wch;
			continue;
		}
		// A non-matching character ends the run: convert what was collected.
		if (! pending.empty()) {
			wdest += wconvString(pending, LCMAP_FULLWIDTH);
			pending.clear();
		}
		wdest += wch;
	}
	// The text ended inside a run.
	if (! pending.empty()) {
		wdest += wconvString(pending, LCMAP_FULLWIDTH);
	}

	return wdest;
}

/**
 * Normalize Japanese text.
 * Steps, in order: front-end replacement, optional control-character
 * removal, half-width unification through MeCab (only when built with
 * MECAB), whitespace clean-up, then the conversions selected by the option
 * flags.
 * @param	wstring wsour  Text
 * @param	char*   option Conversion options: a string of OPTION_* flag
 *						characters; NULL is treated as "no options"
 * @param	bool    trim   Whether to strip leading / trailing whitespace
 *						(used by the MeCab step only)
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::normalizeText(const wstring wsour, const char* option, bool trim) {
	// strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	wstring wdest = wsour;

	// Front-end replacement
	wdest = this->frontend_replace(wdest);

	// Remove control characters
	if (strchr(option, OPTION_CONTROL_DEL) != NULL) {
		wregex re2(_SW("([\\t\\r\\r]+)"));
		wdest = regex_replace(wdest, re2, L"");
	}
	// Unify notation variants
	bool variable = FALSE;
	if (strchr(option, OPTION_VARIABLE) != NULL) {
		variable = TRUE;
	}
	// Both values are consumed only by the MeCab build. Mark them as used
	// so that non-MeCab builds compile without warnings and without
	// printing anything.
	(void)variable;
	(void)trim;

	// Unify to half-width
#ifdef MECAB
	wdest = this->toHankaku(wdest, trim, variable);
#endif
//	clog << _WS(wdest) << endl;

	// Remove whitespace next to the characters matched by re11 / re12
	wregex re11(_SW("[ @\\t]+([^!-~].)"));
	wdest = regex_replace(wdest, re11, L"$1");
	wregex re12(_SW("([^!-~].)[ @\\t]+"));
	wdest = regex_replace(wdest, re12, L"$1");

	// Latin letters: half-width -> full-width
	if (strchr(option, OPTION_ALP_ZEN) != NULL) {
		wdest = this->han2zen(wdest, _alphabet);
	}
	// Numbers: unit notation
	if (strchr(option, OPTION_NUM_SCALE) != NULL) {
		wdest = this->bignum2scale(wdest);
	}
	// Numbers: half-width -> full-width
	if (strchr(option, OPTION_NUM_ZEN) != NULL) {
		wdest = this->han2zen(wdest, _decimal);
	// Numbers: half-width -> kanji numerals
	} else if (strchr(option, OPTION_NUM_KAN) != NULL) {
		wdest = this->num2kan(wdest);
	// Numbers: half-width -> kanji numerals (simple transliteration)
	} else if (strchr(option, OPTION_NUM_KAN2) != NULL) {
		wdest = this->num2kanSimple(wdest);
	}
	// Punctuation: half-width -> full-width
	if (strchr(option, OPTION_YAK_ZEN) != NULL) {
		wdest = this->han2zen(wdest, _yakumono);
	}
	// Katakana: half-width -> full-width
	if (strchr(option, OPTION_KANA_ZEN) != NULL) {
		wdest = this->han2zen(wdest, _katakana);
	}
	// Decimal point of decimal numbers -> half-width
	wdest = this->hanfloat(wdest);

	// Whitespace in front of a half-width character becomes a single
	// half-width space
	wregex re2(_SW("[@]+([!-~].)"));
	wdest = regex_replace(wdest, re2, L" $1");

	// Opening / closing forms of double quotation marks
	if (strchr(option, OPTION_DBL_QUOTE) != NULL) {
		wregex re3(_SW("W([^W]*)W"));
		wdest = regex_replace(wdest, re3, _SW("g") + L"$1" + _SW("h"));
	}

	return wdest;
}

// Notation-variant unification ================================================
/**
 * Load the notation-variant dictionary file into memory.
 * Each line has the form "<variant>,<standard form>", split at the first
 * comma. Both fields are converted from UTF-8 to wstring. Lines without a
 * comma are ignored.
 * NOTE(review): a UTF-8 byte-order mark at the start of the file is not
 * stripped and would become part of the first key.
 * @param	string filename Dictionary file name
 * @return	std::vector<std::pair<std::wstring, std::wstring>> Dictionary
 *			entries, sorted so that longer keys come first; empty when the
 *			file cannot be opened
*/
std::vector<std::pair<std::wstring, std::wstring>> pahooNormalizeText::loadVariableDictionary(const std::string& filename) {
	std::vector<std::pair<std::wstring, std::wstring>> dict;
	std::ifstream file(filename, std::ios::binary);
	if (!file) { 
		// The failure is reported on standard output only; the caller
		// receives an empty dictionary.
		cout << "t@CJ܂: " << filename << endl;
		return dict;
	}

	std::string line;
	while (std::getline(file, line)) {
		// The file is opened in binary mode, so strip any trailing \r or \n
		while (!line.empty() && (line.back() == '\n' || line.back() == '\r')) {
			line.pop_back();
		}

		size_t commaPos = line.find(',');
		if (commaPos != std::string::npos) {
			std::wstring key = __utf8_wstring(line.substr(0, commaPos));
			std::wstring value = __utf8_wstring(line.substr(commaPos + 1));
			dict.emplace_back(key, value);
		}
	}

	// Sort by key length, longest first
	std::sort(dict.begin(), dict.end(),
			[](const auto& a, const auto& b){ return a.first.size() > b.first.size(); });

    return dict;
}

/**
 * Replace a word with its standard notation.
 * Looks the word up as a key in VariableDict (exact match; the first
 * matching entry wins).
 * @param	wstring word Word
 * @return	wstring The standard form when the word is in the dictionary,
 *					otherwise the word itself
*/
wstring pahooNormalizeText::variable2standard(wstring word) {
	for (const auto& entry : VariableDict) {
		if (entry.first == word) {
			// Match: replace with the dictionary value
			return entry.second;
		}
	}
	// No match: keep the word as it is
	return word;
}

// Era-name (Japanese calendar) conversion =====================================
/**
 * Read the era conversion table file into TableEra.
 * Each line that contains an entry of the form  'digits' => 'name'  (single
 * or double quotes) adds one entry: the digits become the start value and
 * the name becomes the era name. Other lines are ignored.
 * On failure to open the file an error message is stored with setError()
 * and the table is left empty.
 * @param	const char* fname File to read
 * @return	none
*/
void pahooNormalizeText::readTableEra(const char* fname) {
	// Pattern of one table entry
	wregex re(_SW("[\'\"]([0-9 ]+)[\'\"]\\s*=>\\s*[\'\"](.*)[\'\"]"));
	wsmatch mt;

	// Discard any previously loaded table
	TableEra.clear();
	TableEra.shrink_to_fit();

	// Read the file
	ifstream ifs(fname);
	if (! ifs) {
		this->setError(_SW((string)fname + " ̓ǂݍ݂Ɏs܂"));
		return;
	}
	string ss;
	wstring wss;
	while (getline(ifs, ss)) {
		wss = _SW(ss);
		if (regex_search(wss, mt, re)) {
			_tableEra te;
			// Start value as a decimal number (compared against YYYYMMDD
			// values elsewhere in this class)
			te.start = wcstol((const wchar_t*)mt[1].str().c_str(), NULL, 10);
			te.era = mt[2].str();
			TableEra.push_back(te);
		}
	}
}

/**
 * Set the date range in which era conversion is applied.
 * @param	long start First date of the range, as YYYYMMDD
 * @param	long end   Last date of the range, as YYYYMMDD
 * @return	none
*/
void pahooNormalizeText::setScope(long start, long end) {
	this->convEnd   = end;
	this->convStart = start;
}

/**
 * Check whether a date lies inside the conversion range set by setScope()
 * (both ends inclusive).
 * @param	long yyyymmdd Date as YYYYMMDD
 * @return	bool TRUE/FALSE
*/
bool pahooNormalizeText::isConvEra(long yyyymmdd) {
	if (yyyymmdd < this->convStart) {
		return false;
	}
	return yyyymmdd <= this->convEnd;
}

/**
 * Decide whether a string contains an era name.
 * Every era name in TableEra is used as a regular expression and searched
 * for in the string.
 * NOTE(review): an entry with an empty era name produces an empty regex,
 * which matches any string - confirm whether such entries can occur.
 * @param	wstring wstr String
 * @return	bool TRUE/FALSE (always FALSE for an empty string)
*/
bool pahooNormalizeText::isEra(wstring wstr) {
	if (wstr.empty()) {
		return false;
	}

	return std::any_of(TableEra.begin(), TableEra.end(),
			[&wstr](const auto& entry) {
				const wregex pattern(entry.era);
				return regex_search(wstr, pattern);
			});
}

/**
 * Convert a Western (AD) year to a Japanese era year.
 * The conversion is applied only when the prefix does not match ESCYEAR and
 * the date lies inside the range set by setScope(); otherwise the year is
 * only re-formatted according to the options.
 * NOTE(review): the era is emitted when the loop reaches the first table
 * entry that starts after the date, so a date on or after the last entry's
 * start yields an empty year part - presumably the table ends with a
 * sentinel entry; confirm.
 * @param	wstring prefix Text found in front of the year
 * @param	int year  Year
 * @param	int month Month (0 = omitted)
 * @param	int day   Day (0 = omitted)
 * @param	char* option Conversion options (string of OPTION_* flag
 *					characters); NULL is treated as "no options"
 * @return	wstring Date text
*/
wstring pahooNormalizeText::ad2era(wstring prefix, int year, int month=0, int day=0, const char* option=NULL) {
	// option defaults to NULL, and strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	wstring dest = L"";
	wstring last = L"";
	wstring wstr = L"";
	wsmatch mt;
	wregex re(ESCYEAR);
	long yy = 0;
	long yyyymmdd = year * 10000 + month * 100 + day;

	// Decide whether to convert to an era year
	if (! regex_search(prefix, mt, re) && isConvEra(yyyymmdd)) {
		if (! this->isEra(prefix)) {
			for (auto& te : TableEra) {
				if (yyyymmdd >= te.start) {
					// Remember the latest era that has started by this date
					yy   = year - (te.start / 10000);
					last = te.era;
				} else if (last != L"") {
					// First era that starts after the date: the remembered
					// one is the era of the date. yy is zero-based.
					if (yy == 0) {
						wstr = _SW("");
					} else if (strchr(option, OPTION_ERA_ZEN) != NULL) {
						wstr = this->han2zen(to_wstring(yy + 1), _decimal);
					} else if (strchr(option, OPTION_ERA_KAN1) != NULL) {
						wstr = this->num2kanSimple(to_wstring(yy + 1));
					} else if (strchr(option, OPTION_ERA_KAN2) != NULL) {
						wstr = this->num2kan(to_wstring(yy + 1));
					} else {
						wstr = to_wstring(yy + 1);
					}
					dest = prefix + last + wstr + _SW("N");
					break;
				}
			}
		} else {
			// The prefix already contains an era name: format the year only
			if (year == 1) {
				wstr = _SW("");
			} else if (strchr(option, OPTION_ERA_ZEN) != NULL) {
				wstr = this->han2zen(to_wstring(year), _decimal);
			} else if (strchr(option, OPTION_ERA_KAN1) != NULL) {
				wstr = this->num2kanSimple(to_wstring(year));
			} else if (strchr(option, OPTION_ERA_KAN2) != NULL) {
				wstr = this->num2kan(to_wstring(year));
			} else {
				wstr = to_wstring(year);
			}
			dest = wstr + _SW("N");
		}
	// No conversion (the year stays as it is, only re-formatted)
	} else {
		if (year == 1) {
			wstr = _SW("");
		} else if (strchr(option, OPTION_ERA_ZEN) != NULL) {
			wstr = this->han2zen(to_wstring(year), _decimal);
		} else if (strchr(option, OPTION_ERA_KAN1) != NULL) {
			wstr = this->num2kanSimple(to_wstring(year));
		} else if (strchr(option, OPTION_ERA_KAN2) != NULL) {
			wstr = this->num2kan(to_wstring(year));
		} else {
			wstr = to_wstring(year);
		}
		dest = prefix + wstr + _SW("N");
	}

	// Month and day
	if (month > 0) {
		dest += to_wstring(month) + _SW("");
	}
	if (day > 0) {
		dest += to_wstring(day) + _SW("");
	}

	return dest;
}

/**
 * Convert a Japanese era year to a Western (AD) year.
 * The prefix is looked up in TableEra by exact match of the era name. The
 * conversion is applied only when the prefix does not match ESCYEAR and the
 * resulting date lies inside the range set by setScope(); otherwise the era
 * year is only re-formatted according to the options.
 * @param	wstring prefix Text found in front of the year (the era name)
 * @param	int year  Year within the era
 * @param	int month Month (0 = omitted)
 * @param	int day   Day (0 = omitted)
 * @param	char* option Conversion options (string of OPTION_* flag
 *					characters); NULL is treated as "no options"
 * @return	wstring Date text
*/
wstring pahooNormalizeText::era2ad(wstring prefix, int year, int month=0, int day=0, const char* option=NULL) {
	// option defaults to NULL, and strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	wstring dest = L"";
	wstring last = L"";
	wstring wstr = L"0";
	wregex re(ESCYEAR);
	wsmatch mt;
	long yy = 0;

	// Convert to the Western year
	for (auto& te : TableEra) {
		if ((te.era != L"") && (te.era == prefix)) {
			// Era years are one-based: year 1 is the era's starting year
			yy   = year + (int)(te.start / 10000) - 1;
			if (strchr(option, OPTION_AD_ZEN) != NULL) {
				wstr = this->han2zen(to_wstring(yy), _decimal);
			} else if (strchr(option, OPTION_AD_KAN1) != NULL) {
				wstr = this->num2kanSimple(to_wstring(yy));
			} else if (strchr(option, OPTION_AD_KAN2) != NULL) {
				wstr = this->num2kan(to_wstring(yy));
			} else {
				wstr = to_wstring(yy);
			}
			break;
		}
	}

	// Decide whether to convert to the Western year
	long yyyymmdd = yy * 10000 + month * 100 + day;
	if (! regex_search(prefix, mt, re) && isConvEra(yyyymmdd)) {
		dest = wstr + _SW("N");
	// No conversion (the era year stays, only re-formatted)
	} else {
		// Format the era year number
		if (year == 1) {
			wstr = _SW("");
		} else if (strchr(option, OPTION_AD_ZEN) != NULL) {
			wstr = this->han2zen(to_wstring(year), _decimal);
		} else if (strchr(option, OPTION_AD_KAN1) != NULL) {
			wstr = this->num2kanSimple(to_wstring(year));
		} else if (strchr(option, OPTION_AD_KAN2) != NULL) {
			wstr = this->num2kan(to_wstring(year));
		} else {
			wstr = to_wstring(year);
		}
		dest = prefix + wstr + _SW("N");
	}
	// Month and day
	if (month > 0) {
		dest += to_wstring(month) + _SW("");
	}
	if (day > 0) {
		dest += to_wstring(day) + _SW("");
	}

	return dest;
}

/**
 * Western year -> era year, for one regex match (kanji numerals supported).
 * Sub-match 1 is the prefix, 2 the year, 3 the month and 4 the day. Each
 * number is converted with kan2num(); an empty sub-match becomes 0.
 * @param	wsmatch mt Match of the year pattern
 * @param	char* option Conversion options, passed through to ad2era()
 * @return	wstring Date text produced by ad2era()
*/
wstring pahooNormalizeText::seireki2wareki(wsmatch mt, const char* option=NULL) {
	wstring year, month, day;

	if (mt[2].str() != L"") {
		// The literal compared here is treated as year 1 (presumably the
		// "first year" character; it is not legible in this copy)
		if (mt[2].str() == _SW("")) {
			year = L"1";
		} else {
			year = this->kan2num(mt[2].str(), 0);
		}
	} else {
		year = _SW("0");
	}
	if (mt[3].str() != L"") {
		month = this->kan2num(mt[3].str(), 0);
	} else {
		month = _SW("0");
	}
	if (mt[4].str() != L"") {
		day = this->kan2num(mt[4].str(), 0);
	} else {
		day = _SW("0");
	}

	return this->ad2era(mt[1].str(), stoi(year), stoi(month), stoi(day), option);
}

/**
 * Unify the year expressions of a text to the Japanese era calendar.
 * Every match of the year pattern is replaced by the result of
 * seireki2wareki(); text between matches is copied unchanged. A text with
 * no year expression is returned unchanged.
 * @param	wstring wsour  Original text
 * @param	char*   option Conversion options
 * @param	bool    flag   TRUE = remove text matching ESCYEAR from each
 *						converted expression / FALSE = leave it
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::wareki(wstring wsour, const char* option=NULL, bool flag=TRUE) {
	wregex re(_SW("([^0-9O-XZOlܘZ\\S疜-A-I-]{0,4})\\s*([0-9O-XZOlܘZ\\S疜]+)N\\s*([0-9O-XZOlܘZ\\]*)?([0-9O-XZOlܘZ\\]*)?"));
	wregex re2(ESCYEAR);
	wsmatch mt;

	wstring wdest = L"";
	// Text after the last match. It starts as the whole input so that a
	// text without any match passes through instead of becoming empty.
	wstring suffix = wsour;
	for (bool ismatch = regex_search(wsour, mt, re); ismatch != FALSE;
			ismatch = regex_search(mt[0].second, mt.suffix().second, mt, re)) {
		if (flag) {
			wdest += (wstring)mt.prefix() + 
				regex_replace(this->seireki2wareki(mt, option), re2, L"");
		} else {
			wdest += (wstring)mt.prefix() + this->seireki2wareki(mt, option);
		}
		suffix = (wstring)mt.suffix();		// text not converted yet
	}
	wdest += suffix;

	return wdest;
}

/**
 * Era year -> Western year, for one regex match (kanji numerals supported).
 * Sub-match 1 is the prefix (era name), 2 the year, 3 the month and 4 the
 * day. Each number is converted with kan2num(); an empty month or day
 * becomes 0.
 * NOTE(review): `year` stays empty when sub-match 2 is empty and is not
 * equal to the literal below; stoi() would then throw - the callers'
 * patterns appear to require a non-empty year, confirm.
 * @param	wsmatch mt Match of the year pattern
 * @param	char* option Conversion options, passed through to era2ad()
 * @return	wstring Date text produced by era2ad()
*/
wstring pahooNormalizeText::wareki2seireki(wsmatch mt, const char* option=NULL) {
	wstring year, month, day;

	// The literal compared here is treated as year 1 (presumably the
	// "first year" character; it is not legible in this copy)
	if (mt[2].str() == _SW("")) {
		year = L"1";
	} else if (mt[2].str() != L"") {
		year = this->kan2num(mt[2].str(), 0);
	}
	if (mt[3].str() != L"") {
		month = this->kan2num(mt[3].str(), 0);
	} else {
		month = _SW("0");
	}
	if (mt[4].str() != L"") {
		day = this->kan2num(mt[4].str(), 0);
	} else {
		day = _SW("0");
	}

	return this->era2ad(mt[1].str(), stoi(year), stoi(month), stoi(day), option);
}

/**
 * Unify the year expressions of a text to the Western calendar.
 * Every match of the year pattern is replaced by the result of
 * wareki2seireki(); text between matches is copied unchanged. A text with
 * no year expression is returned unchanged.
 * @param	wstring wsour  Original text
 * @param	char*   option Conversion options
 * @param	bool    flag   TRUE = remove text matching ESCYEAR from each
 *						converted expression / FALSE = leave it
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::seireki(wstring wsour, const char* option=NULL, bool flag=TRUE) {
	wregex re(_SW("([^0-9O-XZOlܘZ\\S疜-A-I-]{0,4})\\s*([0-9O-XZOlܘZ\\S疜]+)N\\s*([0-9O-XZOlܘZ\\]*)?([0-9O-XZOlܘZ\\]*)?"));
	wregex re2(ESCYEAR);
	wsmatch mt;

	wstring wdest = L"";
	// Text after the last match. It starts as the whole input so that a
	// text without any match passes through instead of becoming empty.
	wstring suffix = wsour;
	for (bool ismatch = regex_search(wsour, mt, re); ismatch != FALSE;
			ismatch = regex_search(mt[0].second, mt.suffix().second, mt, re)) {
		if (flag) {
			wdest += (wstring)mt.prefix() + 
				regex_replace(this->wareki2seireki(mt, option), re2, L"");
		} else {
			wdest += (wstring)mt.prefix() + this->wareki2seireki(mt, option);
		}
		suffix = (wstring)mt.suffix();		// text not converted yet
	}
	wdest += suffix;

	return wdest;
}

/**
 * Build the combined "Western year (era year)" form for one regex match.
 * The Western form comes from wareki2seireki(). The era form is rebuilt
 * from sub-match 1 (prefix) and sub-match 2 (year), formatted according to
 * the OPTION_ERA_* flags. When both forms are identical only one is output.
 * @param	wsmatch mt     Match of the year pattern
 * @param	char*   option Conversion options; NULL is treated as "no options"
 * @return	wstring Combined year text
*/
wstring pahooNormalizeText::seireki2mix(wsmatch mt, const char* option=NULL) {
	// option defaults to NULL, and strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	wstring wdest = L"";
	wstring wstr  = L"";
	wstring ad = this->wareki2seireki(mt, option);

	// Format the era year number
	if (strchr(option, OPTION_ERA_ZEN) != NULL) {
		wstr = this->han2zen(mt[2].str(), _decimal);
	} else if (strchr(option, OPTION_ERA_KAN1) != NULL) {
		wstr = this->num2kanSimple(mt[2].str());
	} else if (strchr(option, OPTION_ERA_KAN2) != NULL) {
		wstr = this->num2kan(mt[2].str());
	} else {
		wstr = mt[2].str();
	}
	wstring wareki = mt[1].str() + wstr + _SW("N");

	// Add the era form in brackets only when it differs from the Western form
	if (ad != wareki) {
		wdest = ad + _SW("i") + wareki + _SW("j");
	} else {
		wdest = ad;
	}

	return wdest;
}

/**
 * Convert the year expressions of a text to the combined
 * "Western year (era year)" form.
 * The text is first unified to era years with half-width digits, then every
 * match of the year pattern is replaced by the result of seireki2mix().
 * When no year expression is found, the era-unified text is returned as is.
 * @param	wstring wsour  Original text
 * @param	char*   option Conversion options
 * @param	bool    flag   TRUE = remove text matching ESCYEAR from each
 *						converted expression / FALSE = leave it
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::mixture(wstring wsour, const char* option=NULL, bool flag=TRUE) {
	wregex re(_SW("([^0-9O-XZOlܘZ\\S疜-A-I-]{0,4})([0-9]+)N"));
	wregex re2(ESCYEAR);
	wsmatch mt;
	static const char opt[] = { OPTION_ERA_HAN, 0x00 };

	wstring wstr = this->wareki(wsour, opt, FALSE);		// unify to era years (half-width)

	wstring wdest  = L"";
	// Text after the last match. It starts as the whole intermediate text
	// so that a text without any match passes through instead of becoming
	// empty.
	wstring suffix = wstr;
	for (bool ismatch = regex_search(wstr, mt, re); ismatch != FALSE;
			ismatch = regex_search(mt[0].second, mt.suffix().second, mt, re)) {
		if (flag) {
			wdest += (wstring)mt.prefix() +
				regex_replace(this->seireki2mix(mt, option), re2, L"");
		} else {
			wdest += (wstring)mt.prefix() + this->seireki2mix(mt, option);
		}
		suffix = (wstring)mt.suffix();		// text not converted yet
	}
	wdest += suffix;

	return wdest;
}

/**
 * Convert the numbers of month / day expressions.
 * Each match consists of a one- or two-character number (sub-match 1)
 * followed by a month or day suffix (sub-match 2). The number is
 * re-formatted according to the mode and number-style flags in `option`.
 * With no mode flag the number is kept as written. A text without any
 * match is returned unchanged.
 * @param	wstring wsour  Original text
 * @param	char*   option Conversion options; NULL is treated as "no options"
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::monthday(wstring wsour, const char* option=NULL) {
	// option defaults to NULL, and strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	wregex re(_SW("([0-9O-XZOlܘZ\\]{1,2})(|)"));
	wsmatch mt;

	// Text after the last match. It starts as the whole input so that a
	// text without any match passes through instead of becoming empty.
	wstring suffix = wsour;
	wstring wdest  = L"";
	wstring wstr   = L"";

	for (bool ismatch = regex_search(wsour, mt, re); ismatch != FALSE;
			ismatch = regex_search(mt[0].second, mt.suffix().second, mt, re)) {
		// Default: keep the number as written. This also stops a value
		// from an earlier match being reused when no mode flag is present.
		wstr = mt[1].str();
		if ((strchr(option, OPTION_MODE_AD) != NULL) || 
				(strchr(option, OPTION_MODE_ADERA) != NULL)) {
			if (strchr(option, OPTION_AD_ZEN) != NULL) {
				wstr = this->han2zen(mt[1].str(), _decimal);
			} else if (strchr(option, OPTION_AD_KAN1) != NULL) {
				wstr = this->num2kanSimple(mt[1].str());
			} else if (strchr(option, OPTION_AD_KAN2) != NULL) {
				wstr = this->num2kan(mt[1].str());
			} else {
				wstr = wconvString(mt[1].str(), LCMAP_HALFWIDTH);
			}
		} else if (strchr(option, OPTION_MODE_ERA) != NULL) { 
			if (strchr(option, OPTION_ERA_ZEN) != NULL) {
				wstr = this->han2zen(mt[1].str(), _decimal);
			} else if (strchr(option, OPTION_ERA_KAN1) != NULL) {
				wstr = this->num2kanSimple(mt[1].str());
			} else if (strchr(option, OPTION_ERA_KAN2) != NULL) {
				wstr = this->num2kan(mt[1].str());
			} else {
				wstr = wconvString(mt[1].str(), LCMAP_HALFWIDTH);
			}
		}
		wdest += (wstring)mt.prefix() + wstr + mt[2].str();
		suffix = (wstring)mt.suffix();		// text not converted yet
	}
	wdest += suffix;

	return wdest;
}

/**
 * Convert the year / era expressions of a text.
 * The OPTION_SCOPE_* flags select the date range in which conversion is
 * applied (the current range is kept when none is present). The
 * OPTION_MODE_* flags select the target form: Western, era, or
 * "Western (era)". Month and day numbers are converted afterwards.
 * With no mode flag the year expressions are left as they are.
 * @param	wstring wsour  Original text
 * @param	char*   option Conversion options; NULL is treated as "no options"
 * @return	wstring Converted text
*/
wstring pahooNormalizeText::convNengo(wstring wsour, const char* option) {
	// strchr() must not be given a null pointer.
	if (option == NULL) {
		option = "";
	}
	// Start from the input, so that the text is not lost when no mode flag
	// is given.
	wstring wdest = wsour;

	// Conversion range
	if (strchr(option, OPTION_SCOPE_1) != NULL) {
		setScope(START_SCOPE1, END_SCOPE);
	} else if (strchr(option, OPTION_SCOPE_2) != NULL) {
		setScope(START_SCOPE2, END_SCOPE);
	} else if (strchr(option, OPTION_SCOPE_0) != NULL) {
		setScope(END_SCOPE, END_SCOPE);
	}

	// Unify to the Western calendar
	if (strchr(option, OPTION_MODE_AD) != NULL) {
		wdest = this->seireki(wsour, option, TRUE);
	// Unify to the era calendar
	} else if (strchr(option, OPTION_MODE_ERA) != NULL) {
		wdest = this->wareki(wsour, option, TRUE);
	// Western (era)
	} else if (strchr(option, OPTION_MODE_ADERA) != NULL) {
		wdest = this->mixture(wsour, option, TRUE);
	}
	// Month and day
	wdest = this->monthday(wdest, option);

	return wdest;
}

/*
 ** o[WAbv =====================================================
 *
 * @version 2.0.1  2025/11/23 bug-fix: ϐvariablegp[jO΍
 * @version 2.0.0  2025/08/30 \Lꓝ@\ǉ
 * @version 1.9.1  2023/12/17 pahooNormalizeText() - MECABgp̑΍
 * @version 1.9.0  2023/10/25 normalizeText() - dp̊J
 * @version 1.8.0  2023/10/25 pahooNormalizeText() - MeCab`FbNǉ
 * @version 1.7.2  2023/07/30 bignum2scale() - wsour̒4004݂ꍇɂΉ
 * @version 1.7.1  2023/04/22 toHankaku() - uvΉ
 * @version 1.7.0  2023/04/08 toHankaku() - u\ԁvuEvΉ
 * @version 1.6    2022/07/03 tgGhu frontend_replace() ǉ
 * @version 1.53   2022/05/06 toHankaku():̏𖳕ϊD
 * @version 1.52   2022/04/29 䕶폜@\ǉ
 * @version 1.51   2022/02/19 bug-fix
 * @version 1.5    2022/01/03 1.3`1.4̑΍Ȃǂi΍ɉ
 * @version 1.4    2021/12/02 uꋓvuԁvȂǕ΍
 * @version 1.3    2021/11/06 uꗥvu疜vȂǕ΍
 * @version 1.11   2021/02/01 _yakumono() ɕsĂLǉ
 * @version 1.1    2020/11/14 ̌ϊǉ
 * @version 1.01   2020/11/10 toHankaku(), _yakumono() bug-fix
 * @version 1.0    2020/10/20 
 */
