/*
 * Purpose      : Romanization of Hangul (Korean) text.
 * Development  : Im Mun-hwan, 2004 (name romanized from the original Korean credit).
 * Distribution : free.
 */

/*
 * Romanization of Korean, Chapter 1 (basic principles of transcription):
 *   Article 1. Romanization of Korean follows the standard pronunciation of Korean.
 *   Article 2. Symbols other than Roman letters are avoided as far as possible.
 */

/*
 * Author's notes:
 *   - This implementation romanizes by spelling (letter by letter), not by pronunciation.
 *     Pronunciation-based romanization is planned for later.
 *   - The Johab (combining) code page is used because it allows a Hangul syllable to be
 *     split into its initial / medial / final letters.
 *   - The rules of the Wansung (precomposed) code page have not been analyzed yet.
 */

#include "KoreanRomanNotation.h"

///////////////////////////////////////////////////////////////////////
namespace Koreanromannotation
{

// Romanization tables currently in effect. RomanFromKor() re-points them on every call
// according to its by_pronunciation argument, and RomanFromKorChar() reads them.
// They default to the spelling-based tables so that RomanFromKorChar() can be called
// safely before any call to RomanFromKor().
// NOTE(review): these are shared mutable globals; concurrent RomanFromKor() calls with
// different by_pronunciation values would interfere with each other.
const char **Cho_seong_roman  = (const char **)Cho_seong_roman_by_char;
const char **Jung_seong_roman = (const char **)Jung_seong_roman_by_char;
const char **Jong_seong_roman = (const char **)Jong_seong_roman_by_char;

namespace
{
    // Johab component codes as they appear when the two bytes of a character are read
    // as a little-endian 16-bit value (lead byte in the low byte).
    enum
    {
        kJohabFillCode      = 0x4184,   // "no initial", "no medial" and "no final" all map here
        kJongseongRieulCode = 0x4984,   // final rieul, romanized "l"
        kChoseongRieulCode  = 0x419C    // initial rieul, romanized "r"
    };

    // Owns a new[]-allocated char buffer and releases it when the scope is left.
    struct CharArrayGuard
    {
        char *ptr;
        explicit CharArrayGuard(char *p) : ptr(p) {}
        ~CharArrayGuard() { delete[] ptr; }
    private:
        CharArrayGuard(const CharArrayGuard&);
        CharArrayGuard& operator=(const CharArrayGuard&);
    };

    // Swaps the two bytes of a Johab code.
    inline KorCodeType SwapBytes(KorCodeType code)
    {
        return (KorCodeType)((code>>8) | (code<<8));
    }

    // Upper-cases the first character of text, but only when it is a lowercase ASCII letter.
    template<class StringT>
    void UppercaseFirstLetter(StringT& text)
    {
        if(text.size()!=0 && text[0]>='a' && text[0]<='z')
            text[0] -= ('a'-'A');
    }

    // Converts one double-byte Johab character back to Wansung and appends it to res.
    // Nothing is appended when the conversion fails.
    void AppendJohabCharAsWansung(unsigned char lead,unsigned char trail,string_type& res)
    {
        char buff[3];
        buff[0] = (char)lead;
        buff[1] = (char)trail;
        buff[2] = 0;

        CharArrayGuard wansung(ConvertCodePage(buff,CP_KOREAN_JOHAB,CP_KOREAN_WANSUNG,0));
        if(wansung.ptr)
            res += wansung.ptr;
    }
}

///////////////////////////////////////////////////////////////////////
/*
 * Romanizes the Hangul characters of a string that may also contain other characters.
 *
 *   str                  : input text in the Wansung (CP_KOREAN_WANSUNG) code page
 *   by_pronunciation     : whether to romanize by pronunciation (selects the
 *                          *_by_pronunciation tables and calls Pronounce())
 *   each_char_to_capital : whether to upper-case the first letter of each syllable's
 *                          romanization
 *   space_each_char      : whether to insert a space before each Hangul syllable
 *                          (except one at the very start of the text)
 *
 * Returns the romanized text. ASCII bytes are copied through unchanged; other double-byte
 * characters are converted back to Wansung and copied through. An empty result is returned
 * when the input is empty or cannot be converted to Johab.
 */
string_type RomanFromKor(const string_type& str,bool by_pronunciation,bool each_char_to_capital,bool space_each_char)
{
    string_type res;

    // ConvertCodePage() returns NULL for an empty string or a failed conversion.
    CharArrayGuard johab(ConvertCodePage(str.c_str(),CP_KOREAN_WANSUNG,CP_KOREAN_JOHAB,0));
    if(!johab.ptr)
        return res;
    const unsigned char *p = (const unsigned char *)johab.ptr;

    if(by_pronunciation)
    {
        Cho_seong_roman  = (const char **)Cho_seong_roman_by_pronunciation;
        Jung_seong_roman = (const char **)Jung_seong_roman_by_pronunciation;
        Jong_seong_roman = (const char **)Jong_seong_roman_by_pronunciation;
    }
    else
    {
        Cho_seong_roman  = (const char **)Cho_seong_roman_by_char;
        Jung_seong_roman = (const char **)Jung_seong_roman_by_char;
        Jong_seong_roman = (const char **)Jong_seong_roman_by_char;
    }

    TCharInfo prev_char_info,char_info;
    prev_char_info.Clear();

    for(int i=0; p[i]; ++i)
    {
        const bool at_start = (i==0);
        const unsigned char lead = p[i];

        // ASCII: copy through unchanged.
        if(lead<128)
        {
            res += lead;
            prev_char_info.Clear();
            continue;
        }

        // Everything else is double-byte. A lead byte directly followed by the terminator
        // is malformed; stop rather than step past the end of the buffer.
        const unsigned char trail = p[i+1];
        if(trail==0)
            break;
        ++i;    // the trail byte is consumed together with the lead byte

        // Johab lead-byte ranges (per the original author's note):
        //   Hangul: 84~D3, circled characters / Japanese etc.: D8~DE, Hanja: E0~F9
        bool romanized = false;
        if(lead>=0x84 && lead<=0xD3)
        {
            // Assemble the code byte by byte (lead byte in the low byte) instead of casting
            // the buffer pointer, which would be an unaligned read at odd offsets.
            const KorCodeType kor_code = (KorCodeType)(lead | (trail<<8));

            // Expected to succeed for every well-formed Hangul code; a failure means the
            // code has an out-of-range medial or final component.
            romanized = RomanFromKorChar(kor_code,char_info);
        }

        if(!romanized)
        {
            // Japanese, Hanja, circled characters, etc. (and malformed Hangul codes):
            // convert back to Wansung and copy through.
            AppendJohabCharAsWansung(lead,trail,res);
            prev_char_info.Clear();
            continue;
        }

        // Pronunciation-based romanization has to apply phonological rules.
        if(by_pronunciation)
            Pronounce(prev_char_info,char_info,res);

        if(!at_start)
        {
            // A final rieul followed by an initial rieul is written "ll", as in "Ulleungdo".
            if(!by_pronunciation
                && prev_char_info.cho_seong!=kJohabFillCode
                && prev_char_info.jong_seong==kJongseongRieulCode
                && char_info.cho_seong==kChoseongRieulCode
                && char_info.jung_seong!=kJohabFillCode)
            {
                if(res.size()!=0)
                    res[res.size()-1] = 'l';
                char_info.cho_seong_roman = "l";
            }

            if(space_each_char)
                res += " ";
        }

        if(each_char_to_capital)
        {
            // Capitalize the initial consonant, or the vowel when there is no initial text.
            if(char_info.cho_seong_roman.size()!=0)
                UppercaseFirstLetter(char_info.cho_seong_roman);
            else if(char_info.jung_seong_roman.size()!=0)
                UppercaseFirstLetter(char_info.jung_seong_roman);
        }

        res += char_info.cho_seong_roman + char_info.jung_seong_roman + char_info.jong_seong_roman;
        prev_char_info = char_info;
    }

    return res;
}

///////////////////////////////////////////////////////////////////////
/*
 * Romanizes a single Hangul character.
 *
 *   kor_code  : Johab code of the character (lead byte in the low byte)
 *   char_info : receives the code, its parsed components and their romanizations
 *
 * Returns false as soon as one component is outside its table; in that case only the
 * romanization fields assigned before the failing component have been updated.
 */
bool RomanFromKorChar(KorCodeType kor_code,TCharInfo& char_info)
{
    char_info.kor_code = kor_code;
    char_info.Parse();

    const int cho_idx = GetRomanCodeIndexFromChoseong(char_info.cho_seong);
    if(cho_idx<0)
        return false;
    char_info.cho_seong_roman = Cho_seong_roman[cho_idx];

    const int jung_idx = GetRomanCodeIndexFromJungseong(char_info.jung_seong);
    if(jung_idx<0)
        return false;
    char_info.jung_seong_roman = Jung_seong_roman[jung_idx];

    const int jong_idx = GetRomanCodeIndexFromJongseong(char_info.jong_seong);
    if(jong_idx<0)
        return false;
    char_info.jong_seong_roman = Jong_seong_roman[jong_idx];

    // A character consisting of the initial ieung alone (table index 12, no medial, no final).
    // It is written "ng" here; the original author doubted whether that is correct.
    if(cho_idx==12 && char_info.cho_seong_roman.empty() && jung_idx==0 && jong_idx==0)
        char_info.cho_seong_roman = "ng";

    return true;
}

///////////////////////////////////////////////////////////////////////
// In the Johab code page, initial consonants start at 0x4184 and are spaced 4 apart
// (0x4184 means "no initial") => 0x4184~0x41D0.
// Returns the index into the initial-consonant table, or -1 if the code is not an initial.
int GetRomanCodeIndexFromChoseong(KorCodeType choseong_code)
{
    const int offset = (choseong_code-0x4184);
    if(choseong_code>=0x4184 && choseong_code<=0x41D0 && offset%4==0)
        return (offset/4);
    return -1;
}

///////////////////////////////////////////////////////////////////////
// In the Johab code page, medial vowels start at 0x4184 and, once the two bytes of the
// code are swapped, are spaced 0x20 apart (0x4184 means "no medial") => 0x4184~0xA187.
// Returns the index into the medial-vowel table, or -1 if the code is not a medial.
int GetRomanCodeIndexFromJungseong(KorCodeType jungseong_code)
{
    const KorCodeType inverse_code = SwapBytes(jungseong_code);
    const int offset = (inverse_code-0x8441);
    if(inverse_code>=0x8441 && inverse_code<=0x87A1 && offset%0x20==0)
        return (offset/0x0020);
    return -1;
}

///////////////////////////////////////////////////////////////////////
// In the Johab code page, final consonants start at 0x4184 and, once the two bytes of the
// code are swapped, are spaced 1 apart (0x4184 means "no final") => 0x4184~0x5D84.
// Returns the index into the final-consonant table, or -1 if the code is not a final.
int GetRomanCodeIndexFromJongseong(KorCodeType jongseong_code)
{
    const KorCodeType inverse_code = SwapBytes(jongseong_code);
    if(inverse_code>=0x8441 && inverse_code<=0x845D)
        return (inverse_code-0x8441);
    return -1;
}

///////////////////////////////////////////////////////////////////////
// Decides whether a code is an initial, a medial or a final component.
// The tests run in the order final, medial, initial, so the fill code 0x4184 (which is
// inside all three ranges) is reported as a final.
PhonemeType WhatPhoneme(KorCodeType phoneme)
{
    const KorCodeType inverse_code = SwapBytes(phoneme);

    // final consonant
    if(inverse_code>=0x8441 && inverse_code<=0x845D)
        return enpt_jong_seong;

    // medial vowel
    if(inverse_code>=0x8441 && inverse_code<=0x87A1 && ((inverse_code-0x8441)%0x20)==0)
        return enpt_jung_seong;

    // initial consonant
    if(phoneme>=0x4184 && phoneme<=0x41D0 && (phoneme-0x4184)%4==0)
        return enpt_cho_seong;

    // Hangul, but not a single component
    if((phoneme&0x00FF)>=0x84 && (phoneme&0x00FF)<=0xD3)
        return enpt_char;

    // not Hangul
    return enpt_invalid;
}

///////////////////////////////////////////////////////////////////////
// Pronunciation-based romanization of Hangul.
void Pronounce(TCharInfo& prev_char_info,TCharInfo& char_info,string_type& res)
{
    // Unfinished: not implemented yet.
}

///////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////
// Converts a null-terminated multibyte string from code page nCPFrom to code page nCPTo by
// way of a wide-character string. dwFlags is forwarded to the multibyte-to-wide step only.
// Returns a new[]-allocated string that the caller must delete[], or NULL when the input
// is NULL or empty or a conversion step yields nothing.
char* ConvertCodePage(const char *buff,UINT nCPFrom,UINT nCPTo, DWORD dwFlags)
{
    wchar_t *wcs = mbcs2wcs(buff,nCPFrom,dwFlags);
    if(!wcs)
        return NULL;

    char *mbcs = wcs2mbcs(wcs,nCPTo,0);
    delete[] wcs;
    return mbcs;
}

///////////////////////////////////////////////////////////////////////
// Converts a null-terminated multibyte string in code page cp to a wide-character string.
// Returns a new[]-allocated buffer that the caller must delete[], or NULL when mbcs is
// NULL or empty or the required length cannot be determined. If the conversion itself
// fails, an empty (but non-NULL) string is returned.
wchar_t* mbcs2wcs(const char *mbcs, UINT cp, DWORD dwFlags)
{
    if(!mbcs || !mbcs[0])
        return NULL;

    // With a source length of -1 the returned count includes the terminating null.
    const int wcsLen = MultiByteToWideChar(cp, dwFlags, mbcs, -1, NULL, 0);
    if(wcsLen<=0)
        return NULL;

    wchar_t *wcs = new wchar_t[wcsLen+1];

    // Use the same flags as the sizing call so that the computed length stays valid.
    if(MultiByteToWideChar(cp, dwFlags, mbcs, -1, wcs, wcsLen)==0)
        wcs[0] = 0;
    else
        wcs[wcsLen] = 0;    // null-terminate
    return wcs;
}

///////////////////////////////////////////////////////////////////////
// Converts a null-terminated wide-character string to a multibyte string in code page cp.
// Returns a new[]-allocated buffer that the caller must delete[], or NULL when wcs is
// NULL or empty or the required length cannot be determined. If the conversion itself
// fails, an empty (but non-NULL) string is returned.
// NOTE(review): dwFlags is accepted but not used; WideCharToMultiByte is always called
// with flags 0 - confirm whether that is intentional.
char* wcs2mbcs(const wchar_t *wcs, UINT cp, DWORD dwFlags)
{
    if(!wcs || !wcs[0])
        return NULL;

    // With a source length of -1 the returned count includes the terminating null.
    const int mbcsLen = WideCharToMultiByte(cp, 0, wcs, -1, NULL, 0, NULL, NULL);
    if(mbcsLen<=0)
        return NULL;

    char *mbcs = new char[mbcsLen+1];

    if(WideCharToMultiByte(cp, 0, wcs, -1, mbcs, mbcsLen, NULL, NULL)==0)
        mbcs[0] = 0;
    else
        mbcs[mbcsLen] = 0;  // null-terminate
    return mbcs;
}

///////////////////////////////////////////////////////////////////////
// Spelling-based romanization of initial consonants, indexed by
// GetRomanCodeIndexFromChoseong(). Index 0 is "no initial".
const char* const Cho_seong_roman_by_char[]={
    ""
    ,"g"    // giyeok
    ,"kk"   // ssang-giyeok
    ,"n"    // nieun
    ,"d"    // digeut
    ,"tt"   // ssang-digeut
    ,"r"    // rieul; note that rieul-rieul must be written "ll", as in "Ulleungdo"
    ,"m"    // mieum
    ,"b"    // bieup
    ,"pp"   // ssang-bieup
    ,"s"    // siot
    ,"ss"   // ssang-siot
    ,""     // ieung (silent as an initial)
    ,"j"    // jieut
    ,"jj"   // ssang-jieut
    ,"ch"   // chieut
    ,"k"    // kieuk
    ,"t"    // tieut
    ,"p"    // pieup
    ,"h"    // hieut
    ,0
};

// Spelling-based romanization of medial vowels, indexed by
// GetRomanCodeIndexFromJungseong(). Index 0 is "no medial"; "?" marks table slots that
// have no vowel assigned.
const char* const Jung_seong_roman_by_char[]={
    ""
    ,"a"    // vowel a
    ,"ae"   // vowel ae
    ,"ya"   // vowel ya
    ,"yae"  // vowel yae
    ,"eo"   // vowel eo
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"e"    // vowel e
    ,"yeo"  // vowel yeo
    ,"ye"   // vowel ye
    ,"o"    // vowel o
    ,"wa"   // vowel wa
    ,"wae"  // vowel wae
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"oe"   // vowel oe
    ,"yo"   // vowel yo
    ,"u"    // vowel u
    ,"wo"   // vowel wo
    ,"we"   // vowel we
    ,"wi"   // vowel wi
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"yu"   // vowel yu
    ,"eu"   // vowel eu
    ,"ui"   // vowel ui
    ,"i"    // vowel i
    ,0
};

// Spelling-based romanization of final consonants, indexed by
// GetRomanCodeIndexFromJongseong(). Index 0 is "no final"; "?" marks a table slot that
// has no consonant assigned.
const char* const Jong_seong_roman_by_char[]={
    ""
    ,"k"    // giyeok
    ,"kk"   // ssang-giyeok
    ,"ks"   // giyeok-siot
    ,"n"    // nieun
    ,"nj"   // nieun-jieut
    ,"nh"   // nieun-hieut
    ,"t"    // digeut
    ,"l"    // rieul
    ,"lk"   // rieul-giyeok
    ,"lm"   // rieul-mieum
    ,"lp"   // rieul-bieup
    ,"ls"   // rieul-siot
    ,"lt"   // rieul-tieut
    ,"lp"   // rieul-pieup
    ,"lh"   // rieul-hieut
    ,"m"    // mieum
    ,"?"    // (unassigned)
    ,"p"    // bieup
    ,"ps"   // bieup-siot
    ,"s"    // siot
    ,"ss"   // ssang-siot
    ,"ng"   // ieung
    ,"j"    // jieut
    ,"ch"   // chieut
    ,"k"    // kieuk
    ,"t"    // tieut
    ,"p"    // pieup
    ,"h"    // hieut
    ,0
};

// Original author's note: the three tables below still have entries that need to be
// corrected, and their contents are not in use at present.
// Pronunciation-based romanization of initial consonants (same layout as
// Cho_seong_roman_by_char).
const char* const Cho_seong_roman_by_pronunciation[]={
    ""
    ,"g"    // giyeok
    ,"kk"   // ssang-giyeok
    ,"n"    // nieun
    ,"d"    // digeut
    ,"tt"   // ssang-digeut
    ,"r"    // rieul; note that rieul-rieul must be written "ll", as in "Ulleungdo"
    ,"m"    // mieum
    ,"b"    // bieup
    ,"pp"   // ssang-bieup
    ,"s"    // siot
    ,"ss"   // ssang-siot
    ,""     // ieung (silent as an initial)
    ,"j"    // jieut
    ,"jj"   // ssang-jieut
    ,"ch"   // chieut
    ,"k"    // kieuk
    ,"t"    // tieut
    ,"p"    // pieup
    ,"h"    // hieut
    ,0
};

// Pronunciation-based romanization of medial vowels (same layout as
// Jung_seong_roman_by_char).
const char* const Jung_seong_roman_by_pronunciation[]={
    ""
    ,"a"    // vowel a
    ,"ae"   // vowel ae
    ,"ya"   // vowel ya
    ,"yae"  // vowel yae
    ,"eo"   // vowel eo
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"e"    // vowel e
    ,"yeo"  // vowel yeo
    ,"ye"   // vowel ye
    ,"o"    // vowel o
    ,"wa"   // vowel wa
    ,"wae"  // vowel wae
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"oe"   // vowel oe
    ,"yo"   // vowel yo
    ,"u"    // vowel u
    ,"wo"   // vowel wo
    ,"we"   // vowel we
    ,"wi"   // vowel wi
    ,"?"    // (unassigned)
    ,"?"    // (unassigned)
    ,"yu"   // vowel yu
    ,"eu"   // vowel eu
    ,"ui"   // vowel ui
    ,"i"    // vowel i
    ,0
};

// Pronunciation-based romanization of final consonants (same layout as
// Jong_seong_roman_by_char).
const char* const Jong_seong_roman_by_pronunciation[]={
    ""
    ,"k"    // giyeok
    ,"kk"   // ssang-giyeok
    ,"ks"   // giyeok-siot
    ,"n"    // nieun
    ,"nj"   // nieun-jieut
    ,"nh"   // nieun-hieut
    ,"t"    // digeut
    ,"l"    // rieul
    ,"lk"   // rieul-giyeok
    ,"lm"   // rieul-mieum
    ,"lp"   // rieul-bieup
    ,"ls"   // rieul-siot
    ,"lt"   // rieul-tieut
    ,"lp"   // rieul-pieup
    ,"lh"   // rieul-hieut
    ,"m"    // mieum
    ,"?"    // (unassigned)
    ,"p"    // bieup
    ,"ps"   // bieup-siot
    ,"s"    // siot
    ,"ss"   // ssang-siot
    ,"ng"   // ieung
    ,"j"    // jieut
    ,"ch"   // chieut
    ,"k"    // kieuk
    ,"t"    // tieut
    ,"p"    // pieup
    ,"h"    // hieut
    ,0
};

}//namespace Koreanromannotation