功能函数库¶
1 获取lacale集¶
1.1 windows版¶
#include<Windows.h>
#include<string>
using std::wstring;
using std::string;
#ifdef UNICODE
typedef wstring tstring;
#else
typedef string tstring;
#endif // UNICODE
typedef std::vector<tstring> tLocales;
std::vector<tstring> g_locales;
BOOL CALLBACK MyLocaleEnumProc(LPTSTR szLocaleString)
{
g_locales.push_back(szLocaleString);
return TRUE;
}
//LCID码转语言和国家全称(English_United States.1252)
tstring& LCID2NLS(LCID lcid, tstring& bstrRetBuf)
{
TCHAR arcBuf[128];
memset(arcBuf, 0, sizeof(arcBuf));
GetLocaleInfo(lcid, LOCALE_SENGLANGUAGE, arcBuf, 127);// English name of language, eg "German"
bstrRetBuf = arcBuf;
memset(arcBuf, 0, sizeof(arcBuf));
//Loading the English name for the country/region
GetLocaleInfo(lcid, LOCALE_SENGCOUNTRY, arcBuf, 127);// English name of country/region, eg "Germany"
if (*arcBuf)
{
bstrRetBuf += TEXT("_");
bstrRetBuf += arcBuf;
}
//Loading the code page
memset(arcBuf, 0, sizeof(arcBuf));
if ((GetLocaleInfo(lcid, LOCALE_IDEFAULTANSICODEPAGE, arcBuf, 127)
|| GetLocaleInfo(lcid, LOCALE_IDEFAULTCODEPAGE, arcBuf, 127))
&& *arcBuf)
{
bstrRetBuf += TEXT(".");
bstrRetBuf += arcBuf;
}
return bstrRetBuf;
}
//LCID码转语言和国家缩写(en_US)
tstring& LCID2NLSAbbr(LCID lcid, tstring& bstrRetBuf)
{
TCHAR arcBuf[128];
memset(arcBuf, 0, sizeof(arcBuf));
GetLocaleInfo(lcid, LOCALE_SISO639LANGNAME, arcBuf, 127);// ISO abbreviated language name, eg "en"
bstrRetBuf = arcBuf;
memset(arcBuf, 0, sizeof(arcBuf));
//Loading the English name for the country/region
GetLocaleInfo(lcid, LOCALE_SISO3166CTRYNAME, arcBuf, 127); // ISO abbreviated country/region name, eg "US"
if (*arcBuf)
{
bstrRetBuf += TEXT("_");
bstrRetBuf += arcBuf;
}
return bstrRetBuf;
}
std::vector<tstring> GetAllLocales(bool isAbbr=true)
{
EnumSystemLocales(&MyLocaleEnumProc, LCID_INSTALLED);
std::vector<tstring> vNLS,vNLSAbbr;
tstring ts;
for (tLocales::const_iterator i = g_locales.begin(); i != g_locales.end(); i++)
{
if (isAbbr) {//en_US
LCID2NLSAbbr(std::stoul(i->c_str(), nullptr, 16), ts);
vNLSAbbr.emplace_back(ts);
}
else {//English_United States.1252
LCID2NLS(std::stoul(i->c_str(), nullptr, 16), ts);
vNLS.emplace_back(ts);
}
}
if (isAbbr) {
return vNLSAbbr;
}
else {
return vNLS;
}
}
int main() {
//获取缩写locale集 en_US
std::vector<tstring> vstrAbbr= GetAllLocales();
//获取全称locale集 (English_United States.1252)
std::vector<tstring> vstr = GetAllLocales(false);
}
1.2 Linux版¶
linux上可以通过调用系统locale -a
命令来获取
#include <stdio.h>
#include <iostream>
#include <string>
#include <set>
using namespace std;
int main()
{
char buf[128] = {0};
FILE *fp = NULL;
fp = popen("locale -a", "r");
set<string> sstr;
while (!feof(fp))
{
fgets(buf, sizeof(buf) - 1, fp);
sstr.insert(string(buf));
}
//fread(buf, 1, 2047, fp);
pclose(fp);
for (auto &&str : sstr)
{
std::cout << str << std::endl;
}
}
2 宽字符多字节转换¶
- 注意:把多字节编码转换成另一多字节编码,需把宽字符编码作为中转编码。
2.1 跨平台¶
- 代码如下
#include<string>
#include<fstream>
#pragma warning(disable : 4996)
int main() {
//utf-8转宽字符
{
std::cout << "utf-8转宽字符->" << std::endl;
setlocale(LC_ALL, "en_US.utf8");
const char* mbstr = u8"z你好z";
//获取所需wchar_t空间
size_t len = mbstowcs(NULL, mbstr, 0);
wchar_t* wstr = new wchar_t[len + 1];
wstr[len] = L'\0';
mbstowcs(wstr, mbstr, 5);
setlocale(LC_ALL, "");
std::wcout << L"宽字符:" << wstr << std::endl;
delete[]wstr;
}
//本地环境编码(简体中文系统gbk)转宽字符
{
setlocale(LC_ALL, "");
std::wcout << L"本地环境编码(简体中文系统gbk)转宽字符->" << std::endl;
const char* mbstr = "z你好z";
//获取所需wchar_t空间
size_t len = mbstowcs(NULL, mbstr, 0);
wchar_t* wstr = new wchar_t[len + 1];
wstr[len] = L'\0';
mbstowcs(wstr, mbstr, 5);
std::wcout << L"宽字符:" << wstr << std::endl;
delete[]wstr;
}
//宽字符转utf-8
{
setlocale(LC_ALL, "");
std::wcout << L"宽字符转utf-8->" << std::endl;
//设置locale到utf-8环境,这样宽字符才会转出多字节的utf-8编码
std::string lc= setlocale(LC_ALL, "en_US.utf8");
const wchar_t* wstr = L"z你好z";
//获取所需wchar_t空间
size_t len = wcstombs(NULL, wstr, 0);
char* mbstr = new char[len + 1];
mbstr[len] = '\0';
wcstombs(mbstr, wstr, len);
//由于控制台直接打印utf-8编码字符比较困难,故写入文件中,打开1.txt文件,查看编码格式utf-8,内容“z你好z”
std::ofstream fout("1.txt");
fout << mbstr;
fout.close();
delete[]mbstr;
}
//宽字符转本地环境编码(简体中文系统gbk)
{
setlocale(LC_ALL, "");
std::wcout << L"宽字符转本地环境编码(简体中文系统gbk)->" << std::endl;
//设置locale到utf-8环境,这样宽字符才会转出多字节的utf-8编码
setlocale(LC_ALL, "");
const wchar_t* wstr = L"z你好z";
//获取所需wchar_t空间
size_t len = wcstombs(NULL, wstr, 0);
char* mbstr = new char[len + 1];
mbstr[len] = '\0';
wcstombs(mbstr, wstr, len);
std::cout << "多字节:";
std::cout << mbstr << '\n';
delete[]mbstr;
}
}
- 输出结果
utf-8转宽字符->
宽字符:z你好z
本地环境编码(简体中文系统gbk)转宽字符->
宽字符:z你好z
宽字符转utf-8->
宽字符转本地环境编码(简体中文系统gbk)->
多字节:z你好z
2.2 windows平台¶
#include<Windows.h>
using namespace std;
int main() {
const char* szU8 = u8"z你好z";
//原始字符串为utf-8编码
int wcsLen = ::MultiByteToWideChar(CP_UTF8, NULL, szU8, strlen(szU8), NULL, 0);
wchar_t* wszString = new wchar_t[wcsLen + 1];
::MultiByteToWideChar(CP_UTF8, NULL, szU8, strlen(szU8), wszString, wcsLen);
wszString[wcsLen] = '\0';
//转换宽字符到ANSI编码(根据操作系统语言环境决定)
int ansiLen = ::WideCharToMultiByte(CP_ACP, NULL, wszString, wcslen(wszString), NULL, 0, NULL, NULL);
char* szAnsi = new char[ansiLen + 1];
::WideCharToMultiByte(CP_ACP, NULL, wszString, wcslen(wszString), szAnsi, ansiLen, NULL, NULL);
szAnsi[ansiLen] = '\0';
setlocale(LC_CTYPE, "");
wcout << wszString << endl;
cout << szAnsi << endl;
}
3 读取文件¶
3.1 按行读取¶
- 方式一:cpp方法
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
int main()
{
ifstream rf("1.txt");
if (rf.is_open())
{
string str;
while (getline(rf,str))
{
std::cout << str << std::endl;
}
}
}
- 方式二:c方法
#include "stdlib.h"
#include "stdio.h"
int main(int argc, char *argv[])
{
FILE *in= fopen("D:/in.java", "r");
char buf[1024];
while (fgets(buf, sizeof(buf), in) != NULL)
{
printf("%s", buf);
}
fclose(in);
return 0;
}
3.2 全部读取¶
- 方式一:读取到c风格字符串(char*)中
#include <fstream>
int main()
{
std::ifstream rf;
int length;
rf.open("1.txt"); // open input file
rf.seekg(0, std::ios::end); // go to the end
length = rf.tellg(); // report location (this is the length)
rf.seekg(0, std::ios::beg); // go back to the beginning
buffer = new char[length]; // allocate memory for a buffer of appropriate dimension
rf.read(buffer, length); // read the whole file into the buffer
rf.close(); // close file handle
}
- 方式二:读取到string(streambuf)
#include <string>
#include <fstream>
#include <streambuf>
int main()
{
std::ifstream rf("1.txt");
std::string data((std::istreambuf_iterator<char>(rf)), std::istreambuf_iterator<char>());
}
- 方式三:读取到string(stringstream)
#include <string>
#include <fstream>
#include <sstream>
int main()
{
std::ifstream rf("1.txt");
std::stringstream buffer;
buffer << rf.rdbuf();
std::string data(buffer.str());
}
4 获取当前工作目录和当前执行程序目录¶
当前工作目录和当前执行程序目录不是同一个目录。
- 当前工作目录:是指启动可执行程序时所在的目录
- 当前执行程序目录:可执行程序所在目录
- 解释:假设有可执行程序~/test/test.exe
,当在~
用户目录下执行./test/test.exe
时,工作目录是~
,而当前可执行程序目录是~/test
。
4.1 获取当前工作目录¶
- 方法一:
getcwd
- 方法二:
get_current_dir_name
- 示例
#include <unistd.h>
int main(int argc, char** argv)
{
{//方法一
char *path;
path = get_current_dir_name();
return 0;
}
{//方法二
char *path = NULL;
path = getcwd(NULL,0);
puts(path);
free(path);
}
return 0;
}
4.2 获取当前执行程序目录¶
通过readlink读取/proc/self/exe
,获取程序路径(含程序名)。
- 示例
#include<unistd.h>
int main(int argc, char** argv)
{
char cur_path[1024]={0};
if (readlink("/proc/self/exe", cur_path, BUF_SMALL)<0)
{
printf("获取可执行程序路径失败\n");
return -1;
}
//截取掉程序名
string cur_dir= cur_path;
pos=cur_dir.find_last_of('/');
cur_dir = cur_dir.substr(0, pos);
return 0;
}