// Source listing for src/encoding.cpp
#include <codecvt>
#include <locale>
#include "ndef-lite/encoding.hpp"
using namespace std;
namespace encoding {
// Reports the byte order of the machine this file is compiled for.
//
// GCC and Clang predefine __BYTE_ORDER__, so on those compilers the answer is selected by the preprocessor and
// the function is a genuine compile-time constant.
//
// Other toolchains fall back to probing a union: the uint32_t member is initialised and the first byte of the
// overlapping uint8_t array is inspected. Reading the member that was not initialised is undefined behaviour in
// standard C++ and is not permitted inside a constant expression, so on that path the call is only evaluated at
// run time and relies on the compiler supporting union type punning as an extension.
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
constexpr Endian system_endianness() { return Endian::BigEndian; }
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
constexpr Endian system_endianness() { return Endian::LittleEndian; }
#else
constexpr union {
  uint32_t i;
  uint8_t c[4];
} endian_int = { 0x01020304 };
// The most significant byte (0x01) sits at the lowest address only on a big-endian machine.
constexpr Endian system_endianness() { return (endian_int.c[0] == 0x01) ? Endian::BigEndian : Endian::LittleEndian; }
#endif
// Pass-through overload: the input is returned as a copy, with no conversion or validation applied.
string to_utf8(const string& src)
{
  string utf8(src);
  return utf8;
}
// Converts a UTF-16 string (basic_string<char16_t>) into its UTF-8 encoding.
// The work is done by wstring_convert::to_bytes with a codecvt_utf8_utf16 facet.
string to_utf8(const u16string& src)
{
  using Utf16ToUtf8 = wstring_convert<codecvt_utf8_utf16<char16_t>, char16_t>;

  Utf16ToUtf8 converter;
  string utf8 = converter.to_bytes(src);
  return utf8;
}
// Converts a UTF-8 (or plain ASCII) string into UTF-16 code units.
// The work is done by wstring_convert::from_bytes with a codecvt_utf8_utf16 facet.
u16string to_utf16(const string& src)
{
  using Utf8ToUtf16 = wstring_convert<codecvt_utf8_utf16<char16_t>, char16_t>;

  Utf8ToUtf16 converter;
  u16string utf16 = converter.from_bytes(src);
  return utf16;
}
// Pass-through overload: the input is already UTF-16, so a copy of it is returned unchanged.
u16string to_utf16(const u16string& src)
{
  u16string utf16(src);
  return utf16;
}
// Converts a raw byte buffer into UTF-16 code units.
// The bytes are copied into a std::string and decoded as UTF-8 by a codecvt_utf8_utf16 facet configured with
// consume_header, so a leading byte order mark is consumed rather than emitted.
u16string to_utf16(const vector<uint8_t>& src)
{
  using HeaderConsumingFacet = codecvt_utf8_utf16<char16_t, 0x10ffff, codecvt_mode::consume_header>;

  const string utf8(src.begin(), src.end());
  wstring_convert<HeaderConsumingFacet, char16_t> converter;
  return converter.from_bytes(utf8);
}
vector<uint8_t> to_utf16_bytes(const u16string& src, const Endian& endian)
{
// Create byte vector from u16string contents, no adjustment
vector<uint8_t> bytes;
const bool endian_match = (system_endianness() == endian);
for (auto&& byte : src) {
/* If the system endianness doesn't match the desired output endianness then the byte order must be reversed.
*
* Eg. If on a little endian system and big endian is wanted the bytes must be switched:
* the first 8 bits (byte) put on the array will be the high 8 bytes in the char16_t shifted 8 places, then the
* second 8 bits (byte) will be the low 8
*
* Little endian char16_t: 0xabcd => Big endian uint8_t[2]: { 0xcd, 0xab }
*/
const uint shift_first = (endian_match) ? 8 : 0;
const uint shift_second = (endian_match) ? 0 : 8;
bytes.push_back(static_cast<uint8_t>(byte >> shift_first));
bytes.push_back(static_cast<uint8_t>(byte >> shift_second));
}
return bytes;
}
// Convenience wrapper: forwards src to to_utf16_bytes, requesting Endian::LittleEndian.
vector<uint8_t> to_utf16le_bytes(const u16string& src)
{
  return to_utf16_bytes(src, Endian::LittleEndian);
}
// Convenience wrapper: forwards src to to_utf16_bytes, requesting Endian::BigEndian.
vector<uint8_t> to_utf16be_bytes(const u16string& src)
{
  return to_utf16_bytes(src, Endian::BigEndian);
}
// Reports whether a byte buffer starts with a two-byte byte order mark.
//
// Returns true when the first two bytes are BOM_BE_1ST followed by BOM_BE_2ND, or the same pair in reverse
// order (presumably the big-endian and little-endian UTF-16 marks; the constants are defined elsewhere).
// Buffers shorter than two bytes cannot hold a mark and yield false instead of throwing std::out_of_range.
bool has_BOM(const vector<uint8_t>& bytes)
{
  if (bytes.size() < 2) {
    return false;
  }

  const uint8_t first = bytes[0];
  const uint8_t second = bytes[1];

  const bool forward_order = (first == BOM_BE_1ST && second == BOM_BE_2ND);
  const bool reverse_order = (first == BOM_BE_2ND && second == BOM_BE_1ST);
  return forward_order || reverse_order;
}
// Reports whether a UTF-16 string starts with the byte order mark pattern.
//
// The low byte of each of the first two code units is compared against BOM_BE_1ST / BOM_BE_2ND, in either order.
// Strings shorter than two code units yield false instead of throwing std::out_of_range.
//
// NOTE(review): each char16_t is truncated to uint8_t before comparing, so the high byte is ignored and a mark
// stored as a single code unit (e.g. U+FEFF) would not be recognised. This looks unintended, but the comparison
// is kept as-is because callers may depend on it - confirm against the call sites.
bool has_BOM(const u16string& text)
{
  if (text.size() < 2) {
    return false;
  }

  const auto first = static_cast<uint8_t>(text[0]);
  const auto second = static_cast<uint8_t>(text[1]);

  const bool forward_order = (first == BOM_BE_1ST && second == BOM_BE_2ND);
  const bool reverse_order = (first == BOM_BE_2ND && second == BOM_BE_1ST);
  return forward_order || reverse_order;
}
} // namespace encoding