00001
00007 #include <stdio.h>
00008 #include "check.h"
00009
00010
00011
00012
00013
00014
/*
 * UTF-8 bit patterns used by the converters below.
 *
 * Only the one-byte form (0xxxxxxx, 7 payload bits) and the two-byte form
 * (110xxxxx 10xxxxxx, 11 payload bits) are described here. Each *_MASK
 * selects the marker bits of a byte; the matching constant is the value
 * those marker bits must have.
 *
 * Every function-like macro parenthesizes its parameters so that compound
 * arguments such as `a | b` or `n + 1` are grouped as the caller intended.
 * Parameters may be evaluated once per textual use; do not pass expressions
 * with side effects.
 */

/* Lead byte of a one-byte sequence: 0xxxxxxx. */
#define UTF8_HEAD_7 0x00
#define UTF8_HEAD_7_MASK 0x80

/* Lead byte of a two-byte sequence: 110xxxxx. */
#define UTF8_HEAD_11 0xc0
#define UTF8_HEAD_11_MASK 0xe0

/* Continuation ("tail") byte: 10xxxxxx, carrying UTF8_TAIL_SHIFT payload bits. */
#define UTF8_TAIL 0x80
#define UTF8_TAIL_MASK 0xc0
#define UTF8_TAIL_SHIFT 6

/* --- Encoding helpers ---------------------------------------------------- */

/* Shift `data` right by `byte` tail-payload widths (6 bits each). */
#define UTF8_SHIFTER(data, byte) ((data) >> ((byte) * UTF8_TAIL_SHIFT))

/* Build a one-byte-sequence lead byte from the low 7 bits of `data`. */
#define UTF8_HEADER_7(data) (UTF8_HEAD_7 | ((data) & ~UTF8_HEAD_7_MASK))

/* Build a two-byte-sequence lead byte from the low 5 bits of `data`. */
#define UTF8_HEADER_11(data) (UTF8_HEAD_11 | ((data) & ~UTF8_HEAD_11_MASK))

/* Build a continuation byte from the low 6 bits of `data`. */
#define UTF8_TAILER(data) (UTF8_TAIL | ((data) & ~UTF8_TAIL_MASK))

/* --- Classification helpers ---------------------------------------------- */

/* Non-zero when `byte` is the lead (and only) byte of a one-byte sequence. */
#define IS_UTF8_HEADER_7(byte) ((unsigned char)((byte) & UTF8_HEAD_7_MASK) == UTF8_HEAD_7)

/* Non-zero when `byte` is the lead byte of a two-byte sequence. */
#define IS_UTF8_HEADER_11(byte) ((unsigned char)((byte) & UTF8_HEAD_11_MASK) == UTF8_HEAD_11)

/* Non-zero when `byte` is a continuation byte. */
#define IS_UTF8_TAIL(byte) ((unsigned char)((byte) & UTF8_TAIL_MASK) == UTF8_TAIL)

/* --- Decoding helpers ---------------------------------------------------- */

/* Shift `data` left by `byte` tail-payload widths (6 bits each). */
#define UTF8_UNSHIFTER(data, byte) ((data) << ((byte) * UTF8_TAIL_SHIFT))

/* Strip the marker bit from a one-byte-sequence lead byte. */
#define UTF8_UNHEADER_7(data) ((data) & ~UTF8_HEAD_7_MASK)

/* Strip the marker bits from a two-byte-sequence lead byte. */
#define UTF8_UNHEADER_11(data) ((data) & ~UTF8_HEAD_11_MASK)

/* Strip the marker bits from a continuation byte. */
#define UTF8_UNTAILER(data) ((data) & ~UTF8_TAIL_MASK)
00056
00057
/**
 * Convert a NUL-terminated ISO-8859-1 (Latin-1) string to UTF-8.
 *
 * Bytes below 0x80 are copied as one byte; bytes 0x80..0xff are expanded to
 * a two-byte UTF-8 sequence. The output is always NUL-terminated: when
 * dest is too small, conversion stops at the last character that fits
 * completely (a two-byte sequence is never split) and the terminator is
 * written after it.
 *
 * @param source    NUL-terminated input string; must not be NULL.
 * @param dest      Output buffer; must not be NULL.
 * @param dest_size Capacity of dest in bytes; must be greater than zero.
 * @return Number of bytes written to dest, including the terminating NUL
 *         (0 only if the arguments are invalid and check() returned).
 */
size_t iso8859_utf8(const char *source, char *dest, const size_t dest_size)
{
    size_t i = 0; /* read position in source */
    size_t j = 0; /* write position in dest */

    check(source != NULL);
    check(dest != NULL);
    check(dest_size > 0);

    /* check() is defined in check.h; in case it returns on failure, make
     * sure nothing is read or written through invalid arguments. */
    if (source == NULL || dest == NULL || dest_size == 0) {
        return 0;
    }

    for (;;) {
        const unsigned char byte = (unsigned char)source[i];
        const size_t needed = (byte < 0x80) ? 1 : 2;

        /* Stop at end of input, or when the encoded character plus the
         * terminator would no longer fit. The limit is on the OUTPUT index:
         * one input byte may produce two output bytes. j < dest_size holds
         * here, so the subtraction cannot wrap. */
        if (byte == '\0' || dest_size - j <= needed) {
            break;
        }

        if (needed == 1) {
            dest[j++] = (char)UTF8_HEADER_7(UTF8_SHIFTER(byte, 0));
        } else {
            dest[j++] = (char)UTF8_HEADER_11(UTF8_SHIFTER(byte, 1));
            dest[j++] = (char)UTF8_TAILER(byte);
        }
        i++;
    }

    dest[j++] = '\0';
    return j;
}
00094
/**
 * Convert a NUL-terminated UTF-8 string to ISO-8859-1 (Latin-1).
 *
 * One-byte sequences are copied unchanged. Two-byte sequences are decoded
 * and written when the result lies in 0x80..0xff. Everything else is
 * skipped: stray continuation bytes, lead bytes of longer sequences,
 * two-byte leads not followed by a continuation byte, overlong encodings,
 * and code points above U+00FF, which Latin-1 cannot represent.
 *
 * The output is always NUL-terminated; when dest is too small the result
 * is truncated to dest_size - 1 characters.
 *
 * @param source    NUL-terminated input string; must not be NULL.
 * @param dest      Output buffer; must not be NULL.
 * @param dest_size Capacity of dest in bytes; must be greater than zero.
 * @return Number of bytes written to dest, including the terminating NUL
 *         (0 only if the arguments are invalid and check() returned).
 */
size_t utf8_iso8859(const char *source, char *dest, const size_t dest_size)
{
    size_t i = 0; /* read position in source */
    size_t j = 0; /* write position in dest */

    check(source != NULL);
    check(dest != NULL);
    check(dest_size > 0);

    /* check() is defined in check.h; in case it returns on failure, make
     * sure nothing is read or written through invalid arguments. */
    if (source == NULL || dest == NULL || dest_size == 0) {
        return 0;
    }

    /* Always keep one byte of dest in reserve for the terminator. */
    while (source[i] != '\0' && j + 1 < dest_size) {
        const unsigned char byte = (unsigned char)source[i];

        i++;

        if (IS_UTF8_HEADER_7(byte)) {
            dest[j++] = (char)UTF8_UNHEADER_7(byte);
        } else if (IS_UTF8_HEADER_11(byte)) {
            const unsigned char tail = (unsigned char)source[i];
            unsigned int code_point;

            if (!IS_UTF8_TAIL(tail)) {
                /* Truncated or malformed sequence: drop the lead byte only,
                 * so the next byte (possibly the string terminator) is
                 * examined on its own instead of being swallowed. */
                continue;
            }
            i++;

            code_point = (unsigned int)UTF8_UNSHIFTER(UTF8_UNHEADER_11(byte), 1)
                       | (unsigned int)UTF8_UNTAILER(tail);

            /* Below 0x80 is an overlong encoding (0xc0 0x80 would otherwise
             * inject a NUL); above 0xff does not fit in one Latin-1 byte. */
            if (code_point >= 0x80 && code_point <= 0xff) {
                dest[j++] = (char)code_point;
            }
        }
        /* Any other byte is not representable here and is skipped. */
    }

    dest[j++] = '\0';
    return j;
}