UFO: Alien Invasion
Loading...
Searching...
No Matches
utf8.cpp
Go to the documentation of this file.
1
4
5/*
6All original material Copyright (C) 2002-2025 UFO: Alien Invasion.
7
8Copyright (C) 1997-2001 Id Software, Inc.
9
10This program is free software; you can redistribute it and/or
11modify it under the terms of the GNU General Public License
12as published by the Free Software Foundation; either version 2
13of the License, or (at your option) any later version.
14
15This program is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18
19See the GNU General Public License for more details.
20
21You should have received a copy of the GNU General Public License
22along with this program; if not, write to the Free Software
23Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24*/
25
26#include "utf8.h"
27#include <string.h>
28
/**
 * @brief Delete a whole (possibly multibyte) character from a string.
 * @param[in,out] s Zero-terminated UTF-8 string; edited in place.
 * @param[in] pos Character offset (not byte offset) of the character to delete.
 * @return Number of bytes removed from the string.
 */
int UTF8_delete_char_at (char* s, int pos)
{
	/* Convert the UTF-8 char offset to byte offset. Everything below
	 * indexes bytes, so without this step a character offset would land
	 * in the wrong place as soon as a multibyte character precedes it. */
	pos = UTF8_char_offset_to_byte_offset(s, pos);

	int start = pos;
	int next = pos;

	/* Back up to the first byte of the character under the offset. */
	while (start > 0 && UTF8_CONTINUATION_BYTE(s[start]))
		start--;
	/* Step over the first byte (unless we are at the terminator) ... */
	if (s[next] != 0)
		next++;
	/* ... and over every continuation byte that follows it. */
	while (s[next] != 0 && UTF8_CONTINUATION_BYTE(s[next]))
		next++;
	/* memmove is the only standard copying function that is guaranteed
	 * to work if the source and destination overlap. The +1 carries the
	 * string terminator along. */
	memmove(&s[start], &s[next], strlen(&s[next]) + 1);
	return (next - start);
}
54
/**
 * @brief Insert a (possibly multibyte) UTF-8 character into a string.
 * @param[in,out] s Zero-terminated UTF-8 string; edited in place.
 * @param[in] n Size in bytes of the buffer holding @c s, terminator included.
 * @param[in] pos Character offset (not byte offset) at which to insert.
 * @param[in] c Unicode code point to insert.
 * @return Number of bytes inserted, or 0 if @c c cannot be encoded or the
 * result would not fit into @c n bytes (the string is left untouched then).
 */
int UTF8_insert_char_at (char* s, int n, int pos, int c)
{
	/* Convert the UTF-8 char offset to byte offset. Everything below
	 * indexes bytes; the conversion also stops at the terminator, which
	 * keeps &s[pos] inside the string. */
	pos = UTF8_char_offset_to_byte_offset(s, pos);

	const int utf8len = UTF8_encoded_len(c);
	if (utf8len == 0)
		return 0;

	/* Bytes from the insertion point to the end, terminator included. */
	const int tail = (int)strlen(&s[pos]) + 1;

	/* pos + tail is the number of bytes currently in use. */
	if (pos + tail + utf8len > n)
		return 0;

	/* Insertion: move up rest of string. Also moves string terminator. */
	memmove(&s[pos + utf8len], &s[pos], tail);

	if (c <= 0x7f) {
		s[pos] = (char)c;
	} else if (c <= 0x7ff) { /* c has 11 bits */
		s[pos] = (char)(0xc0 | (c >> 6)); /* high 5 bits */
		s[pos + 1] = (char)(0x80 | (c & 0x3f)); /* low 6 bits */
	} else if (c <= 0xffff) { /* c has 16 bits */
		s[pos] = (char)(0xe0 | (c >> 12)); /* high 4 bits */
		s[pos + 1] = (char)(0x80 | ((c >> 6) & 0x3f)); /* mid 6 bits */
		s[pos + 2] = (char)(0x80 | (c & 0x3f)); /* low 6 bits */
	} else if (c <= 0x10ffff) { /* c has 21 bits */
		s[pos] = (char)(0xf0 | (c >> 18)); /* high 3 bits */
		s[pos + 1] = (char)(0x80 | ((c >> 12) & 0x3f)); /* mid 6 bits */
		s[pos + 2] = (char)(0x80 | ((c >> 6) & 0x3f)); /* mid 6 bits */
		s[pos + 3] = (char)(0x80 | (c & 0x3f)); /* low 6 bits */
	}

	return utf8len;
}
98
/**
 * @brief Length of the UTF-8 character starting with this byte.
 * @param[in] c First byte of a UTF-8 sequence.
 * @return Sequence length in bytes (1 to 4), or 0 if @c c cannot start a
 * sequence (continuation byte, or a lead byte of the retired 5 and 6 byte
 * forms).
 */
int UTF8_char_len (unsigned char c)
{
	/* Classify by the bit pattern at the top of the byte. */
	if ((c & 0x80) == 0x00) /* 0xxxxxxx */
		return 1;
	if ((c & 0xe0) == 0xc0) /* 110xxxxx */
		return 2;
	if ((c & 0xf0) == 0xe0) /* 1110xxxx */
		return 3;
	if ((c & 0xf8) == 0xf0) /* 11110xxx */
		return 4;
	/* What is left is 10xxxxxx (a continuation byte) and 11111xxx (the
	 * 5 and 6 byte forms UTF-8 used to define, which are no longer
	 * valid). */
	return 0;
}
125
/**
 * @brief Get the next utf-8 character from the given string.
 * @param[in,out] str Address of the read pointer. On success it is advanced
 * past the decoded sequence; on failure it is left where it was.
 * @return The decoded Unicode code point, or -1 at the end of the string or
 * when the bytes at the read pointer are not a well-formed sequence.
 */
int UTF8_next (const char** str)
{
	const unsigned char* buf = (const unsigned char*)(*str);
	size_t len, i;
	int cp, min;

	if (buf[0] == '\0')
		return -1;

	/* The lead byte fixes the sequence length, the payload bits it
	 * contributes, and the smallest code point that genuinely needs a
	 * sequence of that length. */
	if (buf[0] < 0x80) {
		len = 1;
		min = 0;
		cp = buf[0];
	} else if (buf[0] < 0xC0) {
		/* A continuation byte cannot start a sequence. */
		return -1;
	} else if (buf[0] < 0xE0) {
		len = 2;
		min = 1 << 7;
		cp = buf[0] & 0x1F;
	} else if (buf[0] < 0xF0) {
		len = 3;
		min = 1 << (5 + 6);
		cp = buf[0] & 0x0F;
	} else if (buf[0] < 0xF8) {
		len = 4;
		min = 1 << (4 + 6 + 6);
		cp = buf[0] & 0x07;
	} else {
		/* 5 and 6 byte forms are not valid UTF-8. */
		return -1;
	}

	for (i = 1; i < len; i++) {
		/* Every trailing byte must have the form 10xxxxxx. The string
		 * terminator fails this test too, so a sequence that is cut
		 * short is rejected before we read beyond the end. */
		if ((buf[i] & 0xC0) != 0x80)
			return -1;
		cp = (cp << 6) | (buf[i] & 0x3F);
	}

	/* Overlong encoding: the value would have fit a shorter sequence. */
	if (cp < min)
		return -1;

	/* UTF-16 surrogate halves are not characters. */
	if (0xD800 <= cp && cp <= 0xDFFF)
		return -1;

	/* Beyond the highest defined Unicode code point. */
	if (0x110000 <= cp)
		return -1;

	*str += len;
	return cp;
}
183
/**
 * @brief Number of bytes needed to encode a Unicode code point as UTF-8.
 * @param[in] c Unicode code point.
 * @return 1 to 4 for a code point in the range 0 to 0x10FFFF, 0 for any
 * value outside that range.
 */
int UTF8_encoded_len (int c)
{
	/* A negative value is not a code point. Without this test it would
	 * pass the first comparison below and be reported as one byte. */
	if (c < 0)
		return 0;
	if (c <= 0x7F)
		return 1;
	if (c <= 0x07FF)
		return 2;
	if (c <= 0xFFFF)
		return 3;
	if (c <= 0x10FFFF) /* highest defined Unicode code */
		return 4;
	return 0;
}
200
/**
 * @brief Count the number of characters (not the number of bytes) of a
 * zero-terminated string.
 * @param[in] str Zero-terminated UTF-8 string.
 * @return Number of characters. A byte that cannot start a UTF-8 sequence is
 * counted as one character of its own.
 */
size_t UTF8_strlen (const char* str)
{
	size_t result = 0;

	while (str[0] != '\0') {
		int n = UTF8_char_len((unsigned char)*str);

		/* UTF8_char_len reports 0 for a byte that cannot start a
		 * sequence. Advancing by 0 would keep us on that byte forever,
		 * so consume it as a single character. */
		if (n == 0)
			n = 1;

		/* Walk the sequence byte by byte rather than jumping n ahead:
		 * if the string ends in the middle of a sequence, the jump
		 * would carry us past the terminator. */
		while (n > 0 && str[0] != '\0') {
			str++;
			n--;
		}
		result++;
	}
	return result;
}
218
/**
 * @brief Convert UTF-8 character offset to a byte offset in the given string.
 * @param[in] str Zero-terminated UTF-8 string.
 * @param[in] pos Character offset. Values of zero or less give 0.
 * @return Byte offset of character number @c pos. The result never exceeds
 * the byte length of the string. A byte that cannot start a UTF-8 sequence
 * counts as one character of its own.
 */
int UTF8_char_offset_to_byte_offset (char* str, int pos)
{
	int result = 0;

	while (pos > 0 && str[0] != '\0') {
		int n = UTF8_char_len((unsigned char)*str);

		/* UTF8_char_len reports 0 for a byte that cannot start a
		 * sequence. Step over it as a single character; otherwise the
		 * offset could never move past such a byte. */
		if (n == 0)
			n = 1;

		/* Walk the sequence byte by byte rather than jumping n ahead:
		 * if the string ends in the middle of a sequence, the jump
		 * would yield an offset past the terminator. */
		while (n > 0 && str[0] != '\0') {
			str++;
			result++;
			n--;
		}
		pos--;
	}
	return result;
}
239
/**
 * @brief UTF8 capable string copy function.
 *
 * Copies at most @c limit - 1 bytes and always terminates the result. When
 * the source has to be shortened and the cut would fall inside a multibyte
 * character, the copy ends before that character instead.
 *
 * @param[out] dest Destination buffer of at least @c limit bytes.
 * @param[in] src Zero-terminated source string.
 * @param[in] limit Size of @c dest in bytes. If 0, nothing is written.
 * @return @c dest.
 */
char* UTF8_strncpyz (char* dest, const char* src, size_t limit)
{
	size_t length;

	/* There is no room even for the terminator. This has to be caught
	 * here: limit is unsigned, so limit - 1 below would wrap around to
	 * the largest possible value and the copy would be unbounded. */
	if (limit == 0)
		return dest;

	length = strlen(src);
	if (length > limit - 1) {
		length = limit - 1;
		/* Only a byte of a multibyte character can be cut in half. */
		if (length > 0 && (unsigned char) src[length - 1] >= 0x80) {
			size_t i = length - 1;
			/* Back up to the first byte of the last character kept. */
			while ((i > 0) && UTF8_CONTINUATION_BYTE((unsigned char) src[i]))
				i--;
			/* If that character reaches beyond the cut, drop it. */
			if ((size_t)UTF8_char_len((unsigned char) src[i]) + i > length)
				length = i;
		}
	}

	memcpy(dest, src, length);
	dest[length] = '\0';

	return dest;
}
voidpf void * buf
Definition ioapi.h:42
QGL_EXTERN GLuint GLchar GLuint * len
Definition r_gl.h:99
QGL_EXTERN GLenum GLuint * dest
Definition r_gl.h:101
QGL_EXTERN GLuint GLsizei GLsizei * length
Definition r_gl.h:110
QGL_EXTERN GLint i
Definition r_gl.h:113
int UTF8_char_len(unsigned char c)
length of UTF-8 character starting with this byte.
Definition utf8.cpp:109
int UTF8_delete_char_at(char *s, int pos)
Delete a whole (possibly multibyte) character from a string.
Definition utf8.cpp:35
int UTF8_insert_char_at(char *s, int n, int pos, int c)
Insert a (possibly multibyte) UTF-8 character into a string.
Definition utf8.cpp:63
size_t UTF8_strlen(const char *str)
Count the number of characters (not the number of bytes) of a zero-terminated string.
Definition utf8.cpp:207
int UTF8_next(const char **str)
Get the next utf-8 character from the given string.
Definition utf8.cpp:132
char * UTF8_strncpyz(char *dest, const char *src, size_t limit)
UTF8 capable string copy function.
Definition utf8.cpp:247
int UTF8_char_offset_to_byte_offset(char *str, int pos)
Convert UTF-8 character offset to a byte offset in the given string.
Definition utf8.cpp:227
int UTF8_encoded_len(int c)
Definition utf8.cpp:188
#define UTF8_CONTINUATION_BYTE(c)
Definition utf8.h:35