52 lines
2.1 KiB
C
52 lines
2.1 KiB
C
#pragma once
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
|
|
// Constants
|
|
|
|
#define HELOS_UNICODE_TYPE (uint32_t) // We use uint32_t to hold a Unicode codepoint. The expansion is the parenthesized type name, so it reads as a cast prefix, not as a declaration type.
|
|
|
|
#define HELOS_UNICODE_ERROR ((uint32_t)0xfffd) // U+FFFD: the "error" char, also known as the "Unicode replacement character".
|
|
#define HELOS_UNICODE_MAX ((uint32_t)0x10ffff) // Maximum valid Unicode codepoint (U+10FFFF).
|
|
#define HELOS_UNICODE_SELF_REPRESENT (0x80) // Chars below SELF_REPRESENT (0x00..0x7f) represent themselves in a single byte.
|
|
|
|
#define HELOS_UTF8_MAX_BYTES 4 // Maximum number of bytes encoding one Unicode codepoint in UTF-8.
|
|
#define HELOS_UTF8_CONTINUATION_MIN 0x80 // Minimum of the continuation range (bytes of the form 10xxxxxx, 0x80..0xbf) in which bytes are not the first byte of a sequence.
|
|
#define HELOS_UTF8_CONTINUATION_MAX 0xbf // Maximum of the continuation range (the largest byte of the form 10xxxxxx).
|
|
#define HELOS_UTF8_OVERLONG_LEADER_1 0xc0 // First leader byte forming an overlong sequence (encoding a codepoint <=0x7f in 2 bytes).
|
|
#define HELOS_UTF8_OVERLONG_LEADER_2 0xc1 // Second leader byte forming an overlong sequence (also a 2-byte encoding of a codepoint <=0x7f).
|
|
#define HELOS_UTF8_INVALID_LEADER_MIN 0xf5 // Minimum of the trailing range of invalid leader bytes: these would encode codepoints above 0x10ffff or start sequences more than 4 bytes long.
|
|
#define HELOS_UTF8_INVALID_LEADER_MAX 0xff // Maximum of the trailing range in which leader bytes are invalid.
|
|
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
|
|
// utf8_Decode advances the UTF-8 sequence by one character,
|
|
// returning the number of bytes advanced.
|
|
//
|
|
// The codepoint pointer, if not NULL, is set to the decoded value.
|
|
// If the UTF-8 sequence is invalid, the decoded value is the replacement char (HELOS_UNICODE_ERROR).
|
|
// NOTE(review): length is presumably the number of bytes available at utf8 — confirm
// against the implementation, including what is returned when length is 0.
size_t utf8_Decode(const char *utf8, size_t length, uint32_t *codepoint);
|
|
|
|
// utf8_EncodeLength returns the number of bytes required to encode
|
|
// the given codepoint in UTF-8 (ranging from 1 to 4).
|
|
//
|
|
// returns 0 if the codepoint is invalid.
|
|
// NOTE(review): "invalid" presumably means above HELOS_UNICODE_MAX — confirm whether
// surrogate codepoints (0xd800..0xdfff) are rejected as well.
size_t utf8_EncodeLength(uint32_t codepoint);
|
|
|
|
// utf8_Encode encodes the given codepoint into the UTF-8 buffer,
|
|
// if utf8 is not NULL, returning the number of bytes written (max 4).
|
|
//
|
|
// The buffer must have enough space.
|
|
// NOTE(review): the behavior for a NULL utf8 (presumably nothing is written) and for an
// invalid codepoint is not stated here — confirm against the implementation.
size_t utf8_Encode(char *utf8, uint32_t codepoint);
|
|
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|