Header file for the line breaking algorithm.
#include <stddef.h>
Go to the source code of this file.
Defines | |
#define | LINEBREAK_VERSION 0x0201 |
Version of the library linebreak. | |
#define | LINEBREAK_MUSTBREAK 0 |
Break is mandatory. | |
#define | LINEBREAK_ALLOWBREAK 1 |
Break is allowed. | |
#define | LINEBREAK_NOBREAK 2 |
No break is possible. | |
#define | LINEBREAK_INSIDEACHAR 3 |
A UTF-8/16 sequence is unfinished. | |
Typedefs | |
typedef unsigned char | utf8_t |
Type for UTF-8 data points. | |
typedef unsigned short | utf16_t |
Type for UTF-16 data points. | |
typedef unsigned int | utf32_t |
Type for UTF-32 data points. | |
Functions | |
void | init_linebreak (void) |
Initializes the second-level index to the line breaking properties. | |
void | set_linebreaks_utf8 (const utf8_t *s, size_t len, const char *lang, char *brks) |
Sets the line breaking information for a UTF-8 input string. | |
void | set_linebreaks_utf16 (const utf16_t *s, size_t len, const char *lang, char *brks) |
Sets the line breaking information for a UTF-16 input string. | |
void | set_linebreaks_utf32 (const utf32_t *s, size_t len, const char *lang, char *brks) |
Sets the line breaking information for a UTF-32 input string. | |
int | is_line_breakable (utf32_t char1, utf32_t char2, const char *lang) |
Tells whether a line break can occur between two Unicode characters. | |
Variables | |
const int | linebreak_version |
Version number of the library. |
Header file for the line breaking algorithm.
#define LINEBREAK_ALLOWBREAK 1 |
Break is allowed.
#define LINEBREAK_INSIDEACHAR 3 |
A UTF-8/16 sequence is unfinished.
#define LINEBREAK_MUSTBREAK 0 |
Break is mandatory.
#define LINEBREAK_NOBREAK 2 |
No break is possible.
#define LINEBREAK_VERSION 0x0201 |
Version of the library linebreak.
typedef unsigned short utf16_t |
Type for UTF-16 data points.
typedef unsigned int utf32_t |
Type for UTF-32 data points.
typedef unsigned char utf8_t |
Type for UTF-8 data points.
void init_linebreak (void)
Initializes the second-level index to the line breaking properties.
If it is not called, the performance of get_char_lb_class_lang (and thus of the main functionality) can be quite poor, especially for large code points such as those of Chinese characters.
int is_line_breakable (utf32_t char1, utf32_t char2, const char *lang)
Tells whether a line break can occur between two Unicode characters.
This is a wrapper function that exposes a simple interface. In general, it is better to use set_linebreaks_utf32 instead, because this function cannot correctly process complicated cases involving combining marks, spaces, etc.
char1 | the first Unicode character | |
char2 | the second Unicode character | |
lang | language of the input |
void set_linebreaks_utf16 (const utf16_t *s, size_t len, const char *lang, char *brks)
Sets the line breaking information for a UTF-16 input string.
[in] | s | input UTF-16 string |
[in] | len | length of the input |
[in] | lang | language of the input |
[out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
void set_linebreaks_utf32 (const utf32_t *s, size_t len, const char *lang, char *brks)
Sets the line breaking information for a UTF-32 input string.
[in] | s | input UTF-32 string |
[in] | len | length of the input |
[in] | lang | language of the input |
[out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
void set_linebreaks_utf8 (const utf8_t *s, size_t len, const char *lang, char *brks)
Sets the line breaking information for a UTF-8 input string.
[in] | s | input UTF-8 string |
[in] | len | length of the input |
[in] | lang | language of the input |
[out] | brks | pointer to the output breaking data, containing LINEBREAK_MUSTBREAK, LINEBREAK_ALLOWBREAK, LINEBREAK_NOBREAK, or LINEBREAK_INSIDEACHAR |
const int linebreak_version |
Version number of the library.