00001 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */ 00002 00003 /* 00004 * Word breaking in a Unicode sequence. Designed to be used in a 00005 * generic text renderer. 00006 * 00007 * Copyright (C) 2013-15 Tom Hacohen <tom at stosb dot com> 00008 * 00009 * This software is provided 'as-is', without any express or implied 00010 * warranty. In no event will the author be held liable for any damages 00011 * arising from the use of this software. 00012 * 00013 * Permission is granted to anyone to use this software for any purpose, 00014 * including commercial applications, and to alter it and redistribute 00015 * it freely, subject to the following restrictions: 00016 * 00017 * 1. The origin of this software must not be misrepresented; you must 00018 * not claim that you wrote the original software. If you use this 00019 * software in a product, an acknowledgement in the product 00020 * documentation would be appreciated but is not required. 00021 * 2. Altered source versions must be plainly marked as such, and must 00022 * not be misrepresented as being the original software. 00023 * 3. This notice may not be removed or altered from any source 00024 * distribution. 00025 * 00026 * The main reference is Unicode Standard Annex 29 (UAX #29): 00027 * <URL:http://unicode.org/reports/tr29> 00028 * 00029 * When this library was designed, this annex was at Revision 17, for 00030 * Unicode 6.0.0: 00031 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 00032 * 00033 * This library has been updated according to Revision 25, for 00034 * Unicode 7.0.0: 00035 * <URL:http://www.unicode.org/reports/tr29/tr29-25.html> 00036 * 00037 * The Unicode Terms of Use are available at 00038 * <URL:http://www.unicode.org/copyright.html> 00039 */ 00040 00051 #include "unibreakdef.h" 00052 00057 enum WordBreakClass 00058 { 00059 WBP_Undefined, 00060 WBP_CR, 00061 WBP_LF, 00062 WBP_Newline, 00063 WBP_Extend, 00064 WBP_Regional_Indicator, 00065 WBP_Format, 00066 WBP_Katakana, 00067 WBP_Hebrew_Letter, 00068 WBP_ALetter, 00069 WBP_Single_Quote, 00070 WBP_Double_Quote, 00071 WBP_MidNumLet, 00072 WBP_MidLetter, 00073 WBP_MidNum, 00074 WBP_Numeric, 00075 WBP_ExtendNumLet, 00076 WBP_Any 00077 }; 00078 00083 struct WordBreakProperties 00084 { 00085 utf32_t start; 00086 utf32_t end; 00087 enum WordBreakClass prop; 00088 };