Extension for String and Char
Before registering the string and char extensions, it is recommended to set the required engine properties.
-
void asbind20::ext::configure_engine_for_ext_string(asIScriptEngine *engine)
Set engine properties for string extension.
This will configure the script engine to use UTF-8 everywhere and enable character literals, which means that 'a' is an integral value instead of a string.
Script Char
-
void asbind20::ext::register_script_char(asIScriptEngine *engine, bool generic = has_max_portability())
This module simply wraps the primitive C++ type char32_t as script type char.
It can also serve as a demonstration for registering a primitive C++ type using value_class.
Script String
This module optionally depends on the script char and the array extensions, so it is recommended to register them before the script string. However, the interfaces that rely on these dependencies will be automatically disabled if the dependencies are not found.
-
inline void asbind20::ext::register_std_string(asIScriptEngine *engine, bool as_default = true, bool generic = has_max_portability())
-
void asbind20::ext::register_string_utils(asIScriptEngine *engine, bool generic = has_max_portability())
C++ Interface
The C++ interface is the std::string from the standard library with a UTF-8 layer on top of it.
The UTF-8 Library
- group UTF-8 library
Functions for handling UTF-8 encoded strings.
Functions
-
constexpr std::size_t u8_index(std::string_view str, std::size_t n) noexcept
Get the byte offset of the nth character in a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string to search
n – The character index to find
- Returns:
Byte offset of the nth character, or std::size_t(-1) (i.e. -1 converted to the unsigned return type) if out of range
-
constexpr std::size_t u8_index_r(std::string_view str, std::size_t n) noexcept
Get the byte offset of the nth character from the end of a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string to search
n – The character index from the end to find
- Returns:
Byte offset of the nth character from the end, or std::size_t(-1) (i.e. -1 converted to the unsigned return type) if out of range
-
constexpr unsigned int u8_bytes(char first) noexcept
Determine the number of bytes in a UTF-8 character from its first byte.
- Parameters:
first – The first byte of a UTF-8 character
- Returns:
Number of bytes in the character (1-4)
-
constexpr std::size_t u8_strlen(std::string_view str) noexcept
Count the number of UTF-8 characters in a string.
- Parameters:
str – The UTF-8 encoded string to measure
- Returns:
Number of characters in the string
-
inline char32_t u8_bytes_to_int(const char *str) noexcept
Convert a sequence of UTF-8 bytes to a Unicode code point.
- Parameters:
str – Pointer to the start of a UTF-8 character sequence
- Returns:
The decoded Unicode code point
-
constexpr unsigned int u8_int_to_bytes(char32_t ch, char *buf)
Convert a Unicode code point to UTF-8 bytes.
- Parameters:
ch – The Unicode code point to encode
buf – Buffer to store the UTF-8 bytes (must have space for 4 bytes)
- Returns:
Number of bytes written to the buffer (1-4)
-
constexpr std::string_view u8_substr(std::string_view sv, std::size_t pos, std::size_t n = std::string_view::npos)
Extract a substring from a UTF-8 string by character position.
- Parameters:
sv – The UTF-8 encoded string view
pos – Starting character position
n – Number of characters to extract (or npos for remainder)
- Returns:
Substring view
-
constexpr std::string_view u8_substr_r(std::string_view sv, std::size_t pos, std::size_t n = std::string_view::npos)
Extract a substring from a UTF-8 string by character position counted from the end.
- Parameters:
sv – The UTF-8 encoded string view
pos – Starting character position from end
n – Number of characters to extract (or npos for remainder)
- Returns:
Substring view
-
constexpr std::string_view u8_remove_prefix(std::string_view str, std::size_t n)
Remove the first n characters from a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string view
n – Number of characters to remove from start
- Returns:
String view with prefix removed
-
constexpr std::string_view u8_remove_suffix(std::string_view str, std::size_t n)
Remove the last n characters from a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string view
n – Number of characters to remove from end
- Returns:
String view with suffix removed
-
inline void u8_replace_inplace(std::string &target, std::size_t idx, std::size_t n, std::string_view str)
Replace a range of characters in a UTF-8 string.
- Parameters:
target – The string to modify
idx – Starting character index
n – Number of characters to replace
str – Replacement string
- Throws:
std::out_of_range – if idx is invalid
-
inline void u8_replace_inplace_r(std::string &target, std::size_t idx, std::size_t n, std::string_view str)
Replace a range of characters in a UTF-8 string from the end.
- Parameters:
target – The string to modify
idx – Starting character index from end
n – Number of characters to replace
str – Replacement string
- Throws:
std::out_of_range – if idx is invalid
-
constexpr std::size_t u8_index(std::string_view str, std::size_t n) noexcept
-
class const_string_iterator
Bidirectional iterator for UTF-8 encoded strings.
Provides character-by-character iteration over a UTF-8 string, returning Unicode code points.
Public Types
-
using value_type = std::uint32_t
-
using iterator_category = std::bidirectional_iterator_tag
Public Functions
-
inline const_string_iterator(std::string_view str, std::size_t offset)
-
const_string_iterator(const const_string_iterator&) = default
-
inline bool operator==(const const_string_iterator &rhs) const
-
inline const_string_iterator &operator++()
-
inline const_string_iterator operator++(int)
-
inline const_string_iterator &operator--()
-
inline const_string_iterator operator--(int)
-
inline char32_t operator*() const noexcept
-
inline bool is_end() const
-
inline explicit operator bool() const
-
using value_type = std::uint32_t
-
inline const_string_iterator asbind20::ext::utf8::string_cbegin(std::string_view str)
Get an iterator to the beginning of a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string view
- Returns:
Iterator to the first character
-
inline const_string_iterator asbind20::ext::utf8::string_cend(std::string_view str)
Get an iterator to the end of a UTF-8 string.
- Parameters:
str – The UTF-8 encoded string view
- Returns:
Iterator to the position after the last character