Skip to content

Instantly share code, notes, and snippets.

@fand
Last active January 18, 2024 00:20
Show Gist options
  • Select an option

  • Save fand/4deb0ae2242bbdab5743085ea9918d8a to your computer and use it in GitHub Desktop.

Select an option

Save fand/4deb0ae2242bbdab5743085ea9918d8a to your computer and use it in GitHub Desktop.

Revisions

  1. fand revised this gist Jan 18, 2024. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion main.rs
    Original file line number Diff line number Diff line change
    @@ -16,7 +16,7 @@ impl EmojiFinder {

    /// Return byte indices of emojis in the text.
    pub fn find(&self, s: &str) -> Vec<usize> {
    let mut indices = Vec::new();
    let mut indices = vec![];
    let mut index = 0;

    for grapheme in s.graphemes(true) {
  2. fand created this gist Jan 17, 2024.
    52 changes: 52 additions & 0 deletions main.rs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,52 @@
    extern crate unicode_segmentation;
    use unicode_segmentation::UnicodeSegmentation;

    use regex;

    /// Finds emoji in text by testing each grapheme cluster against a regex.
    struct EmojiFinder {
    /// Compiled pattern that `find` matches against every grapheme cluster.
    re: regex::Regex,
    }

    impl EmojiFinder {
    pub fn new() -> Self {
    Self {
    re: regex::Regex::new(r"\p{Emoji}|\p{Emoji_Presentation}|\p{Emoji_Modifier}|\p{Emoji_Modifier_Base}|\p{Emoji_Component}").unwrap(),
    }
    }

    /// Return byte indices of emojis in the text.
    pub fn find(&self, s: &str) -> Vec<usize> {
    let mut indices = Vec::new();
    let mut index = 0;

    for grapheme in s.graphemes(true) {
    if self.re.is_match(grapheme) {
    indices.push(index);
    }
    index += grapheme.bytes().len();
    }

    indices
    }
    }

    /// Demo driver: prints the emoji byte offsets found in a few sample strings.
    ///
    /// Emoji are written as `\u{...}` escapes so the samples survive any
    /// re-encoding of this file; the trailing comment on each line is the
    /// expected result.
    fn main() {
        let finder = EmojiFinder::new();

        // U+1F600 grinning face, U+1F607 smiling face with halo (4 bytes each).
        dbg!(finder.find("Hello\u{1F600}\u{1F607}")); // [5, 9]

        // ZWJ (3 byte): U+1F469 woman, U+1F4BB laptop, joined by U+200D.
        dbg!(finder.find("\u{1F469}\u{1F607}")); // [0, 4]
        dbg!(finder.find("\u{1F4BB}\u{1F607}")); // [0, 4]
        dbg!(finder.find("\u{1F469}\u{200D}\u{1F4BB}\u{1F607}")); // [0, 11]

        // Family (4byte char + ZWJ for each): U+1F468 man, U+1F469 woman, U+1F466 boy.
        dbg!(finder.find("\u{1F468}\u{1F607}")); // [0, 4]
        dbg!(finder.find("\u{1F468}\u{200D}\u{1F466}\u{1F607}")); // [0, 11]
        dbg!(finder.find("\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F466}\u{1F607}")); // [0, 18]
        dbg!(finder.find(
            "\u{1F469}\u{200D}\u{1F469}\u{200D}\u{1F466}\u{200D}\u{1F466}\u{1F607}"
        )); // [0, 25]

        // Skin-tone modifier (4 byte): U+1F44D thumbs up + U+1F3FD medium skin tone.
        dbg!(finder.find("\u{1F44D}\u{1F607}")); // [0, 4]
        dbg!(finder.find("\u{1F44D}\u{1F3FD}\u{1F607}")); // [0, 8]
    }