diff --git a/internal/htmlutil/entries.go b/internal/htmlutil/entries.go index 70b3a4b..d0001b0 100644 --- a/internal/htmlutil/entries.go +++ b/internal/htmlutil/entries.go @@ -12,6 +12,7 @@ import ( var ( entryBlockRe = regexp.MustCompile(`(?s)data-entry-id="(\d+)"`) senderRe = regexp.MustCompile(`id="sender_entry_(\d+)"[^>]*>\s*([^<]+?)\s*<`) + senderEmailRe = regexp.MustCompile(`(?s)sender_entry_(\d+).*?entry__sender-email[^>]*>]*>[^<]*([^<]+)<`) timeRe = regexp.MustCompile(`]*datetime="([^"]+)"`) srcdocRe = regexp.MustCompile(`(?s)srcdoc="([^"]*trix-content[^"]*)"`) fullRecipientsRe = regexp.MustCompile(`(?s)entry__full-recipients[^>]*>(.*?)`) @@ -86,6 +87,12 @@ func ParseTopicEntriesHTML(html string) []models.Entry { senders[m[1]] = m[2] } } + senderEmails := map[string]string{} + for _, m := range senderEmailRe.FindAllStringSubmatch(html, -1) { + if _, exists := senderEmails[m[1]]; !exists { + senderEmails[m[1]] = strings.TrimSpace(m[2]) + } + } // Associate times with entries by finding the first