From a4eb3b52cf3c10ef3d540f8d6d6b63eab4ff0126 Mon Sep 17 00:00:00 2001 From: Daniel Goc Date: Thu, 2 Apr 2026 15:24:34 +0200 Subject: [PATCH] change incoming HTML to HTML4 --- .../productTranslationService.go | 64 +++++++++++++++++-- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/app/service/productTranslationService/productTranslationService.go b/app/service/productTranslationService/productTranslationService.go index 1b0a747..55f4f66 100644 --- a/app/service/productTranslationService/productTranslationService.go +++ b/app/service/productTranslationService/productTranslationService.go @@ -8,6 +8,7 @@ import ( "log" "net/http" "os" + "regexp" "slices" "strings" "time" @@ -99,8 +100,9 @@ func (s *ProductTranslationService) SaveProductDescription(userID uint, productI // check that fields description, description_short and usage, if they exist, have a valid html format mustBeHTML := []string{"description", "description_short", "usage"} for i := 0; i < len(mustBeHTML); i++ { - if text, exists := updates[mustBeHTML[i]]; exists { - if !isValidXHTML(text) { + if _, exists := updates[mustBeHTML[i]]; exists { + updates[mustBeHTML[i]] = parseAutoCloseTags(updates[mustBeHTML[i]]) + if !isValidXHTML(updates[mustBeHTML[i]]) { return responseErrors.ErrInvalidXHTML } } @@ -245,9 +247,17 @@ func cleanForPrompt(s string) string { } } - prompt += ">" + if slices.Contains(xml.HTMLAutoClose, v.Name.Local) { + prompt += "/>" + } else { + prompt += ">" + } + case xml.EndElement: - prompt += "" + if !slices.Contains(xml.HTMLAutoClose, v.Name.Local) { + prompt += "" + } + case xml.CharData: prompt += string(v) case xml.Comment: @@ -288,6 +298,43 @@ func getStringInBetween(str string, start string, end string) (success bool, res return true, str[s : s+e] } +// parseAutoCloseTags rewrites every tag listed in xml.HTMLAutoClose into self-closing form (<br> becomes <br />) and drops its separate closing tag. +// This is an ad-hoc, regex-based solution, but it works. 
+func parseAutoCloseTags(s string) string { + alts := "" + for i, name := range xml.HTMLAutoClose { + if i > 0 { + alts += "|" + } + alts += name + } + + // remove closing tags + reClose := regexp.MustCompile(`(?i)<\s*\/\s*(?:` + alts + `)\s*>`) + s = reClose.ReplaceAllString(s, "") + + // convert <br> to <br /> + // the pattern also matches tags that already end with />; the callback below returns those unchanged + reOpen := regexp.MustCompile(`(?i)<\s*(` + alts + `)\b([^>]*?)>`) + s = reOpen.ReplaceAllStringFunc(s, func(tag string) string { + trimmed := strings.TrimSpace(tag) + + // Already self-closed: <br/>, <br />
+ if strings.HasSuffix(trimmed, "/>") { + return tag + } + + // Replace final > with /> + i := strings.LastIndex(tag, ">") + if i < 0 { + return tag + } + return tag[:i] + " />" + }) + + return s +} + // isValidXHTML checks if the string obeys the XHTML format func isValidXHTML(s string) bool { r := strings.NewReader(s) @@ -363,7 +410,12 @@ func rebuildFromResponse(s_original string, s_response string) (bool, string) { result += fmt.Sprintf(` %s="%s"`, attrName(attr.Name), attr.Value) } } - result += ">" + + if slices.Contains(xml.HTMLAutoClose, v_original.Name.Local) { + result += "/>" + } else { + result += ">" + } case xml.CharData: result += string(v_response) @@ -381,7 +433,7 @@ func rebuildFromResponse(s_original string, s_response string) (bool, string) { return false, "" } - if v_original.Name.Local != "img" { + if !slices.Contains(xml.HTMLAutoClose, v_original.Name.Local) { result += "" }