change incoming HTML to HTML4

This commit is contained in:
Daniel Goc
2026-04-02 15:24:34 +02:00
parent 551b5ef77d
commit a4eb3b52cf

View File

@@ -8,6 +8,7 @@ import (
"log" "log"
"net/http" "net/http"
"os" "os"
"regexp"
"slices" "slices"
"strings" "strings"
"time" "time"
@@ -99,8 +100,9 @@ func (s *ProductTranslationService) SaveProductDescription(userID uint, productI
// check that fields description, description_short and usage, if they exist, have a valid html format // check that fields description, description_short and usage, if they exist, have a valid html format
mustBeHTML := []string{"description", "description_short", "usage"} mustBeHTML := []string{"description", "description_short", "usage"}
for i := 0; i < len(mustBeHTML); i++ { for i := 0; i < len(mustBeHTML); i++ {
if text, exists := updates[mustBeHTML[i]]; exists { if _, exists := updates[mustBeHTML[i]]; exists {
if !isValidXHTML(text) { updates[mustBeHTML[i]] = parseAutoCloseTags(updates[mustBeHTML[i]])
if !isValidXHTML(updates[mustBeHTML[i]]) {
return responseErrors.ErrInvalidXHTML return responseErrors.ErrInvalidXHTML
} }
} }
@@ -245,9 +247,17 @@ func cleanForPrompt(s string) string {
} }
} }
if slices.Contains(xml.HTMLAutoClose, v.Name.Local) {
prompt += "/>"
} else {
prompt += ">" prompt += ">"
}
case xml.EndElement: case xml.EndElement:
if !slices.Contains(xml.HTMLAutoClose, v.Name.Local) {
prompt += "</" + attrName(v.Name) + ">" prompt += "</" + attrName(v.Name) + ">"
}
case xml.CharData: case xml.CharData:
prompt += string(v) prompt += string(v)
case xml.Comment: case xml.Comment:
@@ -288,6 +298,43 @@ func getStringInBetween(str string, start string, end string) (success bool, res
return true, str[s : s+e] return true, str[s : s+e]
} }
// autoCloseTagAlternation returns the element names listed in
// xml.HTMLAutoClose, regex-escaped and joined with "|" so they can be embedded
// in a regular-expression alternation.
func autoCloseTagAlternation() string {
	names := make([]string, 0, len(xml.HTMLAutoClose))
	for _, name := range xml.HTMLAutoClose {
		names = append(names, regexp.QuoteMeta(name))
	}
	return strings.Join(names, "|")
}

// The patterns are compiled once at package initialisation instead of on every
// call to parseAutoCloseTags.
var (
	// autoCloseEndTagRE matches an end tag of an auto-close element, e.g.
	// "</img>" or "< / br >" (case-insensitive).
	autoCloseEndTagRE = regexp.MustCompile(`(?i)<\s*/\s*(?:` + autoCloseTagAlternation() + `)\s*>`)

	// autoCloseStartTagRE matches a start tag of an auto-close element,
	// whether or not it is already self-closed. The element name must be
	// followed by whitespace, "/" or ">", so longer names that merely begin
	// with one (e.g. "br-foo") are not matched. Quoted attribute values are
	// consumed as a unit so a ">" inside them does not terminate the tag.
	autoCloseStartTagRE = regexp.MustCompile(`(?i)<\s*(?:` + autoCloseTagAlternation() + `)(?:[\s/](?:"[^"]*"|'[^']*'|[^>"'])*)?>`)
)

// parseAutoCloseTags rewrites every element named in xml.HTMLAutoClose
// (br, img, hr, ...) into its self-closed form: end tags such as "</img>" are
// dropped and start tags such as "<img ...>" become "<img ... />". Tags that
// are already self-closed keep their form (stray whitespace between "/" and
// ">" is removed). All other markup is returned untouched.
//
// This is a regex-based, best-effort normalisation, not a full HTML parser;
// malformed tags (e.g. unbalanced quotes) are left as they are.
func parseAutoCloseTags(s string) string {
	// Drop end tags first so that "<img></img>" collapses to a single tag.
	s = autoCloseEndTagRE.ReplaceAllString(s, "")

	return autoCloseStartTagRE.ReplaceAllStringFunc(s, func(tag string) string {
		// Every match ends in ">"; strip it together with any whitespace
		// preceding it so the slash check below sees the real last character.
		body := strings.TrimRight(strings.TrimSuffix(tag, ">"), " \t\r\n\f")
		if strings.HasSuffix(body, "/") {
			// Already self-closed: "<br/>", "<img ... />", "<hr / >".
			return body + ">"
		}
		return body + " />"
	})
}
// isValidXHTML checks if the string obeys the XHTML format // isValidXHTML checks if the string obeys the XHTML format
func isValidXHTML(s string) bool { func isValidXHTML(s string) bool {
r := strings.NewReader(s) r := strings.NewReader(s)
@@ -363,7 +410,12 @@ func rebuildFromResponse(s_original string, s_response string) (bool, string) {
result += fmt.Sprintf(` %s="%s"`, attrName(attr.Name), attr.Value) result += fmt.Sprintf(` %s="%s"`, attrName(attr.Name), attr.Value)
} }
} }
if slices.Contains(xml.HTMLAutoClose, v_original.Name.Local) {
result += "/>"
} else {
result += ">" result += ">"
}
case xml.CharData: case xml.CharData:
result += string(v_response) result += string(v_response)
@@ -381,7 +433,7 @@ func rebuildFromResponse(s_original string, s_response string) (bool, string) {
return false, "" return false, ""
} }
if v_original.Name.Local != "img" { if !slices.Contains(xml.HTMLAutoClose, v_original.Name.Local) {
result += "</" + attrName(v_original.Name) + ">" result += "</" + attrName(v_original.Name) + ">"
} }