faster translation endpoint

This commit is contained in:
Daniel Goc
2026-03-13 13:50:15 +01:00
parent db921a7e78
commit d8a2e26896
7 changed files with 265 additions and 99 deletions

View File

@@ -5,8 +5,10 @@ import (
"encoding/xml"
"fmt"
"io"
"net/http"
"slices"
"strings"
"time"
"git.ma-al.com/goc_daniel/b2b/app/db"
"git.ma-al.com/goc_daniel/b2b/app/model"
@@ -26,27 +28,9 @@ type ProductDescriptionService struct {
func New() *ProductDescriptionService {
return &ProductDescriptionService{
db: db.Get(),
client: openai.NewClient(option.WithAPIKey("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")),
}
}
func isValidXHTML(s string) bool {
decoder := xml.NewDecoder(strings.NewReader(s))
hasStartTag := false
for {
tok, err := decoder.Token()
if err != nil {
if err == io.EOF {
return hasStartTag
}
return false
}
if _, ok := tok.(xml.StartElement); ok {
hasStartTag = true
}
db: db.Get(),
client: openai.NewClient(option.WithAPIKey("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"),
option.WithHTTPClient(&http.Client{Timeout: 300 * time.Second})),
}
}
@@ -80,7 +64,7 @@ func (s *ProductDescriptionService) SaveProductDescription(userID uint, productI
for i := 0; i < len(mustBeHTML); i++ {
if text, exists := updates[mustBeHTML[i]]; exists {
if !isValidXHTML(text) {
return responseErrors.ErrInvalidHTML
return responseErrors.ErrInvalidXHTML
}
}
}
@@ -136,54 +120,41 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
return nil, err
}
request := "Translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website.\n\n"
request += "\n"
request += "<translation_of_product_description>"
request := "Translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_description>"
request += ProductDescription.Description
request += "</translation_of_product_description>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_product_short_description>"
request += "</translation_of_product_description>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_short_description>"
request += ProductDescription.DescriptionShort
request += "</translation_of_product_short_description>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_product_meta_description>"
request += "</translation_of_product_short_description>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_meta_description>"
request += ProductDescription.MetaDescription
request += "</translation_of_product_meta_description>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_product_meta_title>"
request += "</translation_of_product_meta_description>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_meta_title>"
request += ProductDescription.MetaTitle
request += "</translation_of_product_meta_title>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_product_name>"
request += "</translation_of_product_meta_title>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_name>"
request += ProductDescription.Name
request += "</translation_of_product_name>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_display_text_available_now>"
request += "</translation_of_product_name>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_display_text_available_now>"
request += ProductDescription.AvailableNow
request += "</translation_of_display_text_available_now>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_display_text_available_later>"
request += "</translation_of_display_text_available_now>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_display_text_available_later>"
request += ProductDescription.AvailableLater
request += "</translation_of_display_text_available_later>"
request += "\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
request += "\n"
request += "<translation_of_product_usage>"
request += "</translation_of_display_text_available_later>\n\n"
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "\n\n<translation_of_product_usage>"
request += ProductDescription.Usage
request += "</translation_of_product_usage>"
request = cleanForPrompt(request)
openai_response, err := s.client.Responses.New(context.Background(), responses.ResponseNewParams{
Input: responses.ResponseNewParamsInputUnion{OfString: openai.String(request)},
Model: openai.ChatModelGPT4_1Mini,
@@ -194,61 +165,136 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
}
output := openai_response.OutputText()
// for debugging purposes
// fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test.txt") // just pass the file name
// for testing purposes
// fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test_out.txt") // just pass the file name
// output := string(fi)
success, match := GetStringInBetween(output, "<translation_of_product_description>", "</translation_of_product_description>")
success, resolution := resolveResponse(ProductDescription.Description, output, "translation_of_product_description")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.Description = match
ProductDescription.Description = resolution
success, match = GetStringInBetween(output, "<translation_of_product_short_description>", "</translation_of_product_short_description>")
success, resolution = resolveResponse(ProductDescription.DescriptionShort, output, "translation_of_product_short_description")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.DescriptionShort = match
ProductDescription.DescriptionShort = resolution
success, match = GetStringInBetween(output, "<translation_of_product_meta_description>", "</translation_of_product_meta_description>")
success, resolution = resolveResponse(ProductDescription.MetaDescription, output, "translation_of_product_meta_description")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.MetaDescription = match
ProductDescription.MetaDescription = resolution
success, match = GetStringInBetween(output, "<translation_of_product_meta_title>", "</translation_of_product_meta_title>")
success, resolution = resolveResponse(ProductDescription.MetaTitle, output, "translation_of_product_meta_title")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.MetaTitle = match
ProductDescription.MetaTitle = resolution
success, match = GetStringInBetween(output, "<translation_of_product_name>", "</translation_of_product_name>")
success, resolution = resolveResponse(ProductDescription.Name, output, "translation_of_product_name")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.Name = match
ProductDescription.Name = resolution
success, match = GetStringInBetween(output, "<translation_of_display_text_available_now>", "</translation_of_display_text_available_now>")
success, resolution = resolveResponse(ProductDescription.AvailableNow, output, "translation_of_display_text_available_now")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.AvailableNow = match
ProductDescription.AvailableNow = resolution
success, match = GetStringInBetween(output, "<translation_of_display_text_available_later>", "</translation_of_display_text_available_later>")
success, resolution = resolveResponse(ProductDescription.AvailableLater, output, "translation_of_display_text_available_later")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.AvailableLater = match
ProductDescription.AvailableLater = resolution
success, match = GetStringInBetween(output, "<translation_of_product_usage>", "</translation_of_product_usage>")
success, resolution = resolveResponse(ProductDescription.Usage, output, "translation_of_product_usage")
if !success {
return nil, responseErrors.ErrOpenAIBadOutput
}
ProductDescription.Usage = match
ProductDescription.Usage = resolution
return &ProductDescription, nil
}
// isValidXHTML reports whether s can be tokenised from start to finish by a
// strict XML decoder that additionally knows the HTML named entities
// (e.g. &nbsp;). An empty string and plain text without any tags are accepted.
//
// Note: encoding/xml only consults AutoClose when Strict is false, so with
// Strict enabled void HTML elements such as <br> must still be written in
// their closed form (<br/>) to pass.
func isValidXHTML(s string) bool {
	dec := xml.NewDecoder(strings.NewReader(s))
	dec.Strict = true
	dec.AutoClose = xml.HTMLAutoClose
	dec.Entity = xml.HTMLEntity

	// Drain the token stream; the first error decides the verdict: a clean
	// end of input means the document is well formed, anything else does not.
	for {
		if _, err := dec.Token(); err != nil {
			return err == io.EOF
		}
	}
}
// cleanForPrompt strips markup noise from s before it is sent to the model.
//
// The input is tokenised with a strict XML decoder (HTML named entities
// enabled) and re-serialised keeping only:
//   - element start/end tags, without attributes, except the alt attribute of
//     <img> elements (it carries text that should be translated);
//   - character data.
//
// Comments, processing instructions and directives are dropped.
//
// Character data and attribute values come out of the decoder with their
// entities already resolved, so the markup-significant characters are escaped
// again on output; otherwise a source "&amp;" or "&lt;" would turn into
// malformed markup in the prompt.
//
// If s cannot be parsed, it is returned unchanged. The decoder keeps returning
// the same error after a syntax error, so the loop must stop on any error, not
// only on io.EOF.
func cleanForPrompt(s string) string {
	d := xml.NewDecoder(strings.NewReader(s))
	// AutoClose is only consulted by encoding/xml when Strict is false; it is
	// kept so the decoder is configured like the other decoders in this file.
	d.Strict = true
	d.AutoClose = xml.HTMLAutoClose
	d.Entity = xml.HTMLEntity

	escText := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	escAttr := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")

	var prompt strings.Builder
	for {
		token, err := d.Token()
		if err == io.EOF {
			return prompt.String()
		}
		if err != nil {
			// Malformed input: cleaning is best effort, fall back to the raw text.
			return s
		}

		switch v := token.(type) {
		case xml.StartElement:
			prompt.WriteString("<" + AttrName(v.Name))
			for _, attr := range v.Attr {
				if v.Name.Local == "img" && attr.Name.Local == "alt" {
					fmt.Fprintf(&prompt, ` %s="%s"`, AttrName(attr.Name), escAttr.Replace(attr.Value))
				}
			}
			prompt.WriteString(">")
		case xml.EndElement:
			prompt.WriteString("</" + AttrName(v.Name) + ">")
		case xml.CharData:
			prompt.WriteString(escText.Replace(string(v)))
		case xml.Comment, xml.ProcInst, xml.Directive:
			// Not useful for translation; intentionally omitted.
		}
	}
}
// resolveResponse extracts the translation of one field from the model output
// and grafts it onto the markup of the original field value.
//
// key is the wrapper element name used in the prompt (without angle brackets).
// The text found between <key> and </key> in response must pass isValidXHTML
// and must then be accepted by RebuildFromResponse when both the original and
// the translation are wrapped in the same <key> element. On success the
// rebuilt markup is returned with that wrapper removed again; on any failure
// the result is (false, "").
func resolveResponse(original string, response string, key string) (bool, string) {
	openTag := "<" + key + ">"
	closeTag := "</" + key + ">"

	found, translated := GetStringInBetween(response, openTag, closeTag)
	if !found {
		return false, ""
	}
	if !isValidXHTML(translated) {
		return false, ""
	}

	rebuiltOK, rebuilt := RebuildFromResponse(openTag+original+closeTag, openTag+translated+closeTag)
	if !rebuiltOK {
		return false, ""
	}

	// Cut the wrapper element back off both ends of the rebuilt document.
	return true, rebuilt[len(openTag) : len(rebuilt)-len(closeTag)]
}
// GetStringInBetween returns empty string if no start or end string found
func GetStringInBetween(str string, start string, end string) (success bool, result string) {
s := strings.Index(str, start)
@@ -263,3 +309,140 @@ func GetStringInBetween(str string, start string, end string) (success bool, res
return true, str[s : s+e]
}
// RebuildFromResponse rebuilds markup using s_original as the structural
// template and s_response as the source of translated text.
//
// Both inputs are tokenised with strict XML decoders (HTML named entities
// enabled) and walked side by side:
//   - start and end elements must appear in the same order with the same
//     names in both inputs; attributes are taken from the original, except
//     the alt attribute of <img>, which is taken from the response;
//   - character data is taken from the response; text present only in the
//     response is inserted, and text present only in the original is kept;
//   - comments, processing instructions and directives are copied from the
//     original and are not expected in the response;
//   - the end tag of <img> is not written.
//
// Attribute values and character data are delivered by the decoder with
// entities already resolved, so they are escaped again when written out.
// Without that, a source such as href="x?a=1&amp;b=2" or "Fish &amp; chips"
// would be emitted with a bare "&" and no longer be well formed.
//
// It returns (true, rebuilt) when both inputs reach their end together, and
// (false, "") on any structural mismatch or when either input stops early or
// fails to parse.
//
// NOTE(review): <img> is emitted as "<img ...>" with no closing tag, which a
// Strict decoder will not accept on a later pass — confirm this is intended.
func RebuildFromResponse(s_original string, s_response string) (bool, string) {
	newDecoder := func(src string) *xml.Decoder {
		d := xml.NewDecoder(strings.NewReader(src))
		// AutoClose is only consulted by encoding/xml when Strict is false;
		// it is kept so the configuration matches the rest of the file.
		d.Strict = true
		d.AutoClose = xml.HTMLAutoClose
		d.Entity = xml.HTMLEntity
		return d
	}
	origDec := newDecoder(s_original)
	respDec := newDecoder(s_response)

	escText := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	escAttr := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")

	var result strings.Builder
	writeAttr := func(attr xml.Attr) {
		fmt.Fprintf(&result, ` %s="%s"`, AttrName(attr.Name), escAttr.Replace(attr.Value))
	}

	origTok, origErr := origDec.Token()
	respTok, respErr := respDec.Token()

	for {
		// Stop as soon as either side ends or fails. Only a simultaneous
		// io.EOF counts as success; distinct errors never compare equal.
		if origErr != nil || respErr != nil {
			if origErr != respErr {
				return false, ""
			}
			return true, result.String()
		}

		switch orig := origTok.(type) {
		case xml.StartElement:
			switch resp := respTok.(type) {
			case xml.StartElement:
				if orig.Name.Space != resp.Name.Space || orig.Name.Local != resp.Name.Local {
					return false, ""
				}

				result.WriteString("<" + AttrName(orig.Name))
				// Everything but img/alt comes from the original...
				for _, attr := range orig.Attr {
					if orig.Name.Local != "img" || attr.Name.Local != "alt" {
						writeAttr(attr)
					}
				}
				// ...and img/alt comes from the (translated) response.
				for _, attr := range resp.Attr {
					if resp.Name.Local == "img" && attr.Name.Local == "alt" {
						writeAttr(attr)
					}
				}
				result.WriteString(">")

			case xml.CharData:
				// Extra text in the response: emit it and advance the response only.
				result.WriteString(escText.Replace(string(resp)))
				respTok, respErr = respDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.EndElement:
			switch resp := respTok.(type) {
			case xml.EndElement:
				if orig.Name.Space != resp.Name.Space || orig.Name.Local != resp.Name.Local {
					return false, ""
				}

				if orig.Name.Local != "img" {
					result.WriteString("</" + AttrName(orig.Name) + ">")
				}

			case xml.CharData:
				// Extra text in the response: emit it and advance the response only.
				result.WriteString(escText.Replace(string(resp)))
				respTok, respErr = respDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.CharData:
			switch resp := respTok.(type) {
			case xml.CharData:
				result.WriteString(escText.Replace(string(resp)))

			case xml.StartElement, xml.EndElement:
				// Text missing from the response: keep the original text and
				// advance the original only.
				result.WriteString(escText.Replace(string(orig)))
				origTok, origErr = origDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.Comment:
			result.WriteString("<!--" + string(orig) + "-->")
			origTok, origErr = origDec.Token()
			continue

		case xml.ProcInst:
			if len(orig.Inst) == 0 {
				result.WriteString("<?" + orig.Target + "?>")
			} else {
				result.WriteString("<?" + orig.Target + " " + string(orig.Inst) + "?>")
			}
			origTok, origErr = origDec.Token()
			continue

		case xml.Directive:
			result.WriteString("<!" + string(orig) + ">")
			origTok, origErr = origDec.Token()
			continue
		}

		// Matching tokens were consumed on both sides: advance both.
		origTok, origErr = origDec.Token()
		respTok, respErr = respDec.Token()
	}
}
// AttrName formats an xml.Name for serialisation: the local name on its own
// when the name has no namespace part, otherwise "Space:Local".
func AttrName(name xml.Name) string {
	if name.Space != "" {
		return name.Space + ":" + name.Local
	}
	return name.Local
}

File diff suppressed because one or more lines are too long

View File

@@ -40,7 +40,7 @@ var (
// Typed errors for product description handler
ErrBadAttribute = errors.New("bad attribute")
ErrBadField = errors.New("this field can not be updated")
ErrInvalidHTML = errors.New("text is not in html format")
ErrInvalidXHTML = errors.New("text is not in xhtml format")
ErrOpenAIResponseFail = errors.New("OpenAI responded with failure")
ErrOpenAIBadOutput = errors.New("OpenAI response does not obey the format")
)
@@ -116,7 +116,7 @@ func GetErrorCode(c fiber.Ctx, err error) string {
return i18n.T_(c, "error.err_bad_attribute")
case errors.Is(err, ErrBadField):
return i18n.T_(c, "error.err_bad_field")
case errors.Is(err, ErrInvalidHTML):
case errors.Is(err, ErrInvalidXHTML):
return i18n.T_(c, "error.err_invalid_html")
case errors.Is(err, ErrOpenAIResponseFail):
return i18n.T_(c, "error.err_openai_response_fail")
@@ -154,7 +154,7 @@ func GetErrorStatus(err error) int {
errors.Is(err, ErrInvalidPassword),
errors.Is(err, ErrBadAttribute),
errors.Is(err, ErrBadField),
errors.Is(err, ErrInvalidHTML):
errors.Is(err, ErrInvalidXHTML):
return fiber.StatusBadRequest
case errors.Is(err, ErrEmailExists):
return fiber.StatusConflict