faster translation endpoint

This commit is contained in:
Daniel Goc
2026-03-13 13:50:15 +01:00
parent db921a7e78
commit d8a2e26896
7 changed files with 265 additions and 99 deletions

View File

@@ -5,8 +5,10 @@ import (
"encoding/xml" "encoding/xml"
"fmt" "fmt"
"io" "io"
"net/http"
"slices" "slices"
"strings" "strings"
"time"
"git.ma-al.com/goc_daniel/b2b/app/db" "git.ma-al.com/goc_daniel/b2b/app/db"
"git.ma-al.com/goc_daniel/b2b/app/model" "git.ma-al.com/goc_daniel/b2b/app/model"
@@ -26,27 +28,9 @@ type ProductDescriptionService struct {
func New() *ProductDescriptionService { func New() *ProductDescriptionService {
return &ProductDescriptionService{ return &ProductDescriptionService{
db: db.Get(), db: db.Get(),
client: openai.NewClient(option.WithAPIKey("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")), client: openai.NewClient(option.WithAPIKey("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"),
} option.WithHTTPClient(&http.Client{Timeout: 300 * time.Second})),
}
func isValidXHTML(s string) bool {
decoder := xml.NewDecoder(strings.NewReader(s))
hasStartTag := false
for {
tok, err := decoder.Token()
if err != nil {
if err == io.EOF {
return hasStartTag
}
return false
}
if _, ok := tok.(xml.StartElement); ok {
hasStartTag = true
}
} }
} }
@@ -80,7 +64,7 @@ func (s *ProductDescriptionService) SaveProductDescription(userID uint, productI
for i := 0; i < len(mustBeHTML); i++ { for i := 0; i < len(mustBeHTML); i++ {
if text, exists := updates[mustBeHTML[i]]; exists { if text, exists := updates[mustBeHTML[i]]; exists {
if !isValidXHTML(text) { if !isValidXHTML(text) {
return responseErrors.ErrInvalidHTML return responseErrors.ErrInvalidXHTML
} }
} }
} }
@@ -136,54 +120,41 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
return nil, err return nil, err
} }
request := "Translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website.\n\n" request := "Translate to " + lang.ISOCode + " without changing the html structure."
request += "\n" request += "\n\n<translation_of_product_description>"
request += "<translation_of_product_description>"
request += ProductDescription.Description request += ProductDescription.Description
request += "</translation_of_product_description>" request += "</translation_of_product_description>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_product_short_description>"
request += "\n"
request += "<translation_of_product_short_description>"
request += ProductDescription.DescriptionShort request += ProductDescription.DescriptionShort
request += "</translation_of_product_short_description>" request += "</translation_of_product_short_description>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_product_meta_description>"
request += "\n"
request += "<translation_of_product_meta_description>"
request += ProductDescription.MetaDescription request += ProductDescription.MetaDescription
request += "</translation_of_product_meta_description>" request += "</translation_of_product_meta_description>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_product_meta_title>"
request += "\n"
request += "<translation_of_product_meta_title>"
request += ProductDescription.MetaTitle request += ProductDescription.MetaTitle
request += "</translation_of_product_meta_title>" request += "</translation_of_product_meta_title>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_product_name>"
request += "\n"
request += "<translation_of_product_name>"
request += ProductDescription.Name request += ProductDescription.Name
request += "</translation_of_product_name>" request += "</translation_of_product_name>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_display_text_available_now>"
request += "\n"
request += "<translation_of_display_text_available_now>"
request += ProductDescription.AvailableNow request += ProductDescription.AvailableNow
request += "</translation_of_display_text_available_now>" request += "</translation_of_display_text_available_now>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_display_text_available_later>"
request += "\n"
request += "<translation_of_display_text_available_later>"
request += ProductDescription.AvailableLater request += ProductDescription.AvailableLater
request += "</translation_of_display_text_available_later>" request += "</translation_of_display_text_available_later>\n\n"
request += "\n" request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website." request += "\n\n<translation_of_product_usage>"
request += "\n"
request += "<translation_of_product_usage>"
request += ProductDescription.Usage request += ProductDescription.Usage
request += "</translation_of_product_usage>" request += "</translation_of_product_usage>"
request = cleanForPrompt(request)
openai_response, err := s.client.Responses.New(context.Background(), responses.ResponseNewParams{ openai_response, err := s.client.Responses.New(context.Background(), responses.ResponseNewParams{
Input: responses.ResponseNewParamsInputUnion{OfString: openai.String(request)}, Input: responses.ResponseNewParamsInputUnion{OfString: openai.String(request)},
Model: openai.ChatModelGPT4_1Mini, Model: openai.ChatModelGPT4_1Mini,
@@ -194,61 +165,136 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
} }
output := openai_response.OutputText() output := openai_response.OutputText()
// for debugging purposes // for testing purposes
// fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test.txt") // just pass the file name // fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test_out.txt") // just pass the file name
// output := string(fi) // output := string(fi)
success, match := GetStringInBetween(output, "<translation_of_product_description>", "</translation_of_product_description>") success, resolution := resolveResponse(ProductDescription.Description, output, "translation_of_product_description")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.Description = match ProductDescription.Description = resolution
success, match = GetStringInBetween(output, "<translation_of_product_short_description>", "</translation_of_product_short_description>") success, resolution = resolveResponse(ProductDescription.DescriptionShort, output, "translation_of_product_short_description")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.DescriptionShort = match ProductDescription.DescriptionShort = resolution
success, match = GetStringInBetween(output, "<translation_of_product_meta_description>", "</translation_of_product_meta_description>") success, resolution = resolveResponse(ProductDescription.MetaDescription, output, "translation_of_product_meta_description")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.MetaDescription = match ProductDescription.MetaDescription = resolution
success, match = GetStringInBetween(output, "<translation_of_product_meta_title>", "</translation_of_product_meta_title>") success, resolution = resolveResponse(ProductDescription.MetaTitle, output, "translation_of_product_meta_title")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.MetaTitle = match ProductDescription.MetaTitle = resolution
success, match = GetStringInBetween(output, "<translation_of_product_name>", "</translation_of_product_name>") success, resolution = resolveResponse(ProductDescription.Name, output, "translation_of_product_name")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.Name = match ProductDescription.Name = resolution
success, match = GetStringInBetween(output, "<translation_of_display_text_available_now>", "</translation_of_display_text_available_now>") success, resolution = resolveResponse(ProductDescription.AvailableNow, output, "translation_of_display_text_available_now")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.AvailableNow = match ProductDescription.AvailableNow = resolution
success, match = GetStringInBetween(output, "<translation_of_display_text_available_later>", "</translation_of_display_text_available_later>") success, resolution = resolveResponse(ProductDescription.AvailableLater, output, "translation_of_display_text_available_later")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.AvailableLater = match ProductDescription.AvailableLater = resolution
success, match = GetStringInBetween(output, "<translation_of_product_usage>", "</translation_of_product_usage>") success, resolution = resolveResponse(ProductDescription.Usage, output, "translation_of_product_usage")
if !success { if !success {
return nil, responseErrors.ErrOpenAIBadOutput return nil, responseErrors.ErrOpenAIBadOutput
} }
ProductDescription.Usage = match ProductDescription.Usage = resolution
return &ProductDescription, nil return &ProductDescription, nil
} }
// isValidXHTML reports whether s can be tokenized from start to end as
// well-formed XML, with the HTML named entities (such as &nbsp;) accepted.
//
// The decoder runs in strict mode, so every element must be explicitly
// closed. AutoClose is still assigned, but encoding/xml documents it as
// being consulted only when Strict is false, so it has no effect here.
// Input without any element (plain text or the empty string) is accepted.
func isValidXHTML(s string) bool {
	dec := xml.NewDecoder(strings.NewReader(s))
	dec.Strict = true
	dec.AutoClose = xml.HTMLAutoClose
	dec.Entity = xml.HTMLEntity

	for {
		if _, err := dec.Token(); err == io.EOF {
			// The whole input was consumed without a syntax error.
			return true
		} else if err != nil {
			// Any other error means the markup is malformed.
			return false
		}
	}
}
// cleanForPrompt reduces markup to what the translation prompt needs: element
// tags without attributes (only the alt attribute of img elements is kept),
// character data, and nothing else. Comments, processing instructions and
// directives are dropped.
//
// Text and attribute values come out of the decoder with entities already
// resolved, so they are escaped again before being written; otherwise an
// original "&amp;" would reach the prompt as a bare "&" and the output would
// no longer be well-formed.
//
// If s is not well-formed the input is returned unchanged. The decoder keeps
// returning the same syntax error on every call, so the loop must stop on
// any error, not only on io.EOF.
func cleanForPrompt(s string) string {
	textEscaper := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	attrEscaper := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")

	d := xml.NewDecoder(strings.NewReader(s))
	// Strict parsing with the HTML entity table. AutoClose is only consulted
	// by encoding/xml when Strict is false.
	d.Strict = true
	d.AutoClose = xml.HTMLAutoClose
	d.Entity = xml.HTMLEntity

	var prompt strings.Builder
	for {
		token, err := d.Token()
		if err == io.EOF {
			return prompt.String()
		}
		if err != nil {
			// Malformed input: fall back to the untouched text instead of
			// spinning forever or sending a truncated prompt.
			return s
		}

		switch v := token.(type) {
		case xml.StartElement:
			prompt.WriteString("<" + AttrName(v.Name))
			for _, attr := range v.Attr {
				// alt text is visible content, so it is the only attribute
				// that is passed on for translation.
				if v.Name.Local == "img" && attr.Name.Local == "alt" {
					fmt.Fprintf(&prompt, ` %s="%s"`, AttrName(attr.Name), attrEscaper.Replace(attr.Value))
				}
			}
			prompt.WriteString(">")
		case xml.EndElement:
			prompt.WriteString("</" + AttrName(v.Name) + ">")
		case xml.CharData:
			prompt.WriteString(textEscaper.Replace(string(v)))
		case xml.Comment, xml.ProcInst, xml.Directive:
			// Intentionally dropped from the prompt.
		}
	}
}
// resolveResponse pulls the section delimited by <key>…</key> out of the
// model response, checks it with isValidXHTML, and merges it with the
// original markup through RebuildFromResponse.
//
// Both documents are wrapped in the same <key> element before rebuilding so
// they share a single root; the wrapper is cut off again from the rebuilt
// text before returning. The boolean is false (and the string empty) when
// the section is missing, is not valid, or cannot be rebuilt.
func resolveResponse(original string, response string, key string) (bool, string) {
	openTag := "<" + key + ">"
	closeTag := "</" + key + ">"

	found, translated := GetStringInBetween(response, openTag, closeTag)
	if !found {
		return false, ""
	}
	if !isValidXHTML(translated) {
		return false, ""
	}

	ok, rebuilt := RebuildFromResponse(openTag+original+closeTag, openTag+translated+closeTag)
	if !ok {
		return false, ""
	}

	// Strip the wrapper element that was added above.
	return true, rebuilt[len(openTag) : len(rebuilt)-len(closeTag)]
}
// GetStringInBetween returns empty string if no start or end string found // GetStringInBetween returns empty string if no start or end string found
func GetStringInBetween(str string, start string, end string) (success bool, result string) { func GetStringInBetween(str string, start string, end string) (success bool, result string) {
s := strings.Index(str, start) s := strings.Index(str, start)
@@ -263,3 +309,140 @@ func GetStringInBetween(str string, start string, end string) (success bool, res
return true, str[s : s+e] return true, str[s : s+e]
} }
// RebuildFromResponse rebuilds markup by walking the original document and
// the translated response token by token, using the original as the template
// and the response as the source of text.
//
// Element names must match pairwise. Attributes are taken from the original,
// except the alt attribute of img elements, which is taken from the response.
// Character data is taken from the response where both sides have it; text
// present on only one side is copied from that side. Comments, processing
// instructions and directives are copied from the original. The end tag of
// img elements is not written.
//
// Text and attribute values arrive from the decoder with entities resolved,
// so they are escaped again on output. Without this, "&amp;" in the original
// would be written as a bare "&", and escaped markup in the response (for
// example "&lt;script&gt;") would be turned into live markup.
//
// It returns false and an empty string when the two documents diverge in
// structure or either one fails to parse.
func RebuildFromResponse(s_original string, s_response string) (bool, string) {
	textEscaper := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	attrEscaper := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")

	// Both decoders are strict and know the HTML entity table. AutoClose is
	// only consulted by encoding/xml when Strict is false.
	newDecoder := func(src string) *xml.Decoder {
		d := xml.NewDecoder(strings.NewReader(src))
		d.Strict = true
		d.AutoClose = xml.HTMLAutoClose
		d.Entity = xml.HTMLEntity
		return d
	}
	origDec := newDecoder(s_original)
	respDec := newDecoder(s_response)

	var out strings.Builder
	writeAttr := func(attr xml.Attr) {
		fmt.Fprintf(&out, ` %s="%s"`, AttrName(attr.Name), attrEscaper.Replace(attr.Value))
	}

	origTok, origErr := origDec.Token()
	respTok, respErr := respDec.Token()

	for {
		if origErr != nil || respErr != nil {
			// Success only when both inputs are exhausted at the same time;
			// a syntax error or a length mismatch on either side is a failure.
			if origErr == io.EOF && respErr == io.EOF {
				return true, out.String()
			}
			return false, ""
		}

		switch orig := origTok.(type) {
		case xml.StartElement:
			switch resp := respTok.(type) {
			case xml.StartElement:
				if orig.Name != resp.Name {
					return false, ""
				}
				out.WriteString("<" + AttrName(orig.Name))
				for _, attr := range orig.Attr {
					if orig.Name.Local != "img" || attr.Name.Local != "alt" {
						writeAttr(attr)
					}
				}
				for _, attr := range resp.Attr {
					if resp.Name.Local == "img" && attr.Name.Local == "alt" {
						writeAttr(attr)
					}
				}
				out.WriteString(">")
			case xml.CharData:
				// Extra text in the response: keep it and let the response
				// catch up with the original.
				out.WriteString(textEscaper.Replace(string(resp)))
				respTok, respErr = respDec.Token()
				continue
			default:
				return false, ""
			}

		case xml.EndElement:
			switch resp := respTok.(type) {
			case xml.EndElement:
				if orig.Name != resp.Name {
					return false, ""
				}
				if orig.Name.Local != "img" {
					out.WriteString("</" + AttrName(orig.Name) + ">")
				}
			case xml.CharData:
				out.WriteString(textEscaper.Replace(string(resp)))
				respTok, respErr = respDec.Token()
				continue
			default:
				return false, ""
			}

		case xml.CharData:
			switch resp := respTok.(type) {
			case xml.CharData:
				out.WriteString(textEscaper.Replace(string(resp)))
			case xml.StartElement, xml.EndElement:
				// Text that only the original has: keep it and let the
				// original catch up with the response.
				out.WriteString(textEscaper.Replace(string(orig)))
				origTok, origErr = origDec.Token()
				continue
			default:
				return false, ""
			}

		case xml.Comment:
			out.WriteString("<!--" + string(orig) + "-->")
			origTok, origErr = origDec.Token()
			continue

		case xml.ProcInst:
			if len(orig.Inst) == 0 {
				out.WriteString("<?" + orig.Target + "?>")
			} else {
				out.WriteString("<?" + orig.Target + " " + string(orig.Inst) + "?>")
			}
			origTok, origErr = origDec.Token()
			continue

		case xml.Directive:
			out.WriteString("<!" + string(orig) + ">")
			origTok, origErr = origDec.Token()
			continue
		}

		// Matching tokens were consumed on both sides: advance together.
		origTok, origErr = origDec.Token()
		respTok, respErr = respDec.Token()
	}
}
// AttrName renders an xml.Name as text: "Space:Local" when Space is set,
// otherwise just Local.
//
// NOTE(review): encoding/xml replaces declared namespace prefixes with their
// namespace URL when producing tokens, so Space may hold a URL rather than
// the prefix that was written in the source — confirm this is acceptable for
// the markup this service handles.
func AttrName(name xml.Name) string {
	if name.Space != "" {
		return name.Space + ":" + name.Local
	}
	return name.Local
}

File diff suppressed because one or more lines are too long

View File

@@ -40,7 +40,7 @@ var (
// Typed errors for product description handler // Typed errors for product description handler
ErrBadAttribute = errors.New("bad attribute") ErrBadAttribute = errors.New("bad attribute")
ErrBadField = errors.New("this field can not be updated") ErrBadField = errors.New("this field can not be updated")
ErrInvalidHTML = errors.New("text is not in html format") ErrInvalidXHTML = errors.New("text is not in xhtml format")
ErrOpenAIResponseFail = errors.New("OpenAI responded with failure") ErrOpenAIResponseFail = errors.New("OpenAI responded with failure")
ErrOpenAIBadOutput = errors.New("OpenAI response does not obey the format") ErrOpenAIBadOutput = errors.New("OpenAI response does not obey the format")
) )
@@ -116,7 +116,7 @@ func GetErrorCode(c fiber.Ctx, err error) string {
return i18n.T_(c, "error.err_bad_attribute") return i18n.T_(c, "error.err_bad_attribute")
case errors.Is(err, ErrBadField): case errors.Is(err, ErrBadField):
return i18n.T_(c, "error.err_bad_field") return i18n.T_(c, "error.err_bad_field")
case errors.Is(err, ErrInvalidHTML): case errors.Is(err, ErrInvalidXHTML):
return i18n.T_(c, "error.err_invalid_html") return i18n.T_(c, "error.err_invalid_html")
case errors.Is(err, ErrOpenAIResponseFail): case errors.Is(err, ErrOpenAIResponseFail):
return i18n.T_(c, "error.err_openai_response_fail") return i18n.T_(c, "error.err_openai_response_fail")
@@ -154,7 +154,7 @@ func GetErrorStatus(err error) int {
errors.Is(err, ErrInvalidPassword), errors.Is(err, ErrInvalidPassword),
errors.Is(err, ErrBadAttribute), errors.Is(err, ErrBadAttribute),
errors.Is(err, ErrBadField), errors.Is(err, ErrBadField),
errors.Is(err, ErrInvalidHTML): errors.Is(err, ErrInvalidXHTML):
return fiber.StatusBadRequest return fiber.StatusBadRequest
case errors.Is(err, ErrEmailExists): case errors.Is(err, ErrEmailExists):
return fiber.StatusConflict return fiber.StatusConflict

View File

@@ -1 +1 @@
exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit 
status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1 exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 
1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1

BIN
tmp/main

Binary file not shown.